All of lore.kernel.org
 help / color / mirror / Atom feed
* + aoe-ensure-we-initialise-the-request_queue-correctly.patch added to -mm tree
@ 2009-09-02 19:55 akpm
       [not found] ` <20090903063539.GH12579@kernel.dk>
  0 siblings, 1 reply; 18+ messages in thread
From: akpm @ 2009-09-02 19:55 UTC (permalink / raw)
  To: mm-commits; +Cc: apw, bonbons, ecashin, jens.axboe, martin.petersen, rjw


The patch titled
     aoe: ensure we initialise the request_queue correctly
has been added to the -mm tree.  Its filename is
     aoe-ensure-we-initialise-the-request_queue-correctly.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find
out what to do about this

The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/

------------------------------------------------------
Subject: aoe: ensure we initialise the request_queue correctly
From: Andy Whitcroft <apw@canonical.com>

When using the aoe driver we see an oops triggered by use of an
incorrectly initialised request_queue object:

  [ 2645.959090] kobject '<NULL>' (ffff880059ca22c0): tried to add
		an uninitialized object, something is seriously wrong.
  [ 2645.959104] Pid: 6, comm: events/0 Not tainted 2.6.31-5-generic #24-Ubuntu
  [ 2645.959107] Call Trace:
  [ 2645.959139] [<ffffffff8126ca2f>] kobject_add+0x5f/0x70
  [ 2645.959151] [<ffffffff8125b4ab>] blk_register_queue+0x8b/0xf0
  [ 2645.959155] [<ffffffff8126043f>] add_disk+0x8f/0x160
  [ 2645.959161] [<ffffffffa01673c4>] aoeblk_gdalloc+0x164/0x1c0 [aoe]

It seems this driver mearly embeds a request_queue object in its main
control structure and does not attempt to initialise it.  Pull this out
and use blk_init_queue/blk_cleanup_queue to handle initialisation and
teardown of this object.

Bruno bisected this regression down to

  cd43e26f071524647e660706b784ebcbefbd2e44

  block: Expose stacked device queues in sysfs

"This seems to generate /sys/block/$device/queue and its contents for
 everyone who is using queues, not just for those queues that have a
 non-NULL queue->request_fn."

Addresses http://bugs.launchpad.net/bugs/410198
Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13942

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Acked-by: Ed Cashin <ecashin@coraid.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Bruno Premont <bonbons@linux-vserver.org>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 drivers/block/aoe/aoe.h    |    2 +-
 drivers/block/aoe/aoeblk.c |    6 +++---
 drivers/block/aoe/aoedev.c |   11 ++++++++++-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff -puN drivers/block/aoe/aoe.h~aoe-ensure-we-initialise-the-request_queue-correctly drivers/block/aoe/aoe.h
--- a/drivers/block/aoe/aoe.h~aoe-ensure-we-initialise-the-request_queue-correctly
+++ a/drivers/block/aoe/aoe.h
@@ -155,7 +155,7 @@ struct aoedev {
 	u16 fw_ver;		/* version of blade's firmware */
 	struct work_struct work;/* disk create work struct */
 	struct gendisk *gd;
-	struct request_queue blkq;
+	struct request_queue *blkq;
 	struct hd_geometry geo; 
 	sector_t ssize;
 	struct timer_list timer;
diff -puN drivers/block/aoe/aoeblk.c~aoe-ensure-we-initialise-the-request_queue-correctly drivers/block/aoe/aoeblk.c
--- a/drivers/block/aoe/aoeblk.c~aoe-ensure-we-initialise-the-request_queue-correctly
+++ a/drivers/block/aoe/aoeblk.c
@@ -264,8 +264,8 @@ aoeblk_gdalloc(void *vp)
 		goto err_disk;
 	}
 
-	blk_queue_make_request(&d->blkq, aoeblk_make_request);
-	if (bdi_init(&d->blkq.backing_dev_info))
+	blk_queue_make_request(d->blkq, aoeblk_make_request);
+	if (bdi_init(&d->blkq->backing_dev_info))
 		goto err_mempool;
 	spin_lock_irqsave(&d->lock, flags);
 	gd->major = AOE_MAJOR;
@@ -276,7 +276,7 @@ aoeblk_gdalloc(void *vp)
 	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
 		d->aoemajor, d->aoeminor);
 
-	gd->queue = &d->blkq;
+	gd->queue = d->blkq;
 	d->gd = gd;
 	d->flags &= ~DEVFL_GDALLOC;
 	d->flags |= DEVFL_UP;
diff -puN drivers/block/aoe/aoedev.c~aoe-ensure-we-initialise-the-request_queue-correctly drivers/block/aoe/aoedev.c
--- a/drivers/block/aoe/aoedev.c~aoe-ensure-we-initialise-the-request_queue-correctly
+++ a/drivers/block/aoe/aoedev.c
@@ -113,6 +113,7 @@ aoedev_freedev(struct aoedev *d)
 	if (d->bufpool)
 		mempool_destroy(d->bufpool);
 	skbpoolfree(d);
+	blk_cleanup_queue(d->blkq);
 	kfree(d);
 }
 
@@ -202,6 +203,7 @@ aoedev_by_sysminor_m(ulong sysminor)
 {
 	struct aoedev *d;
 	ulong flags;
+	struct request_queue *rq;
 
 	spin_lock_irqsave(&devlist_lock, flags);
 
@@ -210,9 +212,13 @@ aoedev_by_sysminor_m(ulong sysminor)
 			break;
 	if (d)
 		goto out;
+	rq = blk_init_queue(NULL, NULL);
+	if (!rq)
+		goto out;
 	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
 	if (!d)
-		goto out;
+		goto out_rq;
+	d->blkq = rq;
 	INIT_WORK(&d->work, aoecmd_sleepwork);
 	spin_lock_init(&d->lock);
 	skb_queue_head_init(&d->sendq);
@@ -234,6 +240,9 @@ aoedev_by_sysminor_m(ulong sysminor)
  out:
 	spin_unlock_irqrestore(&devlist_lock, flags);
 	return d;
+ out_rq:
+	blk_cleanup_queue(rq);
+	goto out;
 }
 
 static void
_

Patches currently in -mm which might be from apw@canonical.com are

aoe-ensure-we-initialise-the-request_queue-correctly.patch
hugetlb-balance-freeing-of-huge-pages-across-nodes.patch
hugetlb-use-free_pool_huge_page-to-return-unused-surplus-pages.patch
hugetlb-use-free_pool_huge_page-to-return-unused-surplus-pages-fix.patch
hugetlb-clean-up-and-update-huge-pages-documentation.patch
hugetlb-restore-interleaving-of-bootmem-huge-pages.patch
checkpatch-possible-types-else-cannot-start-a-type.patch
checkpatch-handle-c99-comments-correctly-performance-issue.patch
checkpatch-indent-checks-stop-when-we-run-out-of-continuation-lines.patch
checkpatch-make-f-alias-file-add-help-more-verbose-help-message.patch
checkpatch-format-strings-should-not-have-brackets-in-macros.patch
checkpatch-limit-sn-un-matches-to-actual-bit-sizes.patch
checkpatch-version-029.patch


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] aoe: end barrier bios with EOPNOTSUPP
       [not found]             ` <20090908193540.GB18599@kernel.dk>
@ 2009-09-09 16:45               ` Ed Cashin
  2009-09-09 16:50                 ` Ed Cashin
                                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Ed Cashin @ 2009-09-09 16:45 UTC (permalink / raw)
  To: ecashin, jens.axboe, akpm, apw, bonbons, linux-kernel

BugLink: http://bugzilla.kernel.org/show_bug.cgi?id=13942

Bruno Premont noticed that aoe throws a BUG during umount of an XFS in
2.6.31:

[ 5259.349897] aoe: bi_io_vec is NULL
[ 5259.349940] ------------[ cut here ]------------
[ 5259.349958] kernel BUG at /usr/src/linux-2.6/drivers/block/aoe/aoeblk.c:177!
[ 5259.349990] invalid opcode: 0000 [#1]

The bio in question is a barrier.  Jens Axboe suggested that such bios
need to be recognized and ended with -EOPNOTSUPP by any driver that
provides its own ->make_request_fn handler and does not handle
barriers.

In testing the changes below eliminate the BUG.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
After this patch has been reviewed I plan to add this patch to
the aoe quilt tree for linux-next.

 drivers/block/aoe/aoeblk.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 2307a27..d6806fb 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -172,6 +172,9 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
 		BUG();
 		bio_endio(bio, -ENXIO);
 		return 0;
+	} else if (bio_barrier(bio)) {
+                bio_endio(bio, -EOPNOTSUPP);
+                return 0;
 	} else if (bio->bi_io_vec == NULL) {
 		printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
 		BUG();
-- 
1.5.6.5


-- 
  Ed Cashin
  http://noserose.net/e/
  http://www.coraid.com/

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-09 16:45               ` [PATCH] aoe: end barrier bios with EOPNOTSUPP Ed Cashin
@ 2009-09-09 16:50                 ` Ed Cashin
  2009-09-09 19:03                   ` Ed Cashin
  2009-09-09 16:51                 ` Daniel Walker
  2009-09-09 17:08                 ` Alan Cox
  2 siblings, 1 reply; 18+ messages in thread
From: Ed Cashin @ 2009-09-09 16:50 UTC (permalink / raw)
  To: ecashin, jens.axboe, akpm, apw, bonbons, linux-kernel

On Wed Sep  9 12:45:17 EDT 2009, ecashin@coraid.com wrote:
...
> +	} else if (bio_barrier(bio)) {
> +                bio_endio(bio, -EOPNOTSUPP);
> +                return 0;
>  	} else if (bio->bi_io_vec == NULL) {
>  		printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
>  		BUG();

Bah.  I should have run checkpatch.pl.  I will re-do this patch using
real tabs.

-- 
  Ed

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-09 16:45               ` [PATCH] aoe: end barrier bios with EOPNOTSUPP Ed Cashin
  2009-09-09 16:50                 ` Ed Cashin
@ 2009-09-09 16:51                 ` Daniel Walker
  2009-09-09 17:08                 ` Alan Cox
  2 siblings, 0 replies; 18+ messages in thread
From: Daniel Walker @ 2009-09-09 16:51 UTC (permalink / raw)
  To: Ed Cashin; +Cc: jens.axboe, akpm, apw, bonbons, linux-kernel

On Wed, 2009-09-09 at 12:45 -0400, Ed Cashin wrote:
> --- a/drivers/block/aoe/aoeblk.c
> +++ b/drivers/block/aoe/aoeblk.c
> @@ -172,6 +172,9 @@ aoeblk_make_request(struct request_queue *q,
> struct bio *bio)
>                 BUG();
>                 bio_endio(bio, -ENXIO);
>                 return 0;
> +       } else if (bio_barrier(bio)) {
> +                bio_endio(bio, -EOPNOTSUPP);
> +                return 0;
>         } else if (bio->bi_io_vec == NULL) {
>                 printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
>                 BUG();

Could you run this through script/checkpatch.pl and fix any errors you
find.. It looks like your not indenting properly ..

Daniel


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-09 16:45               ` [PATCH] aoe: end barrier bios with EOPNOTSUPP Ed Cashin
  2009-09-09 16:50                 ` Ed Cashin
  2009-09-09 16:51                 ` Daniel Walker
@ 2009-09-09 17:08                 ` Alan Cox
  2009-09-09 18:00                   ` Ed Cashin
  2 siblings, 1 reply; 18+ messages in thread
From: Alan Cox @ 2009-09-09 17:08 UTC (permalink / raw)
  To: Ed Cashin; +Cc: ecashin, jens.axboe, akpm, apw, bonbons, linux-kernel

> The bio in question is a barrier.  Jens Axboe suggested that such bios
> need to be recognized and ended with -EOPNOTSUPP by any driver that
> provides its own ->make_request_fn handler and does not handle
> barriers.
> 
> In testing the changes below eliminate the BUG.

Presumably AoE should actually issue an ATA cache flush for this case ?


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-09 17:08                 ` Alan Cox
@ 2009-09-09 18:00                   ` Ed Cashin
  2009-09-09 20:58                     ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Ed Cashin @ 2009-09-09 18:00 UTC (permalink / raw)
  To: alan, ecashin, jens.axboe, akpm, apw, bonbons, linux-kernel

On Wed Sep  9 13:06:42 EDT 2009, alan@lxorguk.ukuu.org.uk wrote:
> > The bio in question is a barrier.  Jens Axboe suggested that such bios
> > need to be recognized and ended with -EOPNOTSUPP by any driver that
> > provides its own ->make_request_fn handler and does not handle
> > barriers.
> > 
> > In testing the changes below eliminate the BUG.
> 
> Presumably AoE should actually issue an ATA cache flush for this case ?

Yes.  The aoe driver juggles a set of in-process I/O operations that
have been sent to the AoE target but have not yet received responses.

To implement the barrier, it would stop generating new AoE write
commands, wait for AoE write responses for all the outstanding write
commands, issue the ATA cache flush command, wait for the response to
the flush, and then resume normal activity.

-- 
  Ed

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-09 16:50                 ` Ed Cashin
@ 2009-09-09 19:03                   ` Ed Cashin
  0 siblings, 0 replies; 18+ messages in thread
From: Ed Cashin @ 2009-09-09 19:03 UTC (permalink / raw)
  To: ecashin, jens.axboe, akpm, apw, bonbons, linux-kernel

aoe: end barrier bios with EOPNOTSUPP

BugLink: http://bugzilla.kernel.org/show_bug.cgi?id=13942

Bruno Premont noticed that aoe throws a BUG during umount of an XFS in
2.6.31:

[ 5259.349897] aoe: bi_io_vec is NULL
[ 5259.349940] ------------[ cut here ]------------
[ 5259.349958] kernel BUG at /usr/src/linux-2.6/drivers/block/aoe/aoeblk.c:177!
[ 5259.349990] invalid opcode: 0000 [#1]

The bio in question is a barrier.  Jens Axboe suggested that such bios
need to be recognized and ended with -EOPNOTSUPP by any driver that
provides its own ->make_request_fn handler and does not handle
barriers.

In testing the changes below eliminate the BUG.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
After this patch has been reviewed I plan to add this patch to
the aoe quilt tree for linux-next.

 drivers/block/aoe/aoeblk.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 2307a27..22efb33 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -172,6 +172,9 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
 		BUG();
 		bio_endio(bio, -ENXIO);
 		return 0;
+	} else if (bio_barrier(bio)) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
 	} else if (bio->bi_io_vec == NULL) {
 		printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
 		BUG();
-- 
1.5.6.5


-- 
  Ed Cashin <ecashin@coraid.com>
  http://noserose.net/e/
  http://www.coraid.com/

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-09 18:00                   ` Ed Cashin
@ 2009-09-09 20:58                     ` Jens Axboe
  2009-09-09 21:23                       ` Ed Cashin
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2009-09-09 20:58 UTC (permalink / raw)
  To: Ed Cashin; +Cc: alan, akpm, apw, bonbons, linux-kernel

On Wed, Sep 09 2009, Ed Cashin wrote:
> On Wed Sep  9 13:06:42 EDT 2009, alan@lxorguk.ukuu.org.uk wrote:
> > > The bio in question is a barrier.  Jens Axboe suggested that such bios
> > > need to be recognized and ended with -EOPNOTSUPP by any driver that
> > > provides its own ->make_request_fn handler and does not handle
> > > barriers.
> > > 
> > > In testing the changes below eliminate the BUG.
> > 
> > Presumably AoE should actually issue an ATA cache flush for this case ?
> 
> Yes.  The aoe driver juggles a set of in-process I/O operations that
> have been sent to the AoE target but have not yet received responses.
> 
> To implement the barrier, it would stop generating new AoE write
> commands, wait for AoE write responses for all the outstanding write
> commands, issue the ATA cache flush command, wait for the response to
> the flush, and then resume normal activity.

That's how barriers work with sata to begin with, so I'd very much
recommend that you do the same in aoe instead of just not supporting it.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-09 20:58                     ` Jens Axboe
@ 2009-09-09 21:23                       ` Ed Cashin
  2009-09-10  7:48                         ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Ed Cashin @ 2009-09-09 21:23 UTC (permalink / raw)
  To: jens.axboe, ecashin, alan, akpm, apw, bonbons, linux-kernel

On Wed Sep  9 16:59:30 EDT 2009, jens.axboe@oracle.com wrote:
> On Wed, Sep 09 2009, Ed Cashin wrote:
...
> > To implement the barrier, [aoe] would stop generating new AoE write
> > commands, wait for AoE write responses for all the outstanding write
> > commands, issue the ATA cache flush command, wait for the response to
> > the flush, and then resume normal activity.
> 
> That's how barriers work with sata to begin with, so I'd very much
> recommend that you do the same in aoe instead of just not supporting it.

This patch with EOPNOTSUPP just fixes the regression in 2.6.31-rc9.

A patch adding support for barriers would be nice.  Since I have
limited time I tried to motivate that work by using the torture tests
that Chris Mason (I believe it was) posted a while ago, but I could
not get any problems to manifest using that torture test.

If I had a reproducable problem case that the barrier implementation
could fix, it would be very helpful.

-- 
  Ed

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-09 21:23                       ` Ed Cashin
@ 2009-09-10  7:48                         ` Jens Axboe
  2009-09-10 15:16                           ` Ed Cashin
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2009-09-10  7:48 UTC (permalink / raw)
  To: Ed Cashin; +Cc: alan, akpm, apw, bonbons, linux-kernel

On Wed, Sep 09 2009, Ed Cashin wrote:
> On Wed Sep  9 16:59:30 EDT 2009, jens.axboe@oracle.com wrote:
> > On Wed, Sep 09 2009, Ed Cashin wrote:
> ...
> > > To implement the barrier, [aoe] would stop generating new AoE write
> > > commands, wait for AoE write responses for all the outstanding write
> > > commands, issue the ATA cache flush command, wait for the response to
> > > the flush, and then resume normal activity.
> > 
> > That's how barriers work with sata to begin with, so I'd very much
> > recommend that you do the same in aoe instead of just not supporting it.
> 
> This patch with EOPNOTSUPP just fixes the regression in 2.6.31-rc9.
> 
> A patch adding support for barriers would be nice.  Since I have
> limited time I tried to motivate that work by using the torture tests
> that Chris Mason (I believe it was) posted a while ago, but I could
> not get any problems to manifest using that torture test.
> 
> If I had a reproducable problem case that the barrier implementation
> could fix, it would be very helpful.

Depending on timing and the other end, it may not be easy to reproduce.
But the problem is indeed real, so I think you should just add the
proper barrier implementation instead of spending too much time trying
to create a reproducable problem. By the time you get there, you could
have fixed it many times over :-)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-10  7:48                         ` Jens Axboe
@ 2009-09-10 15:16                           ` Ed Cashin
  2009-09-10 19:50                             ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Ed Cashin @ 2009-09-10 15:16 UTC (permalink / raw)
  To: jens.axboe, ecashin, alan, akpm, apw, bonbons, linux-kernel

On Thu Sep 10 03:48:22 EDT 2009, jens.axboe@oracle.com wrote:
> On Wed, Sep 09 2009, Ed Cashin wrote:
...
> > If I had a reproducable problem case that the barrier implementation
> > could fix, it would be very helpful.
> 
> Depending on timing and the other end, it may not be easy to reproduce.
> But the problem is indeed real, so I think you should just add the
> proper barrier implementation instead of spending too much time trying
> to create a reproducable problem. By the time you get there, you could
> have fixed it many times over :-)

OK. I will start work on it.  But if anybody can give me a torture
test that works on aoe devices, I'll be grateful.

I think the EOPNOTSUPP patch is good for the for-next aoe quilt tree,
while the new barrier support will be good for 2.6.32.

-- 
  Ed

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-10 15:16                           ` Ed Cashin
@ 2009-09-10 19:50                             ` Jens Axboe
  2009-09-10 20:03                               ` Ed Cashin
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2009-09-10 19:50 UTC (permalink / raw)
  To: Ed Cashin; +Cc: alan, akpm, apw, bonbons, linux-kernel

On Thu, Sep 10 2009, Ed Cashin wrote:
> On Thu Sep 10 03:48:22 EDT 2009, jens.axboe@oracle.com wrote:
> > On Wed, Sep 09 2009, Ed Cashin wrote:
> ...
> > > If I had a reproducable problem case that the barrier implementation
> > > could fix, it would be very helpful.
> > 
> > Depending on timing and the other end, it may not be easy to reproduce.
> > But the problem is indeed real, so I think you should just add the
> > proper barrier implementation instead of spending too much time trying
> > to create a reproducable problem. By the time you get there, you could
> > have fixed it many times over :-)
> 
> OK. I will start work on it.  But if anybody can give me a torture
> test that works on aoe devices, I'll be grateful.
> 
> I think the EOPNOTSUPP patch is good for the for-next aoe quilt tree,
> while the new barrier support will be good for 2.6.32.

.31 is already out, so there's plenty of time to fix it for .32 for
real. I don't mind putting in the temporary fix in the mean time,
though.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-10 19:50                             ` Jens Axboe
@ 2009-09-10 20:03                               ` Ed Cashin
  2009-09-10 20:07                                 ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Ed Cashin @ 2009-09-10 20:03 UTC (permalink / raw)
  To: jens.axboe, ecashin, alan, akpm, apw, bonbons, linux-kernel

On Thu Sep 10 15:50:23 EDT 2009, jens.axboe@oracle.com wrote:
> On Thu, Sep 10 2009, Ed Cashin wrote:
...
> > I think the EOPNOTSUPP patch is good for the for-next aoe quilt tree,
> > while the new barrier support will be good for 2.6.32.
> 
> .31 is already out, so there's plenty of time to fix it for .32 for
> real. I don't mind putting in the temporary fix in the mean time,
> though.

I missed that.  It was still rc9 when I pulled earlier today, so I put
the temporary fix into the aoe quilt tree for linux-next.

Now that 2.6.31 is out, I suppose this patch should be taken out of
the aoe quilt tree for linux-next and submitted for inclusion in
2.6.31.1.

So, yes, if you don't mind pushing the patch to the appropriate place,
that would be great.

Now I have to figure out why the other patch in aoe's quilt tree for
linux-next never made it to 2.6.31.

-- 
  Ed

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-10 20:03                               ` Ed Cashin
@ 2009-09-10 20:07                                 ` Jens Axboe
  2009-09-10 20:20                                   ` Ed Cashin
  0 siblings, 1 reply; 18+ messages in thread
From: Jens Axboe @ 2009-09-10 20:07 UTC (permalink / raw)
  To: Ed Cashin; +Cc: alan, akpm, apw, bonbons, linux-kernel

On Thu, Sep 10 2009, Ed Cashin wrote:
> On Thu Sep 10 15:50:23 EDT 2009, jens.axboe@oracle.com wrote:
> > On Thu, Sep 10 2009, Ed Cashin wrote:
> ...
> > > I think the EOPNOTSUPP patch is good for the for-next aoe quilt tree,
> > > while the new barrier support will be good for 2.6.32.
> > 
> > .31 is already out, so there's plenty of time to fix it for .32 for
> > real. I don't mind putting in the temporary fix in the mean time,
> > though.
> 
> I missed that.  It was still rc9 when I pulled earlier today, so I put
> the temporary fix into the aoe quilt tree for linux-next.
> 
> Now that 2.6.31 is out, I suppose this patch should be taken out of
> the aoe quilt tree for linux-next and submitted for inclusion in
> 2.6.31.1.
> 
> So, yes, if you don't mind pushing the patch to the appropriate place,
> that would be great.

Did you repost a non-whitespace damaged version?

> Now I have to figure out why the other patch in aoe's quilt tree for
> linux-next never made it to 2.6.31.

Things don't go from -next to mainstream automatically, -next is just a
testing base. You have to submit it explicitly, eg send it to me.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-10 20:07                                 ` Jens Axboe
@ 2009-09-10 20:20                                   ` Ed Cashin
  2009-09-10 20:27                                     ` Ed Cashin
  0 siblings, 1 reply; 18+ messages in thread
From: Ed Cashin @ 2009-09-10 20:20 UTC (permalink / raw)
  To: jens.axboe, ecashin, alan, akpm, apw, bonbons, linux-kernel

On Thu Sep 10 16:07:37 EDT 2009, jens.axboe@oracle.com wrote:
> On Thu, Sep 10 2009, Ed Cashin wrote:
...
> > So, yes, if you don't mind pushing the patch to the appropriate place,
> > that would be great.
> 
> Did you repost a non-whitespace damaged version?

Yes.  I'll post it again as a response to this email.

> > Now I have to figure out why the other patch in aoe's quilt tree for
> > linux-next never made it to 2.6.31.
> 
> Things don't go from -next to mainstream automatically, -next is just a
> testing base. You have to submit it explicitly, eg send it to me.

Ah.  Thanks.

-- 
  Ed

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-10 20:20                                   ` Ed Cashin
@ 2009-09-10 20:27                                     ` Ed Cashin
  2009-09-10 20:29                                       ` Jens Axboe
  0 siblings, 1 reply; 18+ messages in thread
From: Ed Cashin @ 2009-09-10 20:27 UTC (permalink / raw)
  To: ecashin, jens.axboe, alan, akpm, apw, bonbons, linux-kernel

aoe: end barrier bios with EOPNOTSUPP

BugLink: http://bugzilla.kernel.org/show_bug.cgi?id=13942

Bruno Premont noticed that aoe throws a BUG during umount of an XFS in
2.6.31:

[ 5259.349897] aoe: bi_io_vec is NULL
[ 5259.349940] ------------[ cut here ]------------
[ 5259.349958] kernel BUG at /usr/src/linux-2.6/drivers/block/aoe/aoeblk.c:177!
[ 5259.349990] invalid opcode: 0000 [#1]

The bio in question is a barrier.  Jens Axboe suggested that such bios
need to be recognized and ended with -EOPNOTSUPP by any driver that
provides its own ->make_request_fn handler and does not handle
barriers.

In testing the changes below eliminate the BUG.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
---
 drivers/block/aoe/aoeblk.c |    3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 2307a27..22efb33 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -172,6 +172,9 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
 		BUG();
 		bio_endio(bio, -ENXIO);
 		return 0;
+	} else if (bio_barrier(bio)) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
 	} else if (bio->bi_io_vec == NULL) {
 		printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
 		BUG();
-- 
1.5.6.5


-- 
  Ed Cashin <ecashin@coraid.com>
  http://noserose.net/e/
  http://www.coraid.com/

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] aoe: end barrier bios with EOPNOTSUPP
  2009-09-10 20:27                                     ` Ed Cashin
@ 2009-09-10 20:29                                       ` Jens Axboe
  0 siblings, 0 replies; 18+ messages in thread
From: Jens Axboe @ 2009-09-10 20:29 UTC (permalink / raw)
  To: Ed Cashin; +Cc: alan, akpm, apw, bonbons, linux-kernel

On Thu, Sep 10 2009, Ed Cashin wrote:
> aoe: end barrier bios with EOPNOTSUPP
> 
> BugLink: http://bugzilla.kernel.org/show_bug.cgi?id=13942
> 
> Bruno Premont noticed that aoe throws a BUG during umount of an XFS in
> 2.6.31:
> 
> [ 5259.349897] aoe: bi_io_vec is NULL
> [ 5259.349940] ------------[ cut here ]------------
> [ 5259.349958] kernel BUG at /usr/src/linux-2.6/drivers/block/aoe/aoeblk.c:177!
> [ 5259.349990] invalid opcode: 0000 [#1]
> 
> The bio in question is a barrier.  Jens Axboe suggested that such bios
> need to be recognized and ended with -EOPNOTSUPP by any driver that
> provides its own ->make_request_fn handler and does not handle
> barriers.
> 
> In testing the changes below eliminate the BUG.
> 
> Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
> ---
>  drivers/block/aoe/aoeblk.c |    3 +++
>  1 files changed, 3 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
> index 2307a27..22efb33 100644
> --- a/drivers/block/aoe/aoeblk.c
> +++ b/drivers/block/aoe/aoeblk.c
> @@ -172,6 +172,9 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
>  		BUG();
>  		bio_endio(bio, -ENXIO);
>  		return 0;
> +	} else if (bio_barrier(bio)) {
> +		bio_endio(bio, -EOPNOTSUPP);
> +		return 0;
>  	} else if (bio->bi_io_vec == NULL) {
>  		printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
>  		BUG();
> -- 
> 1.5.6.5

I have applied this with a note that proper barrier support is coming.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 18+ messages in thread

* + aoe-ensure-we-initialise-the-request_queue-correctly.patch added to -mm tree
@ 2009-09-04 16:05 akpm
  0 siblings, 0 replies; 18+ messages in thread
From: akpm @ 2009-09-04 16:05 UTC (permalink / raw)
  To: mm-commits; +Cc: apw, bonbons, ecashin, jens.axboe, martin.petersen, rjw


The patch titled
     aoe: ensure we initialise the request_queue correctly
has been added to the -mm tree.  Its filename is
     aoe-ensure-we-initialise-the-request_queue-correctly.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find
out what to do about this

The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/

------------------------------------------------------
Subject: aoe: ensure we initialise the request_queue correctly
From: Andy Whitcroft <apw@canonical.com>

When using the aoe driver we see an oops triggered by use of an
incorrectly initialised request_queue object:

  [ 2645.959090] kobject '<NULL>' (ffff880059ca22c0): tried to add
		an uninitialized object, something is seriously wrong.
  [ 2645.959104] Pid: 6, comm: events/0 Not tainted 2.6.31-5-generic #24-Ubuntu
  [ 2645.959107] Call Trace:
  [ 2645.959139] [<ffffffff8126ca2f>] kobject_add+0x5f/0x70
  [ 2645.959151] [<ffffffff8125b4ab>] blk_register_queue+0x8b/0xf0
  [ 2645.959155] [<ffffffff8126043f>] add_disk+0x8f/0x160
  [ 2645.959161] [<ffffffffa01673c4>] aoeblk_gdalloc+0x164/0x1c0 [aoe]

It seems this driver mearly embeds a request_queue object in its main
control structure and does not attempt to initialise it.  Pull this out
and use blk_init_queue/blk_cleanup_queue to handle initialisation and
teardown of this object.

Bruno bisected this regression down to

  cd43e26f071524647e660706b784ebcbefbd2e44

  block: Expose stacked device queues in sysfs

"This seems to generate /sys/block/$device/queue and its contents for
 everyone who is using queues, not just for those queues that have a
 non-NULL queue->request_fn."

Addresses http://bugs.launchpad.net/bugs/410198
Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13942

Signed-off-by: Andy Whitcroft <apw@canonical.com>
Acked-by: Ed Cashin <ecashin@coraid.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Bruno Premont <bonbons@linux-vserver.org>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 drivers/block/aoe/aoe.h    |    2 +-
 drivers/block/aoe/aoeblk.c |    6 +++---
 drivers/block/aoe/aoedev.c |   11 ++++++++++-
 3 files changed, 14 insertions(+), 5 deletions(-)

diff -puN drivers/block/aoe/aoe.h~aoe-ensure-we-initialise-the-request_queue-correctly drivers/block/aoe/aoe.h
--- a/drivers/block/aoe/aoe.h~aoe-ensure-we-initialise-the-request_queue-correctly
+++ a/drivers/block/aoe/aoe.h
@@ -155,7 +155,7 @@ struct aoedev {
 	u16 fw_ver;		/* version of blade's firmware */
 	struct work_struct work;/* disk create work struct */
 	struct gendisk *gd;
-	struct request_queue blkq;
+	struct request_queue *blkq;
 	struct hd_geometry geo; 
 	sector_t ssize;
 	struct timer_list timer;
diff -puN drivers/block/aoe/aoeblk.c~aoe-ensure-we-initialise-the-request_queue-correctly drivers/block/aoe/aoeblk.c
--- a/drivers/block/aoe/aoeblk.c~aoe-ensure-we-initialise-the-request_queue-correctly
+++ a/drivers/block/aoe/aoeblk.c
@@ -264,9 +264,9 @@ aoeblk_gdalloc(void *vp)
 		goto err_disk;
 	}
 
-	blk_queue_make_request(&d->blkq, aoeblk_make_request);
+	blk_queue_make_request(d->blkq, aoeblk_make_request);
 	d->blkq.backing_dev_info.name = "aoe";
-	if (bdi_init(&d->blkq.backing_dev_info))
+	if (bdi_init(d->blkq.backing_dev_info))
 		goto err_mempool;
 	spin_lock_irqsave(&d->lock, flags);
 	gd->major = AOE_MAJOR;
@@ -277,7 +277,7 @@ aoeblk_gdalloc(void *vp)
 	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
 		d->aoemajor, d->aoeminor);
 
-	gd->queue = &d->blkq;
+	gd->queue = d->blkq;
 	d->gd = gd;
 	d->flags &= ~DEVFL_GDALLOC;
 	d->flags |= DEVFL_UP;
diff -puN drivers/block/aoe/aoedev.c~aoe-ensure-we-initialise-the-request_queue-correctly drivers/block/aoe/aoedev.c
--- a/drivers/block/aoe/aoedev.c~aoe-ensure-we-initialise-the-request_queue-correctly
+++ a/drivers/block/aoe/aoedev.c
@@ -113,6 +113,7 @@ aoedev_freedev(struct aoedev *d)
 	if (d->bufpool)
 		mempool_destroy(d->bufpool);
 	skbpoolfree(d);
+	blk_cleanup_queue(d->blkq);
 	kfree(d);
 }
 
@@ -202,6 +203,7 @@ aoedev_by_sysminor_m(ulong sysminor)
 {
 	struct aoedev *d;
 	ulong flags;
+	struct request_queue *rq;
 
 	spin_lock_irqsave(&devlist_lock, flags);
 
@@ -210,9 +212,13 @@ aoedev_by_sysminor_m(ulong sysminor)
 			break;
 	if (d)
 		goto out;
+	rq = blk_init_queue(NULL, NULL);
+	if (!rq)
+		goto out;
 	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
 	if (!d)
-		goto out;
+		goto out_rq;
+	d->blkq = rq;
 	INIT_WORK(&d->work, aoecmd_sleepwork);
 	spin_lock_init(&d->lock);
 	skb_queue_head_init(&d->sendq);
@@ -234,6 +240,9 @@ aoedev_by_sysminor_m(ulong sysminor)
  out:
 	spin_unlock_irqrestore(&devlist_lock, flags);
 	return d;
+ out_rq:
+	blk_cleanup_queue(rq);
+	goto out;
 }
 
 static void
_

Patches currently in -mm which might be from apw@canonical.com are

aoe-ensure-we-initialise-the-request_queue-correctly.patch
hugetlb-balance-freeing-of-huge-pages-across-nodes.patch
hugetlb-use-free_pool_huge_page-to-return-unused-surplus-pages.patch
hugetlb-use-free_pool_huge_page-to-return-unused-surplus-pages-fix.patch
hugetlb-clean-up-and-update-huge-pages-documentation.patch
hugetlb-restore-interleaving-of-bootmem-huge-pages.patch
checkpatch-possible-types-else-cannot-start-a-type.patch
checkpatch-handle-c99-comments-correctly-performance-issue.patch
checkpatch-indent-checks-stop-when-we-run-out-of-continuation-lines.patch
checkpatch-make-f-alias-file-add-help-more-verbose-help-message.patch
checkpatch-format-strings-should-not-have-brackets-in-macros.patch
checkpatch-limit-sn-un-matches-to-actual-bit-sizes.patch
checkpatch-version-029.patch


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2009-09-10 20:29 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-02 19:55 + aoe-ensure-we-initialise-the-request_queue-correctly.patch added to -mm tree akpm
     [not found] ` <20090903063539.GH12579@kernel.dk>
     [not found]   ` <2339aa04a4e8fb440a52624273728352@coraid.com>
     [not found]     ` <20090904183525.GB18599@kernel.dk>
     [not found]       ` <f48e25c99e41e375f2ddd85989cd39e1@coraid.com>
     [not found]         ` <20090905051735.GH18599@kernel.dk>
     [not found]           ` <366469f19c41e0150028b886f6859019@coraid.com>
     [not found]             ` <20090908193540.GB18599@kernel.dk>
2009-09-09 16:45               ` [PATCH] aoe: end barrier bios with EOPNOTSUPP Ed Cashin
2009-09-09 16:50                 ` Ed Cashin
2009-09-09 19:03                   ` Ed Cashin
2009-09-09 16:51                 ` Daniel Walker
2009-09-09 17:08                 ` Alan Cox
2009-09-09 18:00                   ` Ed Cashin
2009-09-09 20:58                     ` Jens Axboe
2009-09-09 21:23                       ` Ed Cashin
2009-09-10  7:48                         ` Jens Axboe
2009-09-10 15:16                           ` Ed Cashin
2009-09-10 19:50                             ` Jens Axboe
2009-09-10 20:03                               ` Ed Cashin
2009-09-10 20:07                                 ` Jens Axboe
2009-09-10 20:20                                   ` Ed Cashin
2009-09-10 20:27                                     ` Ed Cashin
2009-09-10 20:29                                       ` Jens Axboe
2009-09-04 16:05 + aoe-ensure-we-initialise-the-request_queue-correctly.patch added to -mm tree akpm

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.