All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features()
@ 2015-07-21  3:30 Bob Liu
  2015-07-21  3:30 ` [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free Bob Liu
                   ` (5 more replies)
  0 siblings, 6 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-21  3:30 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, linux-kernel, roger.pau, konrad.wilk, Bob Liu

There is a bug when migrating from a !feature-persistent host to a
feature-persistent host, because the domU still thinks the new host/backend
doesn't support persistent grants.
Dmesg looks like:
backed has not unmapped grant: 839
backed has not unmapped grant: 773
backed has not unmapped grant: 773
backed has not unmapped grant: 773
backed has not unmapped grant: 839

The fix is to recheck feature-persistent of new backend in blkif_recover().
See: https://lkml.org/lkml/2015/5/25/469

As Roger suggested, we can split the part of blkfront_connect that checks for
optional features, like persistent grants, indirect descriptors and
flush/barrier features, into a separate function and call it from both
blkfront_connect and blkif_recover.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 drivers/block/xen-blkfront.c |  118 +++++++++++++++++++++++-------------------
 1 file changed, 66 insertions(+), 52 deletions(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 5b45ee5..e266d17 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -181,6 +181,7 @@ static DEFINE_SPINLOCK(minor_lock);
 	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
 
 static int blkfront_setup_indirect(struct blkfront_info *info);
+static void blkfront_gather_backend_features(struct blkfront_info *info);
 
 static int get_id_from_freelist(struct blkfront_info *info)
 {
@@ -1514,6 +1515,7 @@ static int blkif_recover(struct blkfront_info *info)
 	info->shadow_free = info->ring.req_prod_pvt;
 	info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
 
+	blkfront_gather_backend_features(info);
 	rc = blkfront_setup_indirect(info);
 	if (rc) {
 		kfree(copy);
@@ -1694,20 +1696,13 @@ static void blkfront_setup_discard(struct blkfront_info *info)
 
 static int blkfront_setup_indirect(struct blkfront_info *info)
 {
-	unsigned int indirect_segments, segs;
+	unsigned int segs;
 	int err, i;
 
-	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			    "feature-max-indirect-segments", "%u", &indirect_segments,
-			    NULL);
-	if (err) {
-		info->max_indirect_segments = 0;
+	if (info->max_indirect_segments == 0)
 		segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	} else {
-		info->max_indirect_segments = min(indirect_segments,
-						  xen_blkif_max_segments);
+	else
 		segs = info->max_indirect_segments;
-	}
 
 	err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info));
 	if (err)
@@ -1771,6 +1766,66 @@ out_of_memory:
 }
 
 /*
+ * Gather all backend feature-*
+ */
+static void blkfront_gather_backend_features(struct blkfront_info *info)
+{
+	int err;
+	int barrier, flush, discard, persistent;
+	unsigned int indirect_segments;
+
+	info->feature_flush = 0;
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			"feature-barrier", "%d", &barrier,
+			NULL);
+
+	/*
+	 * If there's no "feature-barrier" defined, then it means
+	 * we're dealing with a very old backend which writes
+	 * synchronously; nothing to do.
+	 *
+	 * If there are barriers, then we use flush.
+	 */
+	if (!err && barrier)
+		info->feature_flush = REQ_FLUSH | REQ_FUA;
+	/*
+	 * And if there is "feature-flush-cache" use that above
+	 * barriers.
+	 */
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			"feature-flush-cache", "%d", &flush,
+			NULL);
+
+	if (!err && flush)
+		info->feature_flush = REQ_FLUSH;
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			"feature-discard", "%d", &discard,
+			NULL);
+
+	if (!err && discard)
+		blkfront_setup_discard(info);
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			"feature-persistent", "%u", &persistent,
+			NULL);
+	if (err)
+		info->feature_persistent = 0;
+	else
+		info->feature_persistent = persistent;
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-max-indirect-segments", "%u", &indirect_segments,
+			    NULL);
+	if (err)
+		info->max_indirect_segments = 0;
+	else
+		info->max_indirect_segments = min(indirect_segments,
+						  xen_blkif_max_segments);
+}
+
+/*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
  */
@@ -1781,7 +1836,6 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned int physical_sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier, flush, discard, persistent;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1838,47 +1892,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	if (err != 1)
 		physical_sector_size = sector_size;
 
-	info->feature_flush = 0;
-
-	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			    "feature-barrier", "%d", &barrier,
-			    NULL);
-
-	/*
-	 * If there's no "feature-barrier" defined, then it means
-	 * we're dealing with a very old backend which writes
-	 * synchronously; nothing to do.
-	 *
-	 * If there are barriers, then we use flush.
-	 */
-	if (!err && barrier)
-		info->feature_flush = REQ_FLUSH | REQ_FUA;
-	/*
-	 * And if there is "feature-flush-cache" use that above
-	 * barriers.
-	 */
-	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			    "feature-flush-cache", "%d", &flush,
-			    NULL);
-
-	if (!err && flush)
-		info->feature_flush = REQ_FLUSH;
-
-	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			    "feature-discard", "%d", &discard,
-			    NULL);
-
-	if (!err && discard)
-		blkfront_setup_discard(info);
-
-	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			    "feature-persistent", "%u", &persistent,
-			    NULL);
-	if (err)
-		info->feature_persistent = 0;
-	else
-		info->feature_persistent = persistent;
-
+	blkfront_gather_backend_features(info);
 	err = blkfront_setup_indirect(info);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free
  2015-07-21  3:30 [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Bob Liu
  2015-07-21  3:30 ` [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free Bob Liu
@ 2015-07-21  3:30 ` Bob Liu
  2015-07-21  9:25   ` Roger Pau Monné
  2015-07-21  9:25   ` Roger Pau Monné
  2015-07-21  3:30 ` [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt() Bob Liu
                   ` (3 subsequent siblings)
  5 siblings, 2 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-21  3:30 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, linux-kernel, roger.pau, konrad.wilk, Bob Liu

This BUG_ON() in blkif_free() is incorrect, because indirect pages can be added
to the list info->indirect_pages in blkif_completion() regardless of whether
feature_persistent is true or false.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 drivers/block/xen-blkfront.c |    1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index e266d17..c98fcd0 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -986,7 +986,6 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	if (!list_empty(&info->indirect_pages)) {
 		struct page *indirect_page, *n;
 
-		BUG_ON(info->feature_persistent);
 		list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
 			list_del(&indirect_page->lru);
 			__free_page(indirect_page);
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free
  2015-07-21  3:30 [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Bob Liu
@ 2015-07-21  3:30 ` Bob Liu
  2015-07-21  3:30 ` Bob Liu
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-21  3:30 UTC (permalink / raw)
  To: xen-devel; +Cc: Bob Liu, roger.pau, david.vrabel, linux-kernel

This BUG_ON() in blkif_free() is incorrect, because indirect pages can be added
to the list info->indirect_pages in blkif_completion() regardless of whether
feature_persistent is true or false.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 drivers/block/xen-blkfront.c |    1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index e266d17..c98fcd0 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -986,7 +986,6 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	if (!list_empty(&info->indirect_pages)) {
 		struct page *indirect_page, *n;
 
-		BUG_ON(info->feature_persistent);
 		list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
 			list_del(&indirect_page->lru);
 			__free_page(indirect_page);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt()
  2015-07-21  3:30 [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Bob Liu
  2015-07-21  3:30 ` [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free Bob Liu
  2015-07-21  3:30 ` Bob Liu
@ 2015-07-21  3:30 ` Bob Liu
  2015-07-21  9:13   ` Roger Pau Monné
  2015-07-21  9:13   ` Roger Pau Monné
  2015-07-21  3:30 ` Bob Liu
                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-21  3:30 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, linux-kernel, roger.pau, konrad.wilk, Bob Liu

This BUG_ON() will be triggered when the previous purge work hasn't finished.
That can reasonably happen under pretty extreme load and should not panic the
system.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 drivers/block/xen-blkback/blkback.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index ced9677..b90ac8e 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -394,7 +394,9 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
 	pr_debug("Going to purge %u persistent grants\n", num_clean);
 
-	BUG_ON(!list_empty(&blkif->persistent_purge_list));
+	if (!list_empty(&blkif->persistent_purge_list))
+		return;
+
 	root = &blkif->persistent_gnts;
 purge_list:
 	foreach_grant_safe(persistent_gnt, n, root, node) {
-- 
1.7.10.4


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt()
  2015-07-21  3:30 [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Bob Liu
                   ` (2 preceding siblings ...)
  2015-07-21  3:30 ` [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt() Bob Liu
@ 2015-07-21  3:30 ` Bob Liu
  2015-07-21  9:32 ` [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Roger Pau Monné
  2015-07-21  9:32 ` Roger Pau Monné
  5 siblings, 0 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-21  3:30 UTC (permalink / raw)
  To: xen-devel; +Cc: Bob Liu, roger.pau, david.vrabel, linux-kernel

This BUG_ON() will be triggered when the previous purge work hasn't finished.
That can reasonably happen under pretty extreme load and should not panic the
system.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
---
 drivers/block/xen-blkback/blkback.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index ced9677..b90ac8e 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -394,7 +394,9 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
 
 	pr_debug("Going to purge %u persistent grants\n", num_clean);
 
-	BUG_ON(!list_empty(&blkif->persistent_purge_list));
+	if (!list_empty(&blkif->persistent_purge_list))
+		return;
+
 	root = &blkif->persistent_gnts;
 purge_list:
 	foreach_grant_safe(persistent_gnt, n, root, node) {
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt()
  2015-07-21  3:30 ` [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt() Bob Liu
  2015-07-21  9:13   ` Roger Pau Monné
@ 2015-07-21  9:13   ` Roger Pau Monné
  2015-07-21 10:50     ` Bob Liu
  2015-07-21 10:50     ` Bob Liu
  1 sibling, 2 replies; 17+ messages in thread
From: Roger Pau Monné @ 2015-07-21  9:13 UTC (permalink / raw)
  To: Bob Liu, xen-devel; +Cc: david.vrabel, linux-kernel, konrad.wilk

El 21/07/15 a les 5.30, Bob Liu ha escrit:
> This BUG_ON() will be triggered when previous purge work haven't finished.
> It's reasonable under pretty extreme load and should not panic the system.
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> ---
>  drivers/block/xen-blkback/blkback.c |    4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
> index ced9677..b90ac8e 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -394,7 +394,9 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
>  
>  	pr_debug("Going to purge %u persistent grants\n", num_clean);
>  
> -	BUG_ON(!list_empty(&blkif->persistent_purge_list));
> +	if (!list_empty(&blkif->persistent_purge_list))
> +		return;
> +

I see the problem with this: there's a check for work_pending before
this BUG_ON, but it doesn't account for the work currently running. I
would prefer to replace the work_pending check with work_busy instead,
which also takes into account whether the work is still running.
The comment on work_busy, however, makes me nervous:

* Test whether @work is currently pending or running.  There is no
* synchronization around this function and the test result is
* unreliable and only useful as advisory hints or for debugging.

AFAICT I think it should be safe because we don't have concurrent
purge_persistent_gnt calls, but I'm no expert on Linux workqueues. It
also makes me wonder why we have such a half-baked function in the Linux
kernel.

Roger.


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt()
  2015-07-21  3:30 ` [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt() Bob Liu
@ 2015-07-21  9:13   ` Roger Pau Monné
  2015-07-21  9:13   ` Roger Pau Monné
  1 sibling, 0 replies; 17+ messages in thread
From: Roger Pau Monné @ 2015-07-21  9:13 UTC (permalink / raw)
  To: Bob Liu, xen-devel; +Cc: david.vrabel, linux-kernel

El 21/07/15 a les 5.30, Bob Liu ha escrit:
> This BUG_ON() will be triggered when previous purge work haven't finished.
> It's reasonable under pretty extreme load and should not panic the system.
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> ---
>  drivers/block/xen-blkback/blkback.c |    4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
> index ced9677..b90ac8e 100644
> --- a/drivers/block/xen-blkback/blkback.c
> +++ b/drivers/block/xen-blkback/blkback.c
> @@ -394,7 +394,9 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
>  
>  	pr_debug("Going to purge %u persistent grants\n", num_clean);
>  
> -	BUG_ON(!list_empty(&blkif->persistent_purge_list));
> +	if (!list_empty(&blkif->persistent_purge_list))
> +		return;
> +

I see the problem with this: there's a check for work_pending before
this BUG_ON, but it doesn't account for the work currently running. I
would prefer to replace the work_pending check with work_busy instead,
which also takes into account whether the work is still running.
The comment on work_busy, however, makes me nervous:

* Test whether @work is currently pending or running.  There is no
* synchronization around this function and the test result is
* unreliable and only useful as advisory hints or for debugging.

AFAICT I think it should be safe because we don't have concurrent
purge_persistent_gnt calls, but I'm no expert on Linux workqueues. It
also makes me wonder why we have such a half-baked function in the Linux
kernel.

Roger.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free
  2015-07-21  3:30 ` Bob Liu
@ 2015-07-21  9:25   ` Roger Pau Monné
  2015-07-22  4:43     ` Bob Liu
  2015-07-22  4:43     ` Bob Liu
  2015-07-21  9:25   ` Roger Pau Monné
  1 sibling, 2 replies; 17+ messages in thread
From: Roger Pau Monné @ 2015-07-21  9:25 UTC (permalink / raw)
  To: Bob Liu, xen-devel; +Cc: david.vrabel, linux-kernel, konrad.wilk

El 21/07/15 a les 5.30, Bob Liu ha escrit:
> This BUG_ON() in blkif_free() is incorrect, because indirect page can be added
> to list info->indirect_pages in blkif_completion() no matter feature_persistent
> is true or false.
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>

Acked-by: Roger Pau Monné <roger.pau@citrix.com>

This was probably an oversight from when blkif_completion was changed to
check for gnttab_query_foreign_access. It should be backported to stable
trees.

Roger.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free
  2015-07-21  3:30 ` Bob Liu
  2015-07-21  9:25   ` Roger Pau Monné
@ 2015-07-21  9:25   ` Roger Pau Monné
  1 sibling, 0 replies; 17+ messages in thread
From: Roger Pau Monné @ 2015-07-21  9:25 UTC (permalink / raw)
  To: Bob Liu, xen-devel; +Cc: david.vrabel, linux-kernel

El 21/07/15 a les 5.30, Bob Liu ha escrit:
> This BUG_ON() in blkif_free() is incorrect, because indirect page can be added
> to list info->indirect_pages in blkif_completion() no matter feature_persistent
> is true or false.
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>

Acked-by: Roger Pau Monné <roger.pau@citrix.com>

This was probably an oversight from when blkif_completion was changed to
check for gnttab_query_foreign_access. It should be backported to stable
trees.

Roger.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features()
  2015-07-21  3:30 [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Bob Liu
                   ` (3 preceding siblings ...)
  2015-07-21  3:30 ` Bob Liu
@ 2015-07-21  9:32 ` Roger Pau Monné
  2015-07-21  9:32 ` Roger Pau Monné
  5 siblings, 0 replies; 17+ messages in thread
From: Roger Pau Monné @ 2015-07-21  9:32 UTC (permalink / raw)
  To: Bob Liu, xen-devel; +Cc: david.vrabel, linux-kernel, konrad.wilk

El 21/07/15 a les 5.30, Bob Liu ha escrit:
> There is a bug when migrate from !feature-persistent host to feature-persistent
> host, because domU still think new host/backend don't support persistent.
> Dmesg like:
> backed has not unmapped grant: 839
> backed has not unmapped grant: 773
> backed has not unmapped grant: 773
> backed has not unmapped grant: 773
> backed has not unmapped grant: 839
> 
> The fix is to recheck feature-persistent of new backend in blkif_recover().
> See: https://lkml.org/lkml/2015/5/25/469
> 
> As Roger suggested, we can split the part of blkfront_connect that checks for
> optional features, like persistent grants, indirect descriptors and
> flush/barrier features to a separate function and call it from both
> blkfront_connect and blkif_recover
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>

Thanks for taking care of this. The patch looks fine, just one minor nit.

> ---
>  drivers/block/xen-blkfront.c |  118 +++++++++++++++++++++++-------------------
>  1 file changed, 66 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 5b45ee5..e266d17 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -181,6 +181,7 @@ static DEFINE_SPINLOCK(minor_lock);
>  	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
>  
>  static int blkfront_setup_indirect(struct blkfront_info *info);
> +static void blkfront_gather_backend_features(struct blkfront_info *info);
>  
>  static int get_id_from_freelist(struct blkfront_info *info)
>  {
> @@ -1514,6 +1515,7 @@ static int blkif_recover(struct blkfront_info *info)
>  	info->shadow_free = info->ring.req_prod_pvt;
>  	info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
>  
> +	blkfront_gather_backend_features(info);
>  	rc = blkfront_setup_indirect(info);

AFAICT you can put the call to blkfront_setup_indirect inside of
blkfront_gather_backend_features, like it's done for blkfront_setup_discard.

Roger.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features()
  2015-07-21  3:30 [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Bob Liu
                   ` (4 preceding siblings ...)
  2015-07-21  9:32 ` [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Roger Pau Monné
@ 2015-07-21  9:32 ` Roger Pau Monné
  5 siblings, 0 replies; 17+ messages in thread
From: Roger Pau Monné @ 2015-07-21  9:32 UTC (permalink / raw)
  To: Bob Liu, xen-devel; +Cc: david.vrabel, linux-kernel

El 21/07/15 a les 5.30, Bob Liu ha escrit:
> There is a bug when migrate from !feature-persistent host to feature-persistent
> host, because domU still think new host/backend don't support persistent.
> Dmesg like:
> backed has not unmapped grant: 839
> backed has not unmapped grant: 773
> backed has not unmapped grant: 773
> backed has not unmapped grant: 773
> backed has not unmapped grant: 839
> 
> The fix is to recheck feature-persistent of new backend in blkif_recover().
> See: https://lkml.org/lkml/2015/5/25/469
> 
> As Roger suggested, we can split the part of blkfront_connect that checks for
> optional features, like persistent grants, indirect descriptors and
> flush/barrier features to a separate function and call it from both
> blkfront_connect and blkif_recover
> 
> Signed-off-by: Bob Liu <bob.liu@oracle.com>

Thanks for taking care of this. The patch looks fine, just one minor nit.

> ---
>  drivers/block/xen-blkfront.c |  118 +++++++++++++++++++++++-------------------
>  1 file changed, 66 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 5b45ee5..e266d17 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -181,6 +181,7 @@ static DEFINE_SPINLOCK(minor_lock);
>  	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
>  
>  static int blkfront_setup_indirect(struct blkfront_info *info);
> +static void blkfront_gather_backend_features(struct blkfront_info *info);
>  
>  static int get_id_from_freelist(struct blkfront_info *info)
>  {
> @@ -1514,6 +1515,7 @@ static int blkif_recover(struct blkfront_info *info)
>  	info->shadow_free = info->ring.req_prod_pvt;
>  	info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
>  
> +	blkfront_gather_backend_features(info);
>  	rc = blkfront_setup_indirect(info);

AFAICT you can put the call to blkfront_setup_indirect inside of
blkfront_gather_backend_features, like it's done for blkfront_setup_discard.

Roger.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt()
  2015-07-21  9:13   ` Roger Pau Monné
  2015-07-21 10:50     ` Bob Liu
@ 2015-07-21 10:50     ` Bob Liu
  1 sibling, 0 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-21 10:50 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: xen-devel, david.vrabel, linux-kernel, konrad.wilk


On 07/21/2015 05:13 PM, Roger Pau Monné wrote:
> El 21/07/15 a les 5.30, Bob Liu ha escrit:
>> This BUG_ON() will be triggered when previous purge work haven't finished.
>> It's reasonable under pretty extreme load and should not panic the system.
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>> ---
>>  drivers/block/xen-blkback/blkback.c |    4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
>> index ced9677..b90ac8e 100644
>> --- a/drivers/block/xen-blkback/blkback.c
>> +++ b/drivers/block/xen-blkback/blkback.c
>> @@ -394,7 +394,9 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
>>  
>>  	pr_debug("Going to purge %u persistent grants\n", num_clean);
>>  
>> -	BUG_ON(!list_empty(&blkif->persistent_purge_list));
>> +	if (!list_empty(&blkif->persistent_purge_list))
>> +		return;
>> +
> 
> I see the problem with this, there's a check for work_pending before
> this BUG_ON, but it doesn't account if the work is currently running. I

Exactly.

> would rather prefer to replace the work_pending check with work_busy
> instead, which will also take into account if the work is still running.
> The comment on work_busy however makes me nervous:
> 
> * Test whether @work is currently pending or running.  There is no
> * synchronization around this function and the test result is
> * unreliable and only useful as advisory hints or for debugging.
> 
> AFAICT I think it should be safe because we don't have concurrent
> purge_persistent_gnt calls, but I'm no expert on Linux workqueues. It

Me neither, that's why I just replaced this BUG_ON() with a simple return.

> also makes me wonder why we have such a half-baked function in the Linux
> kernel.
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt()
  2015-07-21  9:13   ` Roger Pau Monné
@ 2015-07-21 10:50     ` Bob Liu
  2015-07-21 10:50     ` Bob Liu
  1 sibling, 0 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-21 10:50 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: xen-devel, david.vrabel, linux-kernel


On 07/21/2015 05:13 PM, Roger Pau Monné wrote:
> El 21/07/15 a les 5.30, Bob Liu ha escrit:
>> This BUG_ON() will be triggered when previous purge work haven't finished.
>> It's reasonable under pretty extreme load and should not panic the system.
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>> ---
>>  drivers/block/xen-blkback/blkback.c |    4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
>> index ced9677..b90ac8e 100644
>> --- a/drivers/block/xen-blkback/blkback.c
>> +++ b/drivers/block/xen-blkback/blkback.c
>> @@ -394,7 +394,9 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
>>  
>>  	pr_debug("Going to purge %u persistent grants\n", num_clean);
>>  
>> -	BUG_ON(!list_empty(&blkif->persistent_purge_list));
>> +	if (!list_empty(&blkif->persistent_purge_list))
>> +		return;
>> +
> 
> I see the problem with this, there's a check for work_pending before
> this BUG_ON, but it doesn't account if the work is currently running. I

Exactly.

> would rather prefer to replace the work_pending check with work_busy
> instead, which will also take into account if the work is still running.
> The comment on work_busy however makes me nervous:
> 
> * Test whether @work is currently pending or running.  There is no
> * synchronization around this function and the test result is
> * unreliable and only useful as advisory hints or for debugging.
> 
> AFAICT I think it should be safe because we don't have concurrent
> purge_persistent_gnt calls, but I'm no expert on Linux workqueues. It

Me neither, that's why I just replaced this BUG_ON() with a simple return.

> also makes me wonder why we have such a half-baked function in the Linux
> kernel.
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free
  2015-07-21  9:25   ` Roger Pau Monné
  2015-07-22  4:43     ` Bob Liu
@ 2015-07-22  4:43     ` Bob Liu
  2015-07-22  5:34       ` Bob Liu
  2015-07-22  5:34       ` Bob Liu
  1 sibling, 2 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-22  4:43 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: xen-devel, david.vrabel, linux-kernel, konrad.wilk


On 07/21/2015 05:25 PM, Roger Pau Monné wrote:
> El 21/07/15 a les 5.30, Bob Liu ha escrit:
>> This BUG_ON() in blkif_free() is incorrect, because indirect page can be added
>> to list info->indirect_pages in blkif_completion() no matter feature_persistent
>> is true or false.
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> 
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> This was probably an oversight from when blkif_completion was changed to
> check for gnttab_query_foreign_access. It should be backported to stable
> trees.
> 

Sorry, this patch is buggy and I haven't figured out why.

general protection fault: 0000 [#1] SMP 
Modules linked in:
CPU: 0 PID: 39 Comm: xenwatch Tainted: G        W       4.1.0-rc3-00003-g718cf80-dirty #67
Hardware name: Xen HVM domU, BIOS 4.5.0-rc 11/23/2014
task: ffff880283f4eca0 ti: ffff880283fb4000 task.ti: ffff880283fb4000
RIP: 0010:[<ffffffff813d577b>]  [<ffffffff813d577b>] blkif_free+0x162/0x5a9
RSP: 0018:ffff880283fb7c48  EFLAGS: 00010087
RAX: dead000000200200 RBX: ffff880141400000 RCX: 0000000000000000
RDX: dead000000100100 RSI: dead000000100100 RDI: ffff88028f418bb8
RBP: ffff880283fb7ca8 R08: dead000000200200 R09: 0000000000000001
R10: 0000000000000001 R11: 0000000000000000 R12: ffff8801414481c8
R13: dead0000001000e0 R14: ffff8801414481b8 R15: ffffea0000000000
FS:  0000000000000000(0000) GS:ffff88028f400000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000001582e08 CR3: 000000013345b000 CR4: 00000000001406f0
Stack:
 ffff880023aa8420 0000000000000286 ffff880283fb7cb7 ffff880023aa8420
 ffff8800363fe240 ffffffff81862c50 ffff880283fb7ce8 ffff880023aa8440
 ffffffff81870000 ffff880023aa8400 ffff880141400000 ffff880141400008
Call Trace:
 [<ffffffff813d8e76>] blkfront_remove+0x4c/0xff
 [<ffffffff813772fa>] xenbus_dev_remove+0x76/0xb0
 [<ffffffff813bd611>] __device_release_driver+0x84/0xf8
 [<ffffffff813bd6a3>] device_release_driver+0x1e/0x2b
 [<ffffffff813bd1ef>] bus_remove_device+0x12c/0x141
 [<ffffffff813ba51d>] device_del+0x161/0x1e5
 [<ffffffff81375ef3>] ? xenbus_thread+0x239/0x239
 [<ffffffff813ba5e4>] device_unregister+0x43/0x4f
 [<ffffffff81377853>] xenbus_dev_changed+0x82/0x17f
 [<ffffffff81377566>] ? xenbus_otherend_changed+0xf0/0xff
 [<ffffffff81378d8d>] frontend_changed+0x43/0x48
 [<ffffffff81375fec>] xenwatch_thread+0xf9/0x127
 [<ffffffff81078fc4>] ? add_wait_queue+0x44/0x44
 [<ffffffff8106195b>] kthread+0xcd/0xd5
 [<ffffffff81060000>] ? alloc_pid+0xe8/0x492
 [<ffffffff8106188e>] ? kthread_freezable_should_stop+0x48/0x48
 [<ffffffff81533ee2>] ret_from_fork+0x42/0x70
 [<ffffffff8106188e>] ? kthread_freezable_should_stop+0x48/0x48
Code: 04 00 4c 8b 28 48 8d 78 e0 49 83 ed 20 eb 3d 48 8b 47 28 48 8b 57 20 48 be 00 01 10 00 00 00 ad de 49 b8 00 02 20 00 00 00 ad de <48> 89 42 08 48 89 10 48 89 77 20 4c 89 47 28 31 f6 e8 26 7d cf 
RIP  [<ffffffff813d577b>] blkif_free+0x162/0x5a9
 RSP <ffff880283fb7c48>
---[ end trace 5321d7f1ef8414d0 ]---

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free
  2015-07-21  9:25   ` Roger Pau Monné
@ 2015-07-22  4:43     ` Bob Liu
  2015-07-22  4:43     ` Bob Liu
  1 sibling, 0 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-22  4:43 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: xen-devel, david.vrabel, linux-kernel


On 07/21/2015 05:25 PM, Roger Pau Monné wrote:
> El 21/07/15 a les 5.30, Bob Liu ha escrit:
>> This BUG_ON() in blkif_free() is incorrect, because an indirect page can be added
>> to the list info->indirect_pages in blkif_completion() regardless of whether
>> feature_persistent is true or false.
>>
>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
> 
> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> This was probably an oversight from when blkif_completion was changed to
> check for gnttab_query_foreign_access. It should be backported to stable
> trees.
> 

Sorry, this patch is buggy and I haven't figured out why.

general protection fault: 0000 [#1] SMP 
Modules linked in:
CPU: 0 PID: 39 Comm: xenwatch Tainted: G        W       4.1.0-rc3-00003-g718cf80-dirty #67
Hardware name: Xen HVM domU, BIOS 4.5.0-rc 11/23/2014
task: ffff880283f4eca0 ti: ffff880283fb4000 task.ti: ffff880283fb4000
RIP: 0010:[<ffffffff813d577b>]  [<ffffffff813d577b>] blkif_free+0x162/0x5a9
RSP: 0018:ffff880283fb7c48  EFLAGS: 00010087
RAX: dead000000200200 RBX: ffff880141400000 RCX: 0000000000000000
RDX: dead000000100100 RSI: dead000000100100 RDI: ffff88028f418bb8
RBP: ffff880283fb7ca8 R08: dead000000200200 R09: 0000000000000001
R10: 0000000000000001 R11: 0000000000000000 R12: ffff8801414481c8
R13: dead0000001000e0 R14: ffff8801414481b8 R15: ffffea0000000000
FS:  0000000000000000(0000) GS:ffff88028f400000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000001582e08 CR3: 000000013345b000 CR4: 00000000001406f0
Stack:
 ffff880023aa8420 0000000000000286 ffff880283fb7cb7 ffff880023aa8420
 ffff8800363fe240 ffffffff81862c50 ffff880283fb7ce8 ffff880023aa8440
 ffffffff81870000 ffff880023aa8400 ffff880141400000 ffff880141400008
Call Trace:
 [<ffffffff813d8e76>] blkfront_remove+0x4c/0xff
 [<ffffffff813772fa>] xenbus_dev_remove+0x76/0xb0
 [<ffffffff813bd611>] __device_release_driver+0x84/0xf8
 [<ffffffff813bd6a3>] device_release_driver+0x1e/0x2b
 [<ffffffff813bd1ef>] bus_remove_device+0x12c/0x141
 [<ffffffff813ba51d>] device_del+0x161/0x1e5
 [<ffffffff81375ef3>] ? xenbus_thread+0x239/0x239
 [<ffffffff813ba5e4>] device_unregister+0x43/0x4f
 [<ffffffff81377853>] xenbus_dev_changed+0x82/0x17f
 [<ffffffff81377566>] ? xenbus_otherend_changed+0xf0/0xff
 [<ffffffff81378d8d>] frontend_changed+0x43/0x48
 [<ffffffff81375fec>] xenwatch_thread+0xf9/0x127
 [<ffffffff81078fc4>] ? add_wait_queue+0x44/0x44
 [<ffffffff8106195b>] kthread+0xcd/0xd5
 [<ffffffff81060000>] ? alloc_pid+0xe8/0x492
 [<ffffffff8106188e>] ? kthread_freezable_should_stop+0x48/0x48
 [<ffffffff81533ee2>] ret_from_fork+0x42/0x70
 [<ffffffff8106188e>] ? kthread_freezable_should_stop+0x48/0x48
Code: 04 00 4c 8b 28 48 8d 78 e0 49 83 ed 20 eb 3d 48 8b 47 28 48 8b 57 20 48 be 00 01 10 00 00 00 ad de 49 b8 00 02 20 00 00 00 ad de <48> 89 42 08 48 89 10 48 89 77 20 4c 89 47 28 31 f6 e8 26 7d cf 
RIP  [<ffffffff813d577b>] blkif_free+0x162/0x5a9
 RSP <ffff880283fb7c48>
---[ end trace 5321d7f1ef8414d0 ]---

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free
  2015-07-22  4:43     ` Bob Liu
  2015-07-22  5:34       ` Bob Liu
@ 2015-07-22  5:34       ` Bob Liu
  1 sibling, 0 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-22  5:34 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: xen-devel, david.vrabel, linux-kernel, konrad.wilk



On 07/22/2015 12:43 PM, Bob Liu wrote:
> 
> On 07/21/2015 05:25 PM, Roger Pau Monné wrote:
>> El 21/07/15 a les 5.30, Bob Liu ha escrit:
>>> This BUG_ON() in blkif_free() is incorrect, because an indirect page can be added
>>> to the list info->indirect_pages in blkif_completion() regardless of whether
>>> feature_persistent is true or false.
>>>
>>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>>
>> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
>>
>> This was probably an oversight from when blkif_completion was changed to
>> check for gnttab_query_foreign_access. It should be backported to stable
>> trees.
>>
> 
> Sorry, this patch is buggy and I haven't figured out why.
> 
> general protection fault: 0000 [#1] SMP 
> Modules linked in:
> CPU: 0 PID: 39 Comm: xenwatch Tainted: G        W       4.1.0-rc3-00003-g718cf80-dirty #67
> Hardware name: Xen HVM domU, BIOS 4.5.0-rc 11/23/2014
> task: ffff880283f4eca0 ti: ffff880283fb4000 task.ti: ffff880283fb4000
> RIP: 0010:[<ffffffff813d577b>]  [<ffffffff813d577b>] blkif_free+0x162/0x5a9
> RSP: 0018:ffff880283fb7c48  EFLAGS: 00010087
> RAX: dead000000200200 RBX: ffff880141400000 RCX: 0000000000000000
> RDX: dead000000100100 RSI: dead000000100100 RDI: ffff88028f418bb8
> RBP: ffff880283fb7ca8 R08: dead000000200200 R09: 0000000000000001
> R10: 0000000000000001 R11: 0000000000000000 R12: ffff8801414481c8
> R13: dead0000001000e0 R14: ffff8801414481b8 R15: ffffea0000000000
> FS:  0000000000000000(0000) GS:ffff88028f400000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000001582e08 CR3: 000000013345b000 CR4: 00000000001406f0
> Stack:
>  ffff880023aa8420 0000000000000286 ffff880283fb7cb7 ffff880023aa8420
>  ffff8800363fe240 ffffffff81862c50 ffff880283fb7ce8 ffff880023aa8440
>  ffffffff81870000 ffff880023aa8400 ffff880141400000 ffff880141400008
> Call Trace:
>  [<ffffffff813d8e76>] blkfront_remove+0x4c/0xff
>  [<ffffffff813772fa>] xenbus_dev_remove+0x76/0xb0
>  [<ffffffff813bd611>] __device_release_driver+0x84/0xf8
>  [<ffffffff813bd6a3>] device_release_driver+0x1e/0x2b
>  [<ffffffff813bd1ef>] bus_remove_device+0x12c/0x141
>  [<ffffffff813ba51d>] device_del+0x161/0x1e5
>  [<ffffffff81375ef3>] ? xenbus_thread+0x239/0x239
>  [<ffffffff813ba5e4>] device_unregister+0x43/0x4f
>  [<ffffffff81377853>] xenbus_dev_changed+0x82/0x17f
>  [<ffffffff81377566>] ? xenbus_otherend_changed+0xf0/0xff
>  [<ffffffff81378d8d>] frontend_changed+0x43/0x48
>  [<ffffffff81375fec>] xenwatch_thread+0xf9/0x127
>  [<ffffffff81078fc4>] ? add_wait_queue+0x44/0x44
>  [<ffffffff8106195b>] kthread+0xcd/0xd5
>  [<ffffffff81060000>] ? alloc_pid+0xe8/0x492
>  [<ffffffff8106188e>] ? kthread_freezable_should_stop+0x48/0x48
>  [<ffffffff81533ee2>] ret_from_fork+0x42/0x70
>  [<ffffffff8106188e>] ? kthread_freezable_should_stop+0x48/0x48
> Code: 04 00 4c 8b 28 48 8d 78 e0 49 83 ed 20 eb 3d 48 8b 47 28 48 8b 57 20 48 be 00 01 10 00 00 00 ad de 49 b8 00 02 20 00 00 00 ad de <48> 89 42 08 48 89 10 48 89 77 20 4c 89 47 28 31 f6 e8 26 7d cf 
> RIP  [<ffffffff813d577b>] blkif_free+0x162/0x5a9
>  RSP <ffff880283fb7c48>
> ---[ end trace 5321d7f1ef8414d0 ]---
> 

The right fix should be:

--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1124,8 +1124,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                                 * Add the used indirect page back to the list of
                                 * available pages for indirect grefs.
                                 */
-                               indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
-                               list_add(&indirect_page->lru, &info->indirect_pages);
+                               if (!info->feature_persistent) {
+                                       indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
+                                       list_add(&indirect_page->lru, &info->indirect_pages);
+                               }
                                s->indirect_grants[i]->gref = GRANT_INVALID_REF;
                                list_add_tail(&s->indirect_grants[i]->node, &info->grants);
                        }

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free
  2015-07-22  4:43     ` Bob Liu
@ 2015-07-22  5:34       ` Bob Liu
  2015-07-22  5:34       ` Bob Liu
  1 sibling, 0 replies; 17+ messages in thread
From: Bob Liu @ 2015-07-22  5:34 UTC (permalink / raw)
  To: Roger Pau Monné; +Cc: xen-devel, david.vrabel, linux-kernel



On 07/22/2015 12:43 PM, Bob Liu wrote:
> 
> On 07/21/2015 05:25 PM, Roger Pau Monné wrote:
>> El 21/07/15 a les 5.30, Bob Liu ha escrit:
>>> This BUG_ON() in blkif_free() is incorrect, because an indirect page can be added
>>> to the list info->indirect_pages in blkif_completion() regardless of whether
>>> feature_persistent is true or false.
>>>
>>> Signed-off-by: Bob Liu <bob.liu@oracle.com>
>>
>> Acked-by: Roger Pau Monné <roger.pau@citrix.com>
>>
>> This was probably an oversight from when blkif_completion was changed to
>> check for gnttab_query_foreign_access. It should be backported to stable
>> trees.
>>
> 
> Sorry, this patch is buggy and I haven't figured out why.
> 
> general protection fault: 0000 [#1] SMP 
> Modules linked in:
> CPU: 0 PID: 39 Comm: xenwatch Tainted: G        W       4.1.0-rc3-00003-g718cf80-dirty #67
> Hardware name: Xen HVM domU, BIOS 4.5.0-rc 11/23/2014
> task: ffff880283f4eca0 ti: ffff880283fb4000 task.ti: ffff880283fb4000
> RIP: 0010:[<ffffffff813d577b>]  [<ffffffff813d577b>] blkif_free+0x162/0x5a9
> RSP: 0018:ffff880283fb7c48  EFLAGS: 00010087
> RAX: dead000000200200 RBX: ffff880141400000 RCX: 0000000000000000
> RDX: dead000000100100 RSI: dead000000100100 RDI: ffff88028f418bb8
> RBP: ffff880283fb7ca8 R08: dead000000200200 R09: 0000000000000001
> R10: 0000000000000001 R11: 0000000000000000 R12: ffff8801414481c8
> R13: dead0000001000e0 R14: ffff8801414481b8 R15: ffffea0000000000
> FS:  0000000000000000(0000) GS:ffff88028f400000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000001582e08 CR3: 000000013345b000 CR4: 00000000001406f0
> Stack:
>  ffff880023aa8420 0000000000000286 ffff880283fb7cb7 ffff880023aa8420
>  ffff8800363fe240 ffffffff81862c50 ffff880283fb7ce8 ffff880023aa8440
>  ffffffff81870000 ffff880023aa8400 ffff880141400000 ffff880141400008
> Call Trace:
>  [<ffffffff813d8e76>] blkfront_remove+0x4c/0xff
>  [<ffffffff813772fa>] xenbus_dev_remove+0x76/0xb0
>  [<ffffffff813bd611>] __device_release_driver+0x84/0xf8
>  [<ffffffff813bd6a3>] device_release_driver+0x1e/0x2b
>  [<ffffffff813bd1ef>] bus_remove_device+0x12c/0x141
>  [<ffffffff813ba51d>] device_del+0x161/0x1e5
>  [<ffffffff81375ef3>] ? xenbus_thread+0x239/0x239
>  [<ffffffff813ba5e4>] device_unregister+0x43/0x4f
>  [<ffffffff81377853>] xenbus_dev_changed+0x82/0x17f
>  [<ffffffff81377566>] ? xenbus_otherend_changed+0xf0/0xff
>  [<ffffffff81378d8d>] frontend_changed+0x43/0x48
>  [<ffffffff81375fec>] xenwatch_thread+0xf9/0x127
>  [<ffffffff81078fc4>] ? add_wait_queue+0x44/0x44
>  [<ffffffff8106195b>] kthread+0xcd/0xd5
>  [<ffffffff81060000>] ? alloc_pid+0xe8/0x492
>  [<ffffffff8106188e>] ? kthread_freezable_should_stop+0x48/0x48
>  [<ffffffff81533ee2>] ret_from_fork+0x42/0x70
>  [<ffffffff8106188e>] ? kthread_freezable_should_stop+0x48/0x48
> Code: 04 00 4c 8b 28 48 8d 78 e0 49 83 ed 20 eb 3d 48 8b 47 28 48 8b 57 20 48 be 00 01 10 00 00 00 ad de 49 b8 00 02 20 00 00 00 ad de <48> 89 42 08 48 89 10 48 89 77 20 4c 89 47 28 31 f6 e8 26 7d cf 
> RIP  [<ffffffff813d577b>] blkif_free+0x162/0x5a9
>  RSP <ffff880283fb7c48>
> ---[ end trace 5321d7f1ef8414d0 ]---
> 

The right fix should be:

--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1124,8 +1124,10 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
                                 * Add the used indirect page back to the list of
                                 * available pages for indirect grefs.
                                 */
-                               indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
-                               list_add(&indirect_page->lru, &info->indirect_pages);
+                               if (!info->feature_persistent) {
+                                       indirect_page = pfn_to_page(s->indirect_grants[i]->pfn);
+                                       list_add(&indirect_page->lru, &info->indirect_pages);
+                               }
                                s->indirect_grants[i]->gref = GRANT_INVALID_REF;
                                list_add_tail(&s->indirect_grants[i]->node, &info->grants);
                        }

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2015-07-22  5:34 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-21  3:30 [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Bob Liu
2015-07-21  3:30 ` [PATCH 2/3] xen-blkfront: rm BUG_ON(info->feature_persistent) in blkif_free Bob Liu
2015-07-21  3:30 ` Bob Liu
2015-07-21  9:25   ` Roger Pau Monné
2015-07-22  4:43     ` Bob Liu
2015-07-22  4:43     ` Bob Liu
2015-07-22  5:34       ` Bob Liu
2015-07-22  5:34       ` Bob Liu
2015-07-21  9:25   ` Roger Pau Monné
2015-07-21  3:30 ` [PATCH 3/3] xen-blkback: rm BUG_ON() in purge_persistent_gnt() Bob Liu
2015-07-21  9:13   ` Roger Pau Monné
2015-07-21  9:13   ` Roger Pau Monné
2015-07-21 10:50     ` Bob Liu
2015-07-21 10:50     ` Bob Liu
2015-07-21  3:30 ` Bob Liu
2015-07-21  9:32 ` [PATCH 1/3] xen-blkfront: introduce blkfront_gather_backend_features() Roger Pau Monné
2015-07-21  9:32 ` Roger Pau Monné

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.