All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] nvme-multipath: fix double initialization of ANA state
@ 2021-05-06 13:48 Christoph Hellwig
  2021-05-06 14:51 ` Keith Busch
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Christoph Hellwig @ 2021-05-06 13:48 UTC (permalink / raw)
  To: kbusch, sagi; +Cc: linux-nvme, Martin Wilck

nvme_init_identify and thus nvme_mpath_init can be called multiple
times and thus must not overwrite potentially initialized or in-use
fields.  Split out a helper for the basic initialization when the
controller is initialized and make sure the init_identify path does
not blindly change in-use data structures.

Fixes: 0d0b660f214d ("nvme: add ANA support")
Reported-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c      |  3 +-
 drivers/nvme/host/multipath.c | 53 ++++++++++++++++++-----------------
 drivers/nvme/host/nvme.h      |  8 ++++--
 3 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e6612971f4eb..d896b3d170ec 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2911,7 +2911,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
 	}
 
-	ret = nvme_mpath_init(ctrl, id);
+	ret = nvme_mpath_init_identify(ctrl, id);
 	if (ret < 0)
 		goto out_free;
 
@@ -4373,6 +4373,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		min(default_ps_max_latency_us, (unsigned long)S32_MAX));
 
 	nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
+	nvme_mpath_init_ctrl(ctrl);
 
 	return 0;
 out_free_name:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 0d0de3433f37..b831e80bbb76 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -778,8 +778,17 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	put_disk(head->disk);
 }
 
-int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
 {
+	mutex_init(&ctrl->ana_lock);
+	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
+	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
+}
+
+int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT;
+	size_t ana_log_size;
 	int error;
 
 	/* check if multipath is enabled and we have the capability */
@@ -792,37 +801,31 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
 	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);
 
-	mutex_init(&ctrl->ana_lock);
-	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
-	ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
-		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
-	ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
-
-	if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
+	ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
+		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) +
+		ctrl->max_namespaces * sizeof(__le32);
+	if (ana_log_size > max_transfer_size) {
 		dev_err(ctrl->device,
-			"ANA log page size (%zd) larger than MDTS (%d).\n",
-			ctrl->ana_log_size,
-			ctrl->max_hw_sectors << SECTOR_SHIFT);
+			"ANA log page size (%zd) larger than MDTS (%zd).\n",
+			ana_log_size, max_transfer_size);
 		dev_err(ctrl->device, "disabling ANA support.\n");
-		return 0;
+		goto out_uninit;
 	}
-
-	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
-	kfree(ctrl->ana_log_buf);
-	ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
-	if (!ctrl->ana_log_buf) {
-		error = -ENOMEM;
-		goto out;
+	if (ana_log_size > ctrl->ana_log_size) {
+		nvme_mpath_stop(ctrl);
+		kfree(ctrl->ana_log_buf);
+		ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
+		if (!ctrl->ana_log_buf)
+			return -ENOMEM;
 	}
-
+	ctrl->ana_log_size = ana_log_size;
 	error = nvme_read_ana_log(ctrl);
 	if (error)
-		goto out_free_ana_log_buf;
+		goto out_uninit;
 	return 0;
-out_free_ana_log_buf:
-	kfree(ctrl->ana_log_buf);
-	ctrl->ana_log_buf = NULL;
-out:
+
+out_uninit:
+	nvme_mpath_uninit(ctrl);
 	return error;
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 05f31a2c64bb..0015860ec12b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -712,7 +712,8 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
 void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
-int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
 void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
@@ -780,7 +781,10 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 static inline void nvme_trace_bio_complete(struct request *req)
 {
 }
-static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
+static inline void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
+{
+}
+static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl,
 		struct nvme_id_ctrl *id)
 {
 	if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)
-- 
2.30.2


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] nvme-multipath: fix double initialization of ANA state
  2021-05-06 13:48 [PATCH] nvme-multipath: fix double initialization of ANA state Christoph Hellwig
@ 2021-05-06 14:51 ` Keith Busch
  2021-05-07 16:54 ` Hannes Reinecke
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Keith Busch @ 2021-05-06 14:51 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: sagi, linux-nvme, Martin Wilck

On Thu, May 06, 2021 at 03:48:23PM +0200, Christoph Hellwig wrote:
> nvme_init_identify and thus nvme_mpath_init can be called multiple
> times and thus must not overwrite potentially initialized or in-use
> fields.  Split out a helper for the basic initialization when the
> controller is initialized and make sure the init_identify path does
> not blindly change in-use data structures.

Looks good.

Reviewed-by: Keith Busch <kbusch@kernel.org>

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] nvme-multipath: fix double initialization of ANA state
  2021-05-06 13:48 [PATCH] nvme-multipath: fix double initialization of ANA state Christoph Hellwig
  2021-05-06 14:51 ` Keith Busch
@ 2021-05-07 16:54 ` Hannes Reinecke
  2021-05-07 17:16 ` Sagi Grimberg
  2021-05-12 14:53 ` Martin Wilck
  3 siblings, 0 replies; 10+ messages in thread
From: Hannes Reinecke @ 2021-05-07 16:54 UTC (permalink / raw)
  To: Christoph Hellwig, kbusch, sagi; +Cc: linux-nvme, Martin Wilck

On 5/6/21 3:48 PM, Christoph Hellwig wrote:
> nvme_init_identify and thus nvme_mpath_init can be called multiple
> times and thus must not overwrite potentially initialized or in-use
> fields.  Split out a helper for the basic initialization when the
> controller is initialized and make sure the init_identify path does
> not blindly change in-use data structures.
> 
> Fixes: 0d0b660f214d ("nvme: add ANA support")
> Reported-by: Martin Wilck <mwilck@suse.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   drivers/nvme/host/core.c      |  3 +-
>   drivers/nvme/host/multipath.c | 53 ++++++++++++++++++-----------------
>   drivers/nvme/host/nvme.h      |  8 ++++--
>   3 files changed, 36 insertions(+), 28 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] nvme-multipath: fix double initialization of ANA state
  2021-05-06 13:48 [PATCH] nvme-multipath: fix double initialization of ANA state Christoph Hellwig
  2021-05-06 14:51 ` Keith Busch
  2021-05-07 16:54 ` Hannes Reinecke
@ 2021-05-07 17:16 ` Sagi Grimberg
  2021-05-12 14:53 ` Martin Wilck
  3 siblings, 0 replies; 10+ messages in thread
From: Sagi Grimberg @ 2021-05-07 17:16 UTC (permalink / raw)
  To: Christoph Hellwig, kbusch; +Cc: linux-nvme, Martin Wilck

Reviewed-by: Sagi Grimberg <sagi@grimberg.me>

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] nvme-multipath: fix double initialization of ANA state
  2021-05-06 13:48 [PATCH] nvme-multipath: fix double initialization of ANA state Christoph Hellwig
                   ` (2 preceding siblings ...)
  2021-05-07 17:16 ` Sagi Grimberg
@ 2021-05-12 14:53 ` Martin Wilck
  2021-05-14 21:08   ` Martin Wilck
  3 siblings, 1 reply; 10+ messages in thread
From: Martin Wilck @ 2021-05-12 14:53 UTC (permalink / raw)
  To: Christoph Hellwig, kbusch, sagi; +Cc: linux-nvme

On Thu, 2021-05-06 at 15:48 +0200, Christoph Hellwig wrote:
> nvme_init_identify and thus nvme_mpath_init can be called multiple
> times and thus must not overwrite potentially initialized or in-use
> fields.  Split out a helper for the basic initialization when the
> controller is initialized and make sure the init_identify path does
> not blindly change in-use data structures.
> 
> Fixes: 0d0b660f214d ("nvme: add ANA support")
> Reported-by: Martin Wilck <mwilck@suse.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Thank you. I'll prepare another test kernel for our partner.

Martin



_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] nvme-multipath: fix double initialization of ANA state
  2021-05-12 14:53 ` Martin Wilck
@ 2021-05-14 21:08   ` Martin Wilck
  2021-05-14 23:05     ` Sagi Grimberg
  2021-05-17 12:48     ` Christoph Hellwig
  0 siblings, 2 replies; 10+ messages in thread
From: Martin Wilck @ 2021-05-14 21:08 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-nvme, Daniel Wagner, kbusch, sagi

Hello Christoph,

On Wed, 2021-05-12 at 16:53 +0200, Martin Wilck wrote:
> On Thu, 2021-05-06 at 15:48 +0200, Christoph Hellwig wrote:
> > nvme_init_identify and thus nvme_mpath_init can be called multiple
> > times and thus must not overwrite potentially initialized or in-use
> > fields.  Split out a helper for the basic initialization when the
> > controller is initialized and make sure the init_identify path does
> > not blindly change in-use data structures.
> > 
> > Fixes: 0d0b660f214d ("nvme: add ANA support")
> > Reported-by: Martin Wilck <mwilck@suse.com>
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Thank you. I'll prepare another test kernel for our partner.

Our partner reported a crash during NVMe controller initialization with
the kernel I built with this patch applied. I'm still looking at the
dump, and it's not impossible that I made a mistake backporting your
patch. But I thought I should inform you anyway.

[ 1010.869437] nvme-fabrics ctl: Failed to read smart log (error -5)
[ 1010.869444] nvme nvme0: queue_size 128 > ctrl sqsize 32, clamping down
[ 1010.879383] nvme nvme0: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery", addr 192.168.1.14:4420
[ 1010.929700] nvme nvme0: Removing ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery"
[ 1011.041659] BUG: kernel NULL pointer dereference, address: 0000000000000010
[ 1011.041665] #PF: supervisor write access in kernel mode
[ 1011.041666] #PF: error_code(0x0002) - not-present page
[ 1011.041668] PGD 0 P4D 0 
[ 1011.041672] Oops: 0002 [#1] SMP PTI
[ 1011.041675] CPU: 13 PID: 0 Comm: swapper/13 Kdump: loaded Tainted: G               X    5.3.18-6.g7ea043c-default #1 SLE15-SP2 (unreleased)
[ 1011.041678] Hardware name: FUJITSU PRIMERGY RX2530 M2/D3279-B1, BIOS V5.0.0.11 R1.20.0 for D3279-B1x                    06/15/2018
[ 1011.041689] RIP: 0010:bio_copy_kern_endio_read+0xc6/0x130
[ 1011.041691] Code: c0 75 87 8b 4e 0c 44 89 df 89 ca 81 e1 ff 0f 00 00 c1 ea 0c 29 cf 48 c1 e2 06 89 f9 48 03 16 e9 6f ff ff ff 48 8b 3e 4c 89 c5 <49> 89 38 4a 8b 7c 0e f8 4b 89 7c 08 f8 49 8d 78 08 4d 01 c8 48 83
[ 1011.041695] RSP: 0018:ffffab41804c8ee8 EFLAGS: 00010212
[ 1011.041697] RAX: 0000000000000000 RBX: ffff9ff1b73e1500 RCX: 0000000000001000
[ 1011.041699] RDX: fffff2b810ce1240 RSI: ffff9ff1b3849000 RDI: 0000000000000000
[ 1011.041701] RBP: 0000000000000010 R08: 0000000000000010 R09: 0000000000001000
[ 1011.041702] R10: 0000000000000001 R11: 0000000000001000 R12: ffff9ff168ace140
[ 1011.041703] R13: 0000000000004810 R14: 0000000000000000 R15: 0000000000000000
[ 1011.041705] FS:  0000000000000000(0000) GS:ffff9ff1ff2c0000(0000) knlGS:0000000000000000
[ 1011.041707] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1011.041708] CR2: 0000000000000010 CR3: 00000001de60a006 CR4: 00000000003606e0
[ 1011.041710] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 1011.041711] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 1011.041713] Call Trace:
[ 1011.041716]  <IRQ>
[ 1011.041722]  blk_update_request+0x8a/0x3a0
[ 1011.041726]  blk_mq_end_request+0x1a/0x130
[ 1011.041729]  blk_done_softirq+0x8f/0xc0
[ 1011.041736]  __do_softirq+0xe3/0x2dc
[ 1011.041744]  irq_exit+0xd5/0xe0
[ 1011.041747]  call_function_single_interrupt+0xf/0x20
[ 1011.041749]  </IRQ>

bio_copy_kern_endio_read() means that this was a command sent via 
__nvme_submit_sync_cmd(). I don't know yet which one.

Regards,
Martin





_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] nvme-multipath: fix double initialization of ANA state
  2021-05-14 21:08   ` Martin Wilck
@ 2021-05-14 23:05     ` Sagi Grimberg
  2021-05-17 12:48     ` Christoph Hellwig
  1 sibling, 0 replies; 10+ messages in thread
From: Sagi Grimberg @ 2021-05-14 23:05 UTC (permalink / raw)
  To: Martin Wilck, Christoph Hellwig; +Cc: linux-nvme, Daniel Wagner, kbusch


> 
> Our partner reported a crash during NVMe controller initialization with
> the kernel I built with this patch applied. I'm still looking at the
> dump, and it's not impossible that I made a mistake backporting your
> patch. But I thought I should inform you anyway.

Strange, the log indicates the discovery controller, which should not
even have any ANA related activity...

> 
> [ 1010.869437] nvme-fabrics ctl: Failed to read smart log (error -5)
> [ 1010.869444] nvme nvme0: queue_size 128 > ctrl sqsize 32, clamping down
> [ 1010.879383] nvme nvme0: new ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery", addr 192.168.1.14:4420
> [ 1010.929700] nvme nvme0: Removing ctrl: NQN "nqn.2014-08.org.nvmexpress.discovery"
> [ 1011.041659] BUG: kernel NULL pointer dereference, address: 0000000000000010
> [ 1011.041665] #PF: supervisor write access in kernel mode
> [ 1011.041666] #PF: error_code(0x0002) - not-present page
> [ 1011.041668] PGD 0 P4D 0
> [ 1011.041672] Oops: 0002 [#1] SMP PTI
> [ 1011.041675] CPU: 13 PID: 0 Comm: swapper/13 Kdump: loaded Tainted: G               X    5.3.18-6.g7ea043c-default #1 SLE15-SP2 (unreleased)
> [ 1011.041678] Hardware name: FUJITSU PRIMERGY RX2530 M2/D3279-B1, BIOS V5.0.0.11 R1.20.0 for D3279-B1x                    06/15/2018
> [ 1011.041689] RIP: 0010:bio_copy_kern_endio_read+0xc6/0x130
> [ 1011.041691] Code: c0 75 87 8b 4e 0c 44 89 df 89 ca 81 e1 ff 0f 00 00 c1 ea 0c 29 cf 48 c1 e2 06 89 f9 48 03 16 e9 6f ff ff ff 48 8b 3e 4c 89 c5 <49> 89 38 4a 8b 7c 0e f8 4b 89 7c 08 f8 49 8d 78 08 4d 01 c8 48 83
> [ 1011.041695] RSP: 0018:ffffab41804c8ee8 EFLAGS: 00010212
> [ 1011.041697] RAX: 0000000000000000 RBX: ffff9ff1b73e1500 RCX: 0000000000001000
> [ 1011.041699] RDX: fffff2b810ce1240 RSI: ffff9ff1b3849000 RDI: 0000000000000000
> [ 1011.041701] RBP: 0000000000000010 R08: 0000000000000010 R09: 0000000000001000
> [ 1011.041702] R10: 0000000000000001 R11: 0000000000001000 R12: ffff9ff168ace140
> [ 1011.041703] R13: 0000000000004810 R14: 0000000000000000 R15: 0000000000000000
> [ 1011.041705] FS:  0000000000000000(0000) GS:ffff9ff1ff2c0000(0000) knlGS:0000000000000000
> [ 1011.041707] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 1011.041708] CR2: 0000000000000010 CR3: 00000001de60a006 CR4: 00000000003606e0
> [ 1011.041710] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [ 1011.041711] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [ 1011.041713] Call Trace:
> [ 1011.041716]  <IRQ>
> [ 1011.041722]  blk_update_request+0x8a/0x3a0
> [ 1011.041726]  blk_mq_end_request+0x1a/0x130
> [ 1011.041729]  blk_done_softirq+0x8f/0xc0
> [ 1011.041736]  __do_softirq+0xe3/0x2dc
> [ 1011.041744]  irq_exit+0xd5/0xe0
> [ 1011.041747]  call_function_single_interrupt+0xf/0x20
> [ 1011.041749]  </IRQ>
> 
> bio_copy_kern_endio_read() means that this was a command sent via
> __nvme_submit_sync_cmd(). I don't know yet which one.
> 
> Regards,
> Martin
> 
> 
> 
> 

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] nvme-multipath: fix double initialization of ANA state
  2021-05-14 21:08   ` Martin Wilck
  2021-05-14 23:05     ` Sagi Grimberg
@ 2021-05-17 12:48     ` Christoph Hellwig
  2021-05-17 16:47       ` Martin Wilck
  1 sibling, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2021-05-17 12:48 UTC (permalink / raw)
  To: Martin Wilck; +Cc: Christoph Hellwig, linux-nvme, Daniel Wagner, kbusch, sagi

On Fri, May 14, 2021 at 11:08:25PM +0200, Martin Wilck wrote:
> Hello Christoph,
> 
> On Wed, 2021-05-12 at 16:53 +0200, Martin Wilck wrote:
> > On Thu, 2021-05-06 at 15:48 +0200, Christoph Hellwig wrote:
> > > nvme_init_identify and thus nvme_mpath_init can be called multiple
> > > times and thus must not overwrite potentially initialized or in-use
> > > fields.  Split out a helper for the basic initialization when the
> > > controller is initialized and make sure the init_identify path does
> > > not blindly change in-use data structures.
> > > 
> > > Fixes: 0d0b660f214d ("nvme: add ANA support")
> > > Reported-by: Martin Wilck <mwilck@suse.com>
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > 
> > Thank you. I'll prepare another test kernel for our partner.
> 
> Our partner reported a crash during NVMe controller initialization with
> the kernel I built with this patch applied. I'm still looking at the
> dump, and it's not impossible that I made a mistake backporting your
> patch. But I thought I should inform you anyway.

It does look weird for sure.  Did you backport
"nvmet: use new ana_log_size instead the old one" as well?  If not please
pick it up.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] nvme-multipath: fix double initialization of ANA state
  2021-05-17 12:48     ` Christoph Hellwig
@ 2021-05-17 16:47       ` Martin Wilck
  2021-05-17 16:49         ` Martin Wilck
  0 siblings, 1 reply; 10+ messages in thread
From: Martin Wilck @ 2021-05-17 16:47 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-nvme, Daniel Wagner, kbusch, sagi

On Mon, 2021-05-17 at 14:48 +0200, Christoph Hellwig wrote:
> On Fri, May 14, 2021 at 11:08:25PM +0200, Martin Wilck wrote:
> > Hello Christoph,
> > 
> > On Wed, 2021-05-12 at 16:53 +0200, Martin Wilck wrote:
> > > On Thu, 2021-05-06 at 15:48 +0200, Christoph Hellwig wrote:
> > > > nvme_init_identify and thus nvme_mpath_init can be called
> > > > multiple
> > > > times and thus must not overwrite potentially initialized or
> > > > in-use
> > > > fields.  Split out a helper for the basic initialization when
> > > > the
> > > > controller is initialized and make sure the init_identify path
> > > > does
> > > > not blindly change in-use data structures.
> > > > 
> > > > Fixes: 0d0b660f214d ("nvme: add ANA support")
> > > > Reported-by: Martin Wilck <mwilck@suse.com>
> > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > 
> > > Thank you. I'll prepare another test kernel for our partner.
> > 
> > Our partner reported a crash during NVMe controller initialization
> > with
> > the kernel I built with this patch applied. I'm still looking at
> > the
> > dump, and it's not impossible that I made a mistake backporting
> > your
> > patch. But I thought I should inform you anyway.
> 
> It does look weird for sure.  Did you backport
> "nvmet: use new ana_log_size instead the old one" as well?  If not
> please
> pick it up.

I missed that one indeed, and it's the culprit.

Regards and thanks,
Martin



_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] nvme-multipath: fix double initialization of ANA state
  2021-05-17 16:47       ` Martin Wilck
@ 2021-05-17 16:49         ` Martin Wilck
  0 siblings, 0 replies; 10+ messages in thread
From: Martin Wilck @ 2021-05-17 16:49 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: linux-nvme, Daniel Wagner, kbusch, sagi

On Mon, 2021-05-17 at 18:47 +0200, Martin Wilck wrote:
> On Mon, 2021-05-17 at 14:48 +0200, Christoph Hellwig wrote:
> > On Fri, May 14, 2021 at 11:08:25PM +0200, Martin Wilck wrote:
> > > Hello Christoph,
> > > 
> > > On Wed, 2021-05-12 at 16:53 +0200, Martin Wilck wrote:
> > > > On Thu, 2021-05-06 at 15:48 +0200, Christoph Hellwig wrote:
> > > > > nvme_init_identify and thus nvme_mpath_init can be called
> > > > > multiple
> > > > > times and thus must not overwrite potentially initialized or
> > > > > in-use
> > > > > fields.  Split out a helper for the basic initialization when
> > > > > the
> > > > > controller is initialized and make sure the init_identify
> > > > > path
> > > > > does
> > > > > not blindly change in-use data structures.
> > > > > 
> > > > > Fixes: 0d0b660f214d ("nvme: add ANA support")
> > > > > Reported-by: Martin Wilck <mwilck@suse.com>
> > > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > > 
> > > > Thank you. I'll prepare another test kernel for our partner.
> > > 
> > > Our partner reported a crash during NVMe controller
> > > initialization
> > > with
> > > the kernel I built with this patch applied. I'm still looking at
> > > the
> > > dump, and it's not impossible that I made a mistake backporting
> > > your
> > > patch. But I thought I should inform you anyway.
> > 
> > It does look weird for sure.  Did you backport
> > "nvmet: use new ana_log_size instead the old one" as well?  If not
> > please
> > pick it up.
> 
> I missed that one indeed, and it's the culprit.

I wrote nonsense, sorry. 

Applying "nvmet: use new ana_log_size instead the old one" fixed the
issue for me.

Martin



_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-05-17 16:49 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-06 13:48 [PATCH] nvme-multipath: fix double initialization of ANA state Christoph Hellwig
2021-05-06 14:51 ` Keith Busch
2021-05-07 16:54 ` Hannes Reinecke
2021-05-07 17:16 ` Sagi Grimberg
2021-05-12 14:53 ` Martin Wilck
2021-05-14 21:08   ` Martin Wilck
2021-05-14 23:05     ` Sagi Grimberg
2021-05-17 12:48     ` Christoph Hellwig
2021-05-17 16:47       ` Martin Wilck
2021-05-17 16:49         ` Martin Wilck

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.