linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] nvme-multipath: fix sysfs dangerously created links
@ 2018-02-26  8:51 baegjae
  2018-02-26 16:24 ` Keith Busch
  0 siblings, 1 reply; 3+ messages in thread
From: baegjae @ 2018-02-26  8:51 UTC (permalink / raw)
  To: keith.busch, axboe, hch, sagi, baegjae; +Cc: linux-nvme, linux-kernel

From: Baegjae Sung <baegjae@gmail.com>

If multipathing is enabled, each NVMe subsystem creates a head
namespace (e.g., nvme0n1) and multiple private namespaces
(e.g., nvme0c0n1 and nvme0c1n1) in sysfs. When creating links for
private namespaces, links of head namespace are used, so the
namespace creation order must be followed (e.g., nvme0n1 ->
nvme0c1n1). If the order is not followed, links of sysfs will be
incomplete or kernel panic will occur.

The kernel panic was:
  kernel BUG at fs/sysfs/symlink.c:27!
  Call Trace:
    nvme_mpath_add_disk_links+0x5d/0x80 [nvme_core]
    nvme_validate_ns+0x5c2/0x850 [nvme_core]
    nvme_scan_work+0x1af/0x2d0 [nvme_core]

Correct order
Context A     Context B
nvme0n1
nvme0c0n1     nvme0c1n1

Incorrect order
Context A     Context B
              nvme0c1n1
nvme0n1
nvme0c0n1

The function of a head namespace creation is moved to maintain the
correct order. We verified the code with or without multipathing
using three vendors of dual-port NVMe SSDs.

Signed-off-by: Baegjae Sung <baegjae@gmail.com>
---
 drivers/nvme/host/core.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 0fe7ea35c221..28777b7352a5 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2844,7 +2844,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
 }
 
 static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
-		struct nvme_id_ns *id, bool *new)
+		struct nvme_id_ns *id)
 {
 	struct nvme_ctrl *ctrl = ns->ctrl;
 	bool is_shared = id->nmic & (1 << 0);
@@ -2860,8 +2860,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 			ret = PTR_ERR(head);
 			goto out_unlock;
 		}
-
-		*new = true;
+		nvme_mpath_add_disk(head);
 	} else {
 		struct nvme_ns_ids ids;
 
@@ -2873,8 +2872,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
 			ret = -EINVAL;
 			goto out_unlock;
 		}
-
-		*new = false;
 	}
 
 	list_add_tail(&ns->siblings, &head->list);
@@ -2945,7 +2942,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	struct nvme_id_ns *id;
 	char disk_name[DISK_NAME_LEN];
 	int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
-	bool new = true;
 
 	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
 	if (!ns)
@@ -2971,7 +2967,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	if (id->ncap == 0)
 		goto out_free_id;
 
-	if (nvme_init_ns_head(ns, nsid, id, &new))
+	if (nvme_init_ns_head(ns, nsid, id))
 		goto out_free_id;
 	nvme_setup_streams_ns(ctrl, ns);
 	
@@ -3037,8 +3033,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
 			ns->disk->disk_name);
 
-	if (new)
-		nvme_mpath_add_disk(ns->head);
 	nvme_mpath_add_disk_links(ns);
 	return;
  out_unlink_ns:
-- 
2.16.2

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] nvme-multipath: fix sysfs dangerously created links
  2018-02-26  8:51 [PATCH] nvme-multipath: fix sysfs dangerously created links baegjae
@ 2018-02-26 16:24 ` Keith Busch
  2018-02-27  4:42   ` Baegjae Sung
  0 siblings, 1 reply; 3+ messages in thread
From: Keith Busch @ 2018-02-26 16:24 UTC (permalink / raw)
  To: baegjae; +Cc: axboe, hch, sagi, linux-nvme, linux-kernel

On Mon, Feb 26, 2018 at 05:51:23PM +0900, baegjae@gmail.com wrote:
> From: Baegjae Sung <baegjae@gmail.com>
> 
> If multipathing is enabled, each NVMe subsystem creates a head
> namespace (e.g., nvme0n1) and multiple private namespaces
> (e.g., nvme0c0n1 and nvme0c1n1) in sysfs. When creating links for
> private namespaces, links of head namespace are used, so the
> namespace creation order must be followed (e.g., nvme0n1 ->
> nvme0c1n1). If the order is not followed, links of sysfs will be
> incomplete or kernel panic will occur.
> 
> The kernel panic was:
>   kernel BUG at fs/sysfs/symlink.c:27!
>   Call Trace:
>     nvme_mpath_add_disk_links+0x5d/0x80 [nvme_core]
>     nvme_validate_ns+0x5c2/0x850 [nvme_core]
>     nvme_scan_work+0x1af/0x2d0 [nvme_core]
> 
> Correct order
> Context A     Context B
> nvme0n1
> nvme0c0n1     nvme0c1n1
> 
> Incorrect order
> Context A     Context B
>               nvme0c1n1
> nvme0n1
> nvme0c0n1
> 
> The function of a head namespace creation is moved to maintain the
> correct order. We verified the code with or without multipathing
> using three vendors of dual-port NVMe SSDs.
> 
> Signed-off-by: Baegjae Sung <baegjae@gmail.com>

Thanks, I see what you mean on the potential ordering problem here.
Calling nvme_mpath_add_disk, though, before the 'head' has any namespace
paths available looks like you'll get a lot of 'no path available'
warnings during the bring-up. It should resolve itself shortly after,
but the warnings will be a bit alarming, right?

> ---
>  drivers/nvme/host/core.c | 12 +++---------
>  1 file changed, 3 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 0fe7ea35c221..28777b7352a5 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2844,7 +2844,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
>  }
>  
>  static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
> -		struct nvme_id_ns *id, bool *new)
> +		struct nvme_id_ns *id)
>  {
>  	struct nvme_ctrl *ctrl = ns->ctrl;
>  	bool is_shared = id->nmic & (1 << 0);
> @@ -2860,8 +2860,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
>  			ret = PTR_ERR(head);
>  			goto out_unlock;
>  		}
> -
> -		*new = true;
> +		nvme_mpath_add_disk(head);
>  	} else {
>  		struct nvme_ns_ids ids;
>  
> @@ -2873,8 +2872,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
>  			ret = -EINVAL;
>  			goto out_unlock;
>  		}
> -
> -		*new = false;
>  	}
>  
>  	list_add_tail(&ns->siblings, &head->list);
> @@ -2945,7 +2942,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>  	struct nvme_id_ns *id;
>  	char disk_name[DISK_NAME_LEN];
>  	int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
> -	bool new = true;
>  
>  	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
>  	if (!ns)
> @@ -2971,7 +2967,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>  	if (id->ncap == 0)
>  		goto out_free_id;
>  
> -	if (nvme_init_ns_head(ns, nsid, id, &new))
> +	if (nvme_init_ns_head(ns, nsid, id))
>  		goto out_free_id;
>  	nvme_setup_streams_ns(ctrl, ns);
>  	
> @@ -3037,8 +3033,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>  		pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
>  			ns->disk->disk_name);
>  
> -	if (new)
> -		nvme_mpath_add_disk(ns->head);
>  	nvme_mpath_add_disk_links(ns);
>  	return;
>   out_unlink_ns:
> -- 
> 2.16.2
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] nvme-multipath: fix sysfs dangerously created links
  2018-02-26 16:24 ` Keith Busch
@ 2018-02-27  4:42   ` Baegjae Sung
  0 siblings, 0 replies; 3+ messages in thread
From: Baegjae Sung @ 2018-02-27  4:42 UTC (permalink / raw)
  To: Keith Busch; +Cc: axboe, hch, sagi, linux-nvme, linux-kernel

2018-02-27 1:24 GMT+09:00 Keith Busch <keith.busch@intel.com>:
> On Mon, Feb 26, 2018 at 05:51:23PM +0900, baegjae@gmail.com wrote:
>> From: Baegjae Sung <baegjae@gmail.com>
>>
>> If multipathing is enabled, each NVMe subsystem creates a head
>> namespace (e.g., nvme0n1) and multiple private namespaces
>> (e.g., nvme0c0n1 and nvme0c1n1) in sysfs. When creating links for
>> private namespaces, links of head namespace are used, so the
>> namespace creation order must be followed (e.g., nvme0n1 ->
>> nvme0c1n1). If the order is not followed, links of sysfs will be
>> incomplete or kernel panic will occur.
>>
>> The kernel panic was:
>>   kernel BUG at fs/sysfs/symlink.c:27!
>>   Call Trace:
>>     nvme_mpath_add_disk_links+0x5d/0x80 [nvme_core]
>>     nvme_validate_ns+0x5c2/0x850 [nvme_core]
>>     nvme_scan_work+0x1af/0x2d0 [nvme_core]
>>
>> Correct order
>> Context A     Context B
>> nvme0n1
>> nvme0c0n1     nvme0c1n1
>>
>> Incorrect order
>> Context A     Context B
>>               nvme0c1n1
>> nvme0n1
>> nvme0c0n1
>>
>> The function of a head namespace creation is moved to maintain the
>> correct order. We verified the code with or without multipathing
>> using three vendors of dual-port NVMe SSDs.
>>
>> Signed-off-by: Baegjae Sung <baegjae@gmail.com>
>
> Thanks, I see what you mean on the potential ordering problem here.
> Calling nvme_mpath_add_disk, though, before the 'head' has any namespace
> paths available looks like you'll get a lot of 'no path available'
> warnings during the bring-up. It should resolve itself shortly after,
> but the warnings will be a bit alarming, right?

Thanks for reply.
I agree what you concern about temporary 'no path available' warnings.
That was why nvme_mpath_add_disk and nvme_mpath_add_disk_links were
coded together.
I will send you a new patch that reflects your concerns.

>
>> ---
>>  drivers/nvme/host/core.c | 12 +++---------
>>  1 file changed, 3 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 0fe7ea35c221..28777b7352a5 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -2844,7 +2844,7 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
>>  }
>>
>>  static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
>> -             struct nvme_id_ns *id, bool *new)
>> +             struct nvme_id_ns *id)
>>  {
>>       struct nvme_ctrl *ctrl = ns->ctrl;
>>       bool is_shared = id->nmic & (1 << 0);
>> @@ -2860,8 +2860,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
>>                       ret = PTR_ERR(head);
>>                       goto out_unlock;
>>               }
>> -
>> -             *new = true;
>> +             nvme_mpath_add_disk(head);
>>       } else {
>>               struct nvme_ns_ids ids;
>>
>> @@ -2873,8 +2872,6 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
>>                       ret = -EINVAL;
>>                       goto out_unlock;
>>               }
>> -
>> -             *new = false;
>>       }
>>
>>       list_add_tail(&ns->siblings, &head->list);
>> @@ -2945,7 +2942,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>>       struct nvme_id_ns *id;
>>       char disk_name[DISK_NAME_LEN];
>>       int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
>> -     bool new = true;
>>
>>       ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
>>       if (!ns)
>> @@ -2971,7 +2967,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>>       if (id->ncap == 0)
>>               goto out_free_id;
>>
>> -     if (nvme_init_ns_head(ns, nsid, id, &new))
>> +     if (nvme_init_ns_head(ns, nsid, id))
>>               goto out_free_id;
>>       nvme_setup_streams_ns(ctrl, ns);
>>
>> @@ -3037,8 +3033,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>>               pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
>>                       ns->disk->disk_name);
>>
>> -     if (new)
>> -             nvme_mpath_add_disk(ns->head);
>>       nvme_mpath_add_disk_links(ns);
>>       return;
>>   out_unlink_ns:
>> --
>> 2.16.2
>>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-02-27  4:42 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-26  8:51 [PATCH] nvme-multipath: fix sysfs dangerously created links baegjae
2018-02-26 16:24 ` Keith Busch
2018-02-27  4:42   ` Baegjae Sung

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).