Linux-NVME Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
       [not found] <20210415192848.962891-1-nitram_67@hotmail.com>
@ 2021-04-15 19:28 ` Martin Belanger
  2021-05-01 11:34   ` Hannes Reinecke
  2021-05-04 19:56   ` Sagi Grimberg
  0 siblings, 2 replies; 23+ messages in thread
From: Martin Belanger @ 2021-04-15 19:28 UTC (permalink / raw)
  To: linux-nvme; +Cc: kbusch, axboe, hch, sagi, Martin Belanger

From: Martin Belanger <martin.belanger@dell.com>

---
 drivers/nvme/host/core.c    |  5 +++++
 drivers/nvme/host/fabrics.c | 14 +++++++++++++
 drivers/nvme/host/fabrics.h |  6 +++++-
 drivers/nvme/host/tcp.c     | 41 ++++++++++++++++++++++++++++++++++---
 4 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 288ac47ff5b4..91ae11a1ae26 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3961,6 +3961,11 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
 
 		ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
 				opts->host_traddr ?: "none");
+		if (ret)
+			return ret;
+
+		ret = add_uevent_var(env, "NVME_HOST_TRIFACE=%s",
+				opts->host_triface ?: "none");
 	}
 	return ret;
 }
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 604ab0e5a2ad..f5d0d760b53b 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -112,6 +112,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
 	if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
 		len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
 				(len) ? "," : "", ctrl->opts->host_traddr);
+	if (ctrl->opts->mask & NVMF_OPT_HOST_TRIFACE)
+		len += scnprintf(buf + len, size - len, "%shost_triface=%s",
+				(len) ? "," : "", ctrl->opts->host_triface);
 	len += scnprintf(buf + len, size - len, "\n");
 
 	return len;
@@ -604,6 +607,7 @@ static const match_table_t opt_tokens = {
 	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
 	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
 	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
+	{ NVMF_OPT_HOST_TRIFACE,	"host_triface=%s"	},
 	{ NVMF_OPT_HOST_ID,		"hostid=%s"		},
 	{ NVMF_OPT_DUP_CONNECT,		"duplicate_connect"	},
 	{ NVMF_OPT_DISABLE_SQFLOW,	"disable_sqflow"	},
@@ -813,6 +817,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			kfree(opts->host_traddr);
 			opts->host_traddr = p;
 			break;
+		case NVMF_OPT_HOST_TRIFACE:
+			p = match_strdup(args);
+			if (!p) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			kfree(opts->host_triface);
+			opts->host_triface = p;
+			break;
 		case NVMF_OPT_HOST_ID:
 			p = match_strdup(args);
 			if (!p) {
@@ -997,6 +1010,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
 	kfree(opts->trsvcid);
 	kfree(opts->subsysnqn);
 	kfree(opts->host_traddr);
+	kfree(opts->host_triface);
 	kfree(opts);
 }
 EXPORT_SYMBOL_GPL(nvmf_free_options);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 733010d2eafd..17c64ff4db8c 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -59,6 +59,7 @@ enum {
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
 	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
+	NVMF_OPT_HOST_TRIFACE	= 1 << 21,
 };
 
 /**
@@ -76,7 +77,9 @@ enum {
  * @trsvcid:	The transport-specific TRSVCID field for a port on the
  *              subsystem which is adding a controller.
  * @host_traddr: A transport-specific field identifying the NVME host port
- *              to use for the connection to the controller.
+ *		to use for the connection to the controller.
+ * @host_triface: A transport-specific field identifying the NVME host
+ *		interface to use for the connection to the controller.
  * @queue_size: Number of IO queue elements.
  * @nr_io_queues: Number of controller IO queues that will be established.
  * @reconnect_delay: Time between two consecutive reconnect attempts.
@@ -101,6 +104,7 @@ struct nvmf_ctrl_options {
 	char			*traddr;
 	char			*trsvcid;
 	char			*host_traddr;
+	char			*host_triface;
 	size_t			queue_size;
 	unsigned int		nr_io_queues;
 	unsigned int		reconnect_delay;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8e55d8bc0c50..28eb7f88b487 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1447,6 +1447,20 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 		}
 	}
 
+	if (nctrl->opts->mask & NVMF_OPT_HOST_TRIFACE) {
+		char *iface = nctrl->opts->host_triface;
+		sockptr_t optval = KERNEL_SOCKPTR(iface);
+
+		ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE,
+				      optval, strlen(iface));
+		if (ret) {
+			dev_err(nctrl->device,
+			  "failed to bind to interface %s queue %d err %d\n",
+			  iface, qid, ret);
+			goto err_sock;
+		}
+	}
+
 	queue->hdr_digest = nctrl->opts->hdr_digest;
 	queue->data_digest = nctrl->opts->data_digest;
 	if (queue->hdr_digest || queue->data_digest) {
@@ -2457,6 +2471,10 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
 static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 		struct nvmf_ctrl_options *opts)
 {
+	const char *iface_key = "";
+	const char *iface_val = "";
+	const char *srce_key  = "";
+	const char *srce_val  = "";
 	struct nvme_tcp_ctrl *ctrl;
 	int ret;
 
@@ -2502,6 +2520,22 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 			       opts->host_traddr);
 			goto out_free_ctrl;
 		}
+		srce_key = ", src-addr ";
+		srce_val = opts->host_traddr;
+	}
+
+	if (opts->mask & NVMF_OPT_HOST_TRIFACE) {
+		struct net_device *ndev;
+
+		ndev = dev_get_by_name(&init_net, opts->host_triface);
+		if (!ndev) {
+			pr_err("invalid interface passed: %s\n",
+			       opts->host_triface);
+			ret = -ENODEV;
+			goto out_free_ctrl;
+		}
+		iface_key = ", iface ";
+		iface_val = opts->host_triface;
 	}
 
 	if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {
@@ -2530,8 +2564,9 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
 	if (ret)
 		goto out_uninit_ctrl;
 
-	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
-		ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
+	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp%s%s%s%s\n",
+		 ctrl->ctrl.opts->subsysnqn, &ctrl->addr,
+		 srce_key, srce_val, iface_key, iface_val);
 
 	mutex_lock(&nvme_tcp_ctrl_mutex);
 	list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
@@ -2560,7 +2595,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
 			  NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
 			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
-			  NVMF_OPT_TOS,
+			  NVMF_OPT_TOS | NVMF_OPT_HOST_TRIFACE,
 	.create_ctrl	= nvme_tcp_create_ctrl,
 };
 
-- 
2.25.1


_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-04-15 19:28 ` [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting Martin Belanger
@ 2021-05-01 11:34   ` Hannes Reinecke
  2021-05-03 16:59     ` Belanger, Martin
  2021-05-04 19:56   ` Sagi Grimberg
  1 sibling, 1 reply; 23+ messages in thread
From: Hannes Reinecke @ 2021-05-01 11:34 UTC (permalink / raw)
  To: Martin Belanger, linux-nvme; +Cc: kbusch, axboe, hch, sagi, Martin Belanger

On 4/15/21 9:28 PM, Martin Belanger wrote:
> From: Martin Belanger <martin.belanger@dell.com>
> 
Please fix up the subject and description.

> ---
>   drivers/nvme/host/core.c    |  5 +++++
>   drivers/nvme/host/fabrics.c | 14 +++++++++++++
>   drivers/nvme/host/fabrics.h |  6 +++++-
>   drivers/nvme/host/tcp.c     | 41 ++++++++++++++++++++++++++++++++++---
>   4 files changed, 62 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 288ac47ff5b4..91ae11a1ae26 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3961,6 +3961,11 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
>   
>   		ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
>   				opts->host_traddr ?: "none");
> +		if (ret)
> +			return ret;
> +
> +		ret = add_uevent_var(env, "NVME_HOST_TRIFACE=%s",
> +				opts->host_triface ?: "none");
>   	}
>   	return ret;
>   }

Why not simply 'host_iface' ? 'triface' is a bit awkward.

> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 604ab0e5a2ad..f5d0d760b53b 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -112,6 +112,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
>   	if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
>   		len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
>   				(len) ? "," : "", ctrl->opts->host_traddr);
> +	if (ctrl->opts->mask & NVMF_OPT_HOST_TRIFACE)
> +		len += scnprintf(buf + len, size - len, "%shost_triface=%s",
> +				(len) ? "," : "", ctrl->opts->host_triface);
>   	len += scnprintf(buf + len, size - len, "\n");
>   
>   	return len;
> @@ -604,6 +607,7 @@ static const match_table_t opt_tokens = {
>   	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
>   	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
>   	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
> +	{ NVMF_OPT_HOST_TRIFACE,	"host_triface=%s"	},
>   	{ NVMF_OPT_HOST_ID,		"hostid=%s"		},
>   	{ NVMF_OPT_DUP_CONNECT,		"duplicate_connect"	},
>   	{ NVMF_OPT_DISABLE_SQFLOW,	"disable_sqflow"	},
> @@ -813,6 +817,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>   			kfree(opts->host_traddr);
>   			opts->host_traddr = p;
>   			break;
> +		case NVMF_OPT_HOST_TRIFACE:
> +			p = match_strdup(args);
> +			if (!p) {
> +				ret = -ENOMEM;
> +				goto out;
> +			}
> +			kfree(opts->host_triface);
> +			opts->host_triface = p;
> +			break;
>   		case NVMF_OPT_HOST_ID:
>   			p = match_strdup(args);
>   			if (!p) {
> @@ -997,6 +1010,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
>   	kfree(opts->trsvcid);
>   	kfree(opts->subsysnqn);
>   	kfree(opts->host_traddr);
> +	kfree(opts->host_triface);
>   	kfree(opts);
>   }
>   EXPORT_SYMBOL_GPL(nvmf_free_options);
> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index 733010d2eafd..17c64ff4db8c 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -59,6 +59,7 @@ enum {
>   	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
>   	NVMF_OPT_TOS		= 1 << 19,
>   	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
> +	NVMF_OPT_HOST_TRIFACE	= 1 << 21,
>   };
>   
>   /**
> @@ -76,7 +77,9 @@ enum {
>    * @trsvcid:	The transport-specific TRSVCID field for a port on the
>    *              subsystem which is adding a controller.
>    * @host_traddr: A transport-specific field identifying the NVME host port
> - *              to use for the connection to the controller.
> + *		to use for the connection to the controller.
> + * @host_triface: A transport-specific field identifying the NVME host
> + *		interface to use for the connection to the controller.
>    * @queue_size: Number of IO queue elements.
>    * @nr_io_queues: Number of controller IO queues that will be established.
>    * @reconnect_delay: Time between two consecutive reconnect attempts.
> @@ -101,6 +104,7 @@ struct nvmf_ctrl_options {
>   	char			*traddr;
>   	char			*trsvcid;
>   	char			*host_traddr;
> +	char			*host_triface;
>   	size_t			queue_size;
>   	unsigned int		nr_io_queues;
>   	unsigned int		reconnect_delay;
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 8e55d8bc0c50..28eb7f88b487 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -1447,6 +1447,20 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
>   		}
>   	}
>   
> +	if (nctrl->opts->mask & NVMF_OPT_HOST_TRIFACE) {
> +		char *iface = nctrl->opts->host_triface;
> +		sockptr_t optval = KERNEL_SOCKPTR(iface);
> +
> +		ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE,
> +				      optval, strlen(iface));
> +		if (ret) {
> +			dev_err(nctrl->device,
> +			  "failed to bind to interface %s queue %d err %d\n",
> +			  iface, qid, ret);
> +			goto err_sock;
> +		}
> +	}
> +
>   	queue->hdr_digest = nctrl->opts->hdr_digest;
>   	queue->data_digest = nctrl->opts->data_digest;
>   	if (queue->hdr_digest || queue->data_digest) {

Is this valid for all transports? I guess it would only work for 'tcp', 
and maybe 'rdma' if one would be running ROCE.
Shouldn't we error out on other transports like 'fc' or 'loop'?

> @@ -2457,6 +2471,10 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
>   static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>   		struct nvmf_ctrl_options *opts)
>   {
> +	const char *iface_key = "";
> +	const char *iface_val = "";
> +	const char *srce_key  = "";
> +	const char *srce_val  = "";
>   	struct nvme_tcp_ctrl *ctrl;
>   	int ret;
>   
> @@ -2502,6 +2520,22 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>   			       opts->host_traddr);
>   			goto out_free_ctrl;
>   		}
> +		srce_key = ", src-addr ";
> +		srce_val = opts->host_traddr;
> +	}
> +
> +	if (opts->mask & NVMF_OPT_HOST_TRIFACE) {
> +		struct net_device *ndev;
> +
> +		ndev = dev_get_by_name(&init_net, opts->host_triface);
> +		if (!ndev) {
> +			pr_err("invalid interface passed: %s\n",
> +			       opts->host_triface);
> +			ret = -ENODEV;
> +			goto out_free_ctrl;
> +		}
> +		iface_key = ", iface ";
> +		iface_val = opts->host_triface;
>   	}
>   
>   	if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {

Normally the options are just parts of the 'address' string; why didn't 
you use that approach here?

> @@ -2530,8 +2564,9 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>   	if (ret)
>   		goto out_uninit_ctrl;
>   
> -	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
> -		ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
> +	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp%s%s%s%s\n",
> +		 ctrl->ctrl.opts->subsysnqn, &ctrl->addr,
> +		 srce_key, srce_val, iface_key, iface_val);
>   
>   	mutex_lock(&nvme_tcp_ctrl_mutex);
>   	list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
> @@ -2560,7 +2595,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
>   			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
>   			  NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
>   			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
> -			  NVMF_OPT_TOS,
> +			  NVMF_OPT_TOS | NVMF_OPT_HOST_TRIFACE,
>   	.create_ctrl	= nvme_tcp_create_ctrl,
>   };
>   
> 
Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-01 11:34   ` Hannes Reinecke
@ 2021-05-03 16:59     ` Belanger, Martin
  2021-05-04 13:25       ` Hannes Reinecke
  0 siblings, 1 reply; 23+ messages in thread
From: Belanger, Martin @ 2021-05-03 16:59 UTC (permalink / raw)
  To: Hannes Reinecke, Martin Belanger, linux-nvme; +Cc: kbusch, axboe, hch, sagi

Hi Hannes,

I just noticed there were in-line comments. to answer you questions:

Q1) Why not simply 'host_iface' ? 'triface' is a bit awkward.
A1) I used TRIFACE to keep consistency with all other transport options: traddr, trsvcid, host_traddr. I will rename to host_iface at your suggestion.

Q2) Is this valid for all transports? I guess it would only work for 'tcp', and maybe 'rdma' if one would be running ROCE. Shouldn't we error out on other transports like 'fc' or 'loop'?
A2) This is only for TCP, and we do check that this option is only allowed for TCP by specifying it in the "allowed_opts" as follows (see file tcp.c):

static struct nvmf_transport_ops nvme_tcp_transport = {
        .name           = "tcp",
        .module         = THIS_MODULE,
        .required_opts  = NVMF_OPT_TRADDR,
        .allowed_opts   = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
                          NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
                          NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
                          NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
                          NVMF_OPT_TOS | NVMF_OPT_HOST_TRIFACE,
        .create_ctrl    = nvme_tcp_create_ctrl,
};

Q3) Normally the options are just parts of the 'address' string; why didn't
you use that approach here?
A3) I don't understand what you mean?

Regards,
Martin


________________________________________
From: Hannes Reinecke <hare@suse.de>
Sent: Saturday, May 1, 2021 07:34
To: Martin Belanger; linux-nvme@lists.infradead.org
Cc: kbusch@kernel.org; axboe@fb.com; hch@lst.de; sagi@grimberg.me; Belanger, Martin
Subject: Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.


[EXTERNAL EMAIL]

On 4/15/21 9:28 PM, Martin Belanger wrote:
> From: Martin Belanger <martin.belanger@dell.com>
>
Please fix up the subject and description.

> ---
>   drivers/nvme/host/core.c    |  5 +++++
>   drivers/nvme/host/fabrics.c | 14 +++++++++++++
>   drivers/nvme/host/fabrics.h |  6 +++++-
>   drivers/nvme/host/tcp.c     | 41 ++++++++++++++++++++++++++++++++++---
>   4 files changed, 62 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 288ac47ff5b4..91ae11a1ae26 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3961,6 +3961,11 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
>
>               ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
>                               opts->host_traddr ?: "none");
> +             if (ret)
> +                     return ret;
> +
> +             ret = add_uevent_var(env, "NVME_HOST_TRIFACE=%s",
> +                             opts->host_triface ?: "none");
>       }
>       return ret;
>   }

Why not simply 'host_iface' ? 'triface' is a bit awkward.

> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 604ab0e5a2ad..f5d0d760b53b 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -112,6 +112,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
>       if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
>               len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
>                               (len) ? "," : "", ctrl->opts->host_traddr);
> +     if (ctrl->opts->mask & NVMF_OPT_HOST_TRIFACE)
> +             len += scnprintf(buf + len, size - len, "%shost_triface=%s",
> +                             (len) ? "," : "", ctrl->opts->host_triface);
>       len += scnprintf(buf + len, size - len, "\n");
>
>       return len;
> @@ -604,6 +607,7 @@ static const match_table_t opt_tokens = {
>       { NVMF_OPT_KATO,                "keep_alive_tmo=%d"     },
>       { NVMF_OPT_HOSTNQN,             "hostnqn=%s"            },
>       { NVMF_OPT_HOST_TRADDR,         "host_traddr=%s"        },
> +     { NVMF_OPT_HOST_TRIFACE,        "host_triface=%s"       },
>       { NVMF_OPT_HOST_ID,             "hostid=%s"             },
>       { NVMF_OPT_DUP_CONNECT,         "duplicate_connect"     },
>       { NVMF_OPT_DISABLE_SQFLOW,      "disable_sqflow"        },
> @@ -813,6 +817,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>                       kfree(opts->host_traddr);
>                       opts->host_traddr = p;
>                       break;
> +             case NVMF_OPT_HOST_TRIFACE:
> +                     p = match_strdup(args);
> +                     if (!p) {
> +                             ret = -ENOMEM;
> +                             goto out;
> +                     }
> +                     kfree(opts->host_triface);
> +                     opts->host_triface = p;
> +                     break;
>               case NVMF_OPT_HOST_ID:
>                       p = match_strdup(args);
>                       if (!p) {
> @@ -997,6 +1010,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
>       kfree(opts->trsvcid);
>       kfree(opts->subsysnqn);
>       kfree(opts->host_traddr);
> +     kfree(opts->host_triface);
>       kfree(opts);
>   }
>   EXPORT_SYMBOL_GPL(nvmf_free_options);
> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index 733010d2eafd..17c64ff4db8c 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -59,6 +59,7 @@ enum {
>       NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
>       NVMF_OPT_TOS            = 1 << 19,
>       NVMF_OPT_FAIL_FAST_TMO  = 1 << 20,
> +     NVMF_OPT_HOST_TRIFACE   = 1 << 21,
>   };
>
>   /**
> @@ -76,7 +77,9 @@ enum {
>    * @trsvcid:        The transport-specific TRSVCID field for a port on the
>    *              subsystem which is adding a controller.
>    * @host_traddr: A transport-specific field identifying the NVME host port
> - *              to use for the connection to the controller.
> + *           to use for the connection to the controller.
> + * @host_triface: A transport-specific field identifying the NVME host
> + *           interface to use for the connection to the controller.
>    * @queue_size: Number of IO queue elements.
>    * @nr_io_queues: Number of controller IO queues that will be established.
>    * @reconnect_delay: Time between two consecutive reconnect attempts.
> @@ -101,6 +104,7 @@ struct nvmf_ctrl_options {
>       char                    *traddr;
>       char                    *trsvcid;
>       char                    *host_traddr;
> +     char                    *host_triface;
>       size_t                  queue_size;
>       unsigned int            nr_io_queues;
>       unsigned int            reconnect_delay;
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 8e55d8bc0c50..28eb7f88b487 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -1447,6 +1447,20 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
>               }
>       }
>
> +     if (nctrl->opts->mask & NVMF_OPT_HOST_TRIFACE) {
> +             char *iface = nctrl->opts->host_triface;
> +             sockptr_t optval = KERNEL_SOCKPTR(iface);
> +
> +             ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE,
> +                                   optval, strlen(iface));
> +             if (ret) {
> +                     dev_err(nctrl->device,
> +                       "failed to bind to interface %s queue %d err %d\n",
> +                       iface, qid, ret);
> +                     goto err_sock;
> +             }
> +     }
> +
>       queue->hdr_digest = nctrl->opts->hdr_digest;
>       queue->data_digest = nctrl->opts->data_digest;
>       if (queue->hdr_digest || queue->data_digest) {

Is this valid for all transports? I guess it would only work for 'tcp',
and maybe 'rdma' if one would be running ROCE.
Shouldn't we error out on other transports like 'fc' or 'loop'?

> @@ -2457,6 +2471,10 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
>   static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>               struct nvmf_ctrl_options *opts)
>   {
> +     const char *iface_key = "";
> +     const char *iface_val = "";
> +     const char *srce_key  = "";
> +     const char *srce_val  = "";
>       struct nvme_tcp_ctrl *ctrl;
>       int ret;
>
> @@ -2502,6 +2520,22 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>                              opts->host_traddr);
>                       goto out_free_ctrl;
>               }
> +             srce_key = ", src-addr ";
> +             srce_val = opts->host_traddr;
> +     }
> +
> +     if (opts->mask & NVMF_OPT_HOST_TRIFACE) {
> +             struct net_device *ndev;
> +
> +             ndev = dev_get_by_name(&init_net, opts->host_triface);
> +             if (!ndev) {
> +                     pr_err("invalid interface passed: %s\n",
> +                            opts->host_triface);
> +                     ret = -ENODEV;
> +                     goto out_free_ctrl;
> +             }
> +             iface_key = ", iface ";
> +             iface_val = opts->host_triface;
>       }
>
>       if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {

Normally the options are just parts of the 'address' string; why didn't
you use that approach here?

> @@ -2530,8 +2564,9 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>       if (ret)
>               goto out_uninit_ctrl;
>
> -     dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
> -             ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
> +     dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp%s%s%s%s\n",
> +              ctrl->ctrl.opts->subsysnqn, &ctrl->addr,
> +              srce_key, srce_val, iface_key, iface_val);
>
>       mutex_lock(&nvme_tcp_ctrl_mutex);
>       list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
> @@ -2560,7 +2595,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
>                         NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
>                         NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
>                         NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
> -                       NVMF_OPT_TOS,
> +                       NVMF_OPT_TOS | NVMF_OPT_HOST_TRIFACE,
>       .create_ctrl    = nvme_tcp_create_ctrl,
>   };
>
>
Cheers,

Hannes
--
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-03 16:59     ` Belanger, Martin
@ 2021-05-04 13:25       ` Hannes Reinecke
  0 siblings, 0 replies; 23+ messages in thread
From: Hannes Reinecke @ 2021-05-04 13:25 UTC (permalink / raw)
  To: Belanger, Martin, Martin Belanger, linux-nvme; +Cc: kbusch, axboe, hch, sagi

On 5/3/21 6:59 PM, Belanger, Martin wrote:
> Hi Hannes,
> 
> I just noticed there were in-line comments. to answer you questions:
> 

This is the recommended style when working with linux patches; please do 
not top-post.

> Q1) Why not simply 'host_iface' ? 'triface' is a bit awkward.
> A1) I used TRIFACE to keep consistency with all other transport options: traddr, trsvcid, host_traddr.
> I will rename to host_iface at your suggestion.
> 
> Q2) Is this valid for all transports? I guess it would only work for 'tcp', and maybe 'rdma' if one
> would be running ROCE. Shouldn't we error out on other transports like 'fc' or 'loop'?
> A2) This is only for TCP, and we do check that this option is only allowed for TCP by specifying it
> in the "allowed_opts" as follows (see file tcp.c):
> 
> static struct nvmf_transport_ops nvme_tcp_transport = {
>          .name           = "tcp",
>          .module         = THIS_MODULE,
>          .required_opts  = NVMF_OPT_TRADDR,
>          .allowed_opts   = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
>                            NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
>                            NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
>                            NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
>                            NVMF_OPT_TOS | NVMF_OPT_HOST_TRIFACE,
>          .create_ctrl    = nvme_tcp_create_ctrl,
> };
>
Right, okay.

> Q3) Normally the options are just parts of the 'address' string; why didn't
> you use that approach here?
> A3) I don't understand what you mean?
> 
The 'address' string contains all transport arguments (without the 
transport type), concatenated by a ','.
So the 'host_iface' value should just be added to the address string 
itself, and not added as separate argument to the sysfs printf() call.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-04-15 19:28 ` [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting Martin Belanger
  2021-05-01 11:34   ` Hannes Reinecke
@ 2021-05-04 19:56   ` Sagi Grimberg
  2021-05-05  8:47     ` Hannes Reinecke
  1 sibling, 1 reply; 23+ messages in thread
From: Sagi Grimberg @ 2021-05-04 19:56 UTC (permalink / raw)
  To: Martin Belanger, linux-nvme; +Cc: kbusch, axboe, hch, Martin Belanger


> From: Martin Belanger <martin.belanger@dell.com>

Change log is missing...

> 
> ---
>   drivers/nvme/host/core.c    |  5 +++++
>   drivers/nvme/host/fabrics.c | 14 +++++++++++++
>   drivers/nvme/host/fabrics.h |  6 +++++-
>   drivers/nvme/host/tcp.c     | 41 ++++++++++++++++++++++++++++++++++---
>   4 files changed, 62 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 288ac47ff5b4..91ae11a1ae26 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3961,6 +3961,11 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
>   
>   		ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
>   				opts->host_traddr ?: "none");
> +		if (ret)
> +			return ret;
> +
> +		ret = add_uevent_var(env, "NVME_HOST_TRIFACE=%s",
> +				opts->host_triface ?: "none");

Given that this was the original intent for host_traddr, why not have
host_traddr resolve the iface from the address and set sockopt
SO_BINDTODEVICE on it?

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-04 19:56   ` Sagi Grimberg
@ 2021-05-05  8:47     ` Hannes Reinecke
  2021-05-05 14:31       ` Belanger, Martin
  0 siblings, 1 reply; 23+ messages in thread
From: Hannes Reinecke @ 2021-05-05  8:47 UTC (permalink / raw)
  To: Sagi Grimberg, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch, Martin Belanger

On 5/4/21 9:56 PM, Sagi Grimberg wrote:
> 
>> From: Martin Belanger <martin.belanger@dell.com>
> 
> Change log is missing...
> 
>>
>> ---
>>   drivers/nvme/host/core.c    |  5 +++++
>>   drivers/nvme/host/fabrics.c | 14 +++++++++++++
>>   drivers/nvme/host/fabrics.h |  6 +++++-
>>   drivers/nvme/host/tcp.c     | 41 ++++++++++++++++++++++++++++++++++---
>>   4 files changed, 62 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 288ac47ff5b4..91ae11a1ae26 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -3961,6 +3961,11 @@ static int nvme_class_uevent(struct device
>> *dev, struct kobj_uevent_env *env)
>>             ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
>>                   opts->host_traddr ?: "none");
>> +        if (ret)
>> +            return ret;
>> +
>> +        ret = add_uevent_var(env, "NVME_HOST_TRIFACE=%s",
>> +                opts->host_triface ?: "none");
> 
> Given that this was the original intent for host_traddr, why not have
> host_traddr resolve the iface from the address and set sockopt
> SO_BINDTODEVICE on it?
> 
That was my question, too.

I would vastly prefer to not have another option to deal with
(as it raises the question whether to add it eg during
'nvme connect-all')
And one could argue that this was the intention of _having_ the
host_traddr argument in the first place ...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		        Kernel Storage Architect
hare@suse.de			               +49 911 74053 688
SUSE Software Solutions Germany GmbH, 90409 Nürnberg
GF: F. Imendörffer, HRB 36809 (AG Nürnberg)

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-05  8:47     ` Hannes Reinecke
@ 2021-05-05 14:31       ` Belanger, Martin
  2021-05-05 18:33         ` James Smart
                           ` (2 more replies)
  0 siblings, 3 replies; 23+ messages in thread
From: Belanger, Martin @ 2021-05-05 14:31 UTC (permalink / raw)
  To: Hannes Reinecke, Sagi Grimberg, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

> -----Original Message-----
> From: Hannes Reinecke <hare@suse.de>
> Sent: Wednesday, May 5, 2021 4:47 AM
> To: Sagi Grimberg; Martin Belanger; linux-nvme@lists.infradead.org
> Cc: kbusch@kernel.org; axboe@fb.com; hch@lst.de; Belanger, Martin
> Subject: Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can
> be used to specify the IP interface to use for the connection. The driver uses
> that to set SO_BINDTODEVICE on the socket before connecting.
> 
> 
> [EXTERNAL EMAIL]
> 
> On 5/4/21 9:56 PM, Sagi Grimberg wrote:
> >
> >> From: Martin Belanger <martin.belanger@dell.com>
> >
> > Change log is missing...
> >
> >>
> >> ---
> >>   drivers/nvme/host/core.c    |  5 +++++
> >>   drivers/nvme/host/fabrics.c | 14 +++++++++++++
> >>   drivers/nvme/host/fabrics.h |  6 +++++-
> >>   drivers/nvme/host/tcp.c     | 41
> >> ++++++++++++++++++++++++++++++++++---
> >>   4 files changed, 62 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> >> index 288ac47ff5b4..91ae11a1ae26 100644
> >> --- a/drivers/nvme/host/core.c
> >> +++ b/drivers/nvme/host/core.c
> >> @@ -3961,6 +3961,11 @@ static int nvme_class_uevent(struct device
> >> *dev, struct kobj_uevent_env *env)
> >>             ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
> >>                   opts->host_traddr ?: "none");
> >> +        if (ret)
> >> +            return ret;
> >> +
> >> +        ret = add_uevent_var(env, "NVME_HOST_TRIFACE=%s",
> >> +                opts->host_triface ?: "none");
> >
> > Given that this was the original intent for host_traddr, why not have
> > host_traddr resolve the iface from the address and set sockopt
> > SO_BINDTODEVICE on it?
> >
> That was my question, too.
> 
> I would vastly prefer to not have another option to deal with (as it raises the
> question whether to add it eg during 'nvme connect-all') And one could
> argue that this was the intention of _having_ the host_traddr argument in
> the first place ...
> 
> Cheers,
> 
> Hannes
> --
> Dr. Hannes Reinecke		        Kernel Storage Architect
> hare@suse.de			               +49 911 74053 688
> SUSE Software Solutions Germany GmbH, 90409 Nürnberg
> GF: F. Imendörffer, HRB 36809 (AG Nürnberg)

Hi Sagi and Hannes,

Correct me if I'm wrong, but it sounds like host_traddr was primarily added for FC (at least it wasn't tested for TCP since it does not work in its current state). I'm not an expert on FC and maybe specifying an address is the right (and only) way to specify and interface for FC. For TCP, however, it's not advisable. Specifying an interface by its associated IP address is less intuitive than specifying the actual interface name and, in some cases, it simply won't work. That's because the association between interfaces and IP addresses is not predictable. IP addresses can be changed or can change by themselves over time (e.g. DHCP). Interface names are predictable [1] and will persist over time. Consider the following configuration. 

1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 100.0.0.100/24 scope global lo
       valid_lft forever preferred_lft forever    
2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    link/ether 08:00:27:21:65:ec brd ff:ff:ff:ff:ff:ff
    inet 100.0.0.100/24 scope global enp0s3
       valid_lft forever preferred_lft forever
3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
    inet 100.0.0.100/24 scope global enp0s8
       valid_lft forever preferred_lft forever

The above is a VM that I configured with the same IP address (100.0.0.100) on all interfaces. Doing a reverse lookup to identify the unique interface associated with 100.0.0.100 would simply not work here. And this is why the option host_iface is required. I understand that the above config does not represent a standard host system, but I'm using this to prove a point: "we can never know how a user will configure their system and the above configuration is perfectly fine by Linux".

The current TCP implementation for host_traddr uses bind()-before-connect(). This is a common construct to set the source IP address on the socket before connecting. This has no effect on how Linux will select the interface for the connection. That's because Linux uses the Weak End System model as described in RFC1122 [2]. Setting the source address on a connection is a common requirement that linux-nvme needs to support. In fact, specifying the Source IP address is a mandatory FedGov requirement (e.g. connection to a RADIUS/TACACS+ server). Consider the following configuration.

$ ip addr list dev enp0s8
3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
    inet 192.168.56.101/24 brd 192.168.56.255 scope global dynamic noprefixroute enp0s8
       valid_lft 426sec preferred_lft 426sec
    inet 192.168.56.102/24 scope global secondary enp0s8
       valid_lft forever preferred_lft forever
    inet 192.168.56.103/24 scope global secondary enp0s8
       valid_lft forever preferred_lft forever
    inet 192.168.56.104/24 scope global secondary enp0s8
       valid_lft forever preferred_lft forever

Here we can see that several addresses are associated with interface enp0s8. By default, Linux will select the default IP address, 192.168.56.101, as the source address when connecting over interface enp0s8. Some users, however, want the ability to specify a different address (e.g., 192.168.56.103) to be used as the source address. The option host_traddr can be used as-is to perform this function (I tested it).

In conclusion, I believe that for TCP we need 2 options. One that can be used to specify an interface. And one that can be used to set the source address. And users should be allowed to use one or the other, or both, or none. Of course, the documentation for host_traddr will need some clarification. It should state that when used for TCP connection, this option only sets the source address. And the documentation for host_iface should say that this option only applies to TCP connections.

References:
[1] https://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames/
[2] https://tools.ietf.org/html/rfc1122

Regards,

Martin Belanger
Dell Inc.
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-05 14:31       ` Belanger, Martin
@ 2021-05-05 18:33         ` James Smart
  2021-05-05 20:32         ` Sagi Grimberg
  2021-05-06  6:05         ` Hannes Reinecke
  2 siblings, 0 replies; 23+ messages in thread
From: James Smart @ 2021-05-05 18:33 UTC (permalink / raw)
  To: Belanger, Martin, Hannes Reinecke, Sagi Grimberg,
	Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

On 5/5/2021 7:31 AM, Belanger, Martin wrote:
...
>>>
>>> Given that this was the original intent for host_traddr, why not have
>>> host_traddr resolve the iface from the address and set sockopt
>>> SO_BINDTODEVICE on it?
>>>
>> That was my question, too.
>>
>> I would vastly prefer to not have another option to deal with (as it raises the
>> question whether to add it eg during 'nvme connect-all') And one could
>> argue that this was the intention of _having_ the host_traddr argument in
>> the first place ...
>>
>> Cheers,
>>
>> Hannes
> 
> Hi Sagi and Hannes,
> 
> Correct me if I'm wrong, but it sounds like host_traddr was primarily added for FC (at least it wasn't tested for TCP since it does not work in its current state). I'm not an expert on FC and maybe specifying an address is the right (and only) way to specify and interface for FC. For TCP, however, it's not advisable. Specifying an interface by its associated IP address is less intuitive than specifying the actual interface name and, in some cases, it simply won't work. That's because the association between interfaces and IP addresses is not predictable. IP addresses can be changed or can change by themselves over time (e.g. DHCP). 

HOST_TRADDR exists only in linux and has no strict definition other than 
what we define for it. I fully expect its value, just like TRADDR, will 
be transport specific.

For FC, we didn't want to use names based on driver instances, or on FC 
link addresses, so we chose WWNs that were specific to a FC port to 
identify it. The transport has the mechanisms to map the WWNs to a 
nvme-supporting FC host port.

Define whatever is necessary for rdma or TCP and what they put into 
HOST_TRADDR.

...
> 
> In conclusion, I believe that for TCP we need 2 options. One that can be used to specify an interface. And one that can be used to set the source address. And users should be allowed to use one or the other, or both, or none. Of course, the documentation for host_traddr will need some clarification. It should state that when used for TCP connection, this option only sets the source address. And the documentation for host_iface should say that this option only applies to TCP connections.

I don't like seeing 2 options. I'd rather you stuck with a single option 
and added formatting to the option so you could specify one or the other 
or both (or none by not specifying the option).

-- james

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-05 14:31       ` Belanger, Martin
  2021-05-05 18:33         ` James Smart
@ 2021-05-05 20:32         ` Sagi Grimberg
  2021-05-06 18:27           ` Michael Christie
  2021-05-06  6:05         ` Hannes Reinecke
  2 siblings, 1 reply; 23+ messages in thread
From: Sagi Grimberg @ 2021-05-05 20:32 UTC (permalink / raw)
  To: Belanger, Martin, Hannes Reinecke, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch


>>> Given that this was the original intent for host_traddr, why not have
>>> host_traddr resolve the iface from the address and set sockopt
>>> SO_BINDTODEVICE on it?
>>>
>> That was my question, too.
>>
>> I would vastly prefer to not have another option to deal with (as it raises the
>> question whether to add it eg during 'nvme connect-all') And one could
>> argue that this was the intention of _having_ the host_traddr argument in
>> the first place ...
>>
>> Cheers,
>>
>> Hannes
>> --
>> Dr. Hannes Reinecke		        Kernel Storage Architect
>> hare@suse.de			               +49 911 74053 688
>> SUSE Software Solutions Germany GmbH, 90409 Nürnberg
>> GF: F. Imendörffer, HRB 36809 (AG Nürnberg)
> 
> Hi Sagi and Hannes,
> 
> Correct me if I'm wrong, but it sounds like host_traddr was primarily added for FC (at least it wasn't tested for TCP since it does not work in its current state). I'm not an expert on FC and maybe specifying an address is the right (and only) way to specify and interface for FC. For TCP, however, it's not advisable. Specifying an interface by its associated IP address is less intuitive than specifying the actual interface name and, in some cases, it simply won't work. That's because the association between interfaces and IP addresses is not predictable. IP addresses can be changed or can change by themselves over time (e.g. DHCP). Interface names are predictable [1] and will persist over time. Consider the following configuration.
> 
> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
>      link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>      inet 100.0.0.100/24 scope global lo
>         valid_lft forever preferred_lft forever
> 2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
>      link/ether 08:00:27:21:65:ec brd ff:ff:ff:ff:ff:ff
>      inet 100.0.0.100/24 scope global enp0s3
>         valid_lft forever preferred_lft forever
> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
>      link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>      inet 100.0.0.100/24 scope global enp0s8
>         valid_lft forever preferred_lft forever
> 
> The above is a VM that I configured with the same IP address (100.0.0.100) on all interfaces. Doing a reverse lookup to identify the unique interface associated with 100.0.0.100 would simply not work here. And this is why the option host_iface is required. I understand that the above config does not represent a standard host system, but I'm using this to prove a point: "we can never know how a user will configure their system and the above configuration is perfectly fine by Linux".

Is this a common setting? I'd say that we should probably see a real
life need for it before adding a user interface for it.

What if we start with doing bind + BINDTODEVICE sockopt and if interface
resolution results in multiple devices we just skip the BINDTODEVICE
sockopt (which will not introduce a regression).

If this will cover 99% of the use-cases we are in good shape and we
didn't introduce yet another ABI that may be just confusing...

Having said that, if this setting is a real use-case we need to support
then there is no alternative to have the two options. So I'm a bit
on the fence here.

> The current TCP implementation for host_traddr uses bind()-before-connect(). This is a common construct to set the source IP address on the socket before connecting. This has no effect on how Linux will select the interface for the connection. That's because Linux uses the Weak End System model as described in RFC1122 [2]. Setting the source address on a connection is a common requirement that linux-nvme needs to support. In fact, specifying the Source IP address is a mandatory FedGov requirement (e.g. connection to a RADIUS/TACACS+ server). Consider the following configuration.
> 
> $ ip addr list dev enp0s8
> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
>      link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>      inet 192.168.56.101/24 brd 192.168.56.255 scope global dynamic noprefixroute enp0s8
>         valid_lft 426sec preferred_lft 426sec
>      inet 192.168.56.102/24 scope global secondary enp0s8
>         valid_lft forever preferred_lft forever
>      inet 192.168.56.103/24 scope global secondary enp0s8
>         valid_lft forever preferred_lft forever
>      inet 192.168.56.104/24 scope global secondary enp0s8
>         valid_lft forever preferred_lft forever
> 
> Here we can see that several addresses are associated with interface enp0s8. By default, Linux will select the default IP address, 192.168.56.101, as the source address when connecting over interface enp0s8. Some users, however, want the ability to specify a different address (e.g., 192.168.56.103) to be used as the source address. The option host_traddr can be used as-is to perform this function (I tested it).
> 

I meant that we do both bind and sockopt for host_traddr.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-05 14:31       ` Belanger, Martin
  2021-05-05 18:33         ` James Smart
  2021-05-05 20:32         ` Sagi Grimberg
@ 2021-05-06  6:05         ` Hannes Reinecke
  2021-05-06  7:00           ` Hannes Reinecke
  2 siblings, 1 reply; 23+ messages in thread
From: Hannes Reinecke @ 2021-05-06  6:05 UTC (permalink / raw)
  To: Belanger, Martin, Sagi Grimberg, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

On 5/5/21 4:31 PM, Belanger, Martin wrote:
>> -----Original Message-----
>> From: Hannes Reinecke <hare@suse.de>
>> Sent: Wednesday, May 5, 2021 4:47 AM
>> To: Sagi Grimberg; Martin Belanger; linux-nvme@lists.infradead.org
>> Cc: kbusch@kernel.org; axboe@fb.com; hch@lst.de; Belanger, Martin
>> Subject: Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can
>> be used to specify the IP interface to use for the connection. The driver uses
>> that to set SO_BINDTODEVICE on the socket before connecting.
>>
>>
>> [EXTERNAL EMAIL]
>>
>> On 5/4/21 9:56 PM, Sagi Grimberg wrote:
>>>
>>>> From: Martin Belanger <martin.belanger@dell.com>
>>>
>>> Change log is missing...
>>>
>>>>
>>>> ---
>>>>    drivers/nvme/host/core.c    |  5 +++++
>>>>    drivers/nvme/host/fabrics.c | 14 +++++++++++++
>>>>    drivers/nvme/host/fabrics.h |  6 +++++-
>>>>    drivers/nvme/host/tcp.c     | 41
>>>> ++++++++++++++++++++++++++++++++++---
>>>>    4 files changed, 62 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>>> index 288ac47ff5b4..91ae11a1ae26 100644
>>>> --- a/drivers/nvme/host/core.c
>>>> +++ b/drivers/nvme/host/core.c
>>>> @@ -3961,6 +3961,11 @@ static int nvme_class_uevent(struct device
>>>> *dev, struct kobj_uevent_env *env)
>>>>              ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
>>>>                    opts->host_traddr ?: "none");
>>>> +        if (ret)
>>>> +            return ret;
>>>> +
>>>> +        ret = add_uevent_var(env, "NVME_HOST_TRIFACE=%s",
>>>> +                opts->host_triface ?: "none");
>>>
>>> Given that this was the original intent for host_traddr, why not have
>>> host_traddr resolve the iface from the address and set sockopt
>>> SO_BINDTODEVICE on it?
>>>
>> That was my question, too.
>>
>> I would vastly prefer to not have another option to deal with (as it raises the
>> question whether to add it eg during 'nvme connect-all') And one could
>> argue that this was the intention of _having_ the host_traddr argument in
>> the first place ...
>>
>> Cheers,
>>
>> Hannes
>> --
>> Dr. Hannes Reinecke		        Kernel Storage Architect
>> hare@suse.de			               +49 911 74053 688
>> SUSE Software Solutions Germany GmbH, 90409 Nürnberg
>> GF: F. Imendörffer, HRB 36809 (AG Nürnberg)
> 
> Hi Sagi and Hannes,
> 
> Correct me if I'm wrong, but it sounds like host_traddr was primarily added for FC (at least it wasn't tested for TCP since it does not work in its current state). I'm not an expert on FC and maybe specifying an address is the right (and only) way to specify and interface for FC. For TCP, however, it's not advisable. Specifying an interface by its associated IP address is less intuitive than specifying the actual interface name and, in some cases, it simply won't work. That's because the association between interfaces and IP addresses is not predictable. IP addresses can be changed or can change by themselves over time (e.g. DHCP). Interface names are predictable [1] and will persist over time. Consider the following configuration.
> 
> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
>      link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>      inet 100.0.0.100/24 scope global lo
>         valid_lft forever preferred_lft forever
> 2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
>      link/ether 08:00:27:21:65:ec brd ff:ff:ff:ff:ff:ff
>      inet 100.0.0.100/24 scope global enp0s3
>         valid_lft forever preferred_lft forever
> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
>      link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>      inet 100.0.0.100/24 scope global enp0s8
>         valid_lft forever preferred_lft forever
> 
> The above is a VM that I configured with the same IP address (100.0.0.100) on all interfaces. Doing a reverse
> lookup to identify the unique interface associated with 100.0.0.100 would simply not work here. And this is why
> the option host_iface is required. I understand that the above config does not represent a standard host system,
> but I'm using this to prove a point: "we can never know how a user will configure their system and the above
> configuration is perfectly fine by Linux".
> 

... and messing up any switch MAC address caching when doing so. I guess 
the network admin will come down hard on you if you try that on a 
production system.
And I sincerely question whether this is a valid use-case; I'm already 
getting grief from our network admins if I dare to put two network 
interfaces from the same machine in the same network.

> The current TCP implementation for host_traddr uses bind()-before-connect(). This is a common construct to set the
> source IP address on the socket before connecting. This has no effect on how Linux will select the interface for the
> connection. That's because Linux uses the Weak End System model as described in RFC1122 [2]. Setting the source address
> on a connection is a common requirement that linux-nvme needs to support. In fact, specifying the Source IP address
> is a mandatory FedGov requirement (e.g. connection to a RADIUS/TACACS+ server). Consider the following configuration.
> 
> $ ip addr list dev enp0s8
> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
>      link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>      inet 192.168.56.101/24 brd 192.168.56.255 scope global dynamic noprefixroute enp0s8
>         valid_lft 426sec preferred_lft 426sec
>      inet 192.168.56.102/24 scope global secondary enp0s8
>         valid_lft forever preferred_lft forever
>      inet 192.168.56.103/24 scope global secondary enp0s8
>         valid_lft forever preferred_lft forever
>      inet 192.168.56.104/24 scope global secondary enp0s8
>         valid_lft forever preferred_lft forever
> 
> Here we can see that several addresses are associated with interface enp0s8. By default, Linux will select the
> default IP address, 192.168.56.101, as the source address when connecting over interface enp0s8. Some users,
> however, want the ability to specify a different address (e.g., 192.168.56.103) to be used as the source address.
> The option host_traddr can be used as-is to perform this function (I tested it).
> 

No disagreement here.

> In conclusion, I believe that for TCP we need 2 options. One that can be used to specify an interface. And one
> that can be used to set the source address. And users should be allowed to use one or the other, or both, or none.
> Of course, the documentation for host_traddr will need some clarification. It should state that when used for TCP
> connection, this option only sets the source address. And the documentation for host_iface should say that this
> option only applies to TCP connections.
> 

I'm with James Smart here. I do fail to see the need for 'host_iface' 
_without_ 'host_traddr'; especially for IPv6 where several addresses are 
standard just specifying 'host_iface' simply is not enough, and one has 
to specify 'host_traddr' additionally.

So 'host_iface' should be contingent on 'host_traddr', meaning we can 
just expand the syntax of 'host_traddr'.
One easy possibility would be to add ',nobind' to the host_traddr syntax 
which would indicate that we should _not_ bind to the underlying 
interface; I do think that binding to the respective interface should be 
the default.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-06  6:05         ` Hannes Reinecke
@ 2021-05-06  7:00           ` Hannes Reinecke
  2021-05-06 15:46             ` Belanger, Martin
  0 siblings, 1 reply; 23+ messages in thread
From: Hannes Reinecke @ 2021-05-06  7:00 UTC (permalink / raw)
  To: Belanger, Martin, Sagi Grimberg, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

On 5/6/21 8:05 AM, Hannes Reinecke wrote:
> On 5/5/21 4:31 PM, Belanger, Martin wrote:
[ .. ]
>> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
>> group default qlen 1000
>>      link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>>      inet 100.0.0.100/24 scope global lo
>>         valid_lft forever preferred_lft forever
>> 2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel
>> state UP group default qlen 1000
>>      link/ether 08:00:27:21:65:ec brd ff:ff:ff:ff:ff:ff
>>      inet 100.0.0.100/24 scope global enp0s3
>>         valid_lft forever preferred_lft forever
>> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel
>> state UP group default qlen 1000
>>      link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>>      inet 100.0.0.100/24 scope global enp0s8
>>         valid_lft forever preferred_lft forever
>>
>> The above is a VM that I configured with the same IP address
>> (100.0.0.100) on all interfaces. Doing a reverse
>> lookup to identify the unique interface associated with 100.0.0.100
>> would simply not work here. And this is why
>> the option host_iface is required. I understand that the above config
>> does not represent a standard host system,
>> but I'm using this to prove a point: "we can never know how a user
>> will configure their system and the above
>> configuration is perfectly fine by Linux".
>>
> 
> ... and messing up any switch MAC address caching when doing so. I guess
> the network admin will come down hard on you if you try that on a
> production system.
> And I sincerely question whether this is a valid use-case; I'm already
> getting grief from our network admins if I dare to put two network
> interfaces from the same machine in the same network.
> 
>> The current TCP implementation for host_traddr uses
>> bind()-before-connect(). This is a common construct to set the
>> source IP address on the socket before connecting. This has no effect
>> on how Linux will select the interface for the
>> connection. That's because Linux uses the Weak End System model as
>> described in RFC1122 [2]. Setting the source address
>> on a connection is a common requirement that linux-nvme needs to
>> support. In fact, specifying the Source IP address
>> is a mandatory FedGov requirement (e.g. connection to a RADIUS/TACACS+
>> server). Consider the following configuration.
>>
>> $ ip addr list dev enp0s8
>> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel
>> state UP group default qlen 1000
>>      link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>>      inet 192.168.56.101/24 brd 192.168.56.255 scope global dynamic
>> noprefixroute enp0s8
>>         valid_lft 426sec preferred_lft 426sec
>>      inet 192.168.56.102/24 scope global secondary enp0s8
>>         valid_lft forever preferred_lft forever
>>      inet 192.168.56.103/24 scope global secondary enp0s8
>>         valid_lft forever preferred_lft forever
>>      inet 192.168.56.104/24 scope global secondary enp0s8
>>         valid_lft forever preferred_lft forever
>>
>> Here we can see that several addresses are associated with interface
>> enp0s8. By default, Linux will select the
>> default IP address, 192.168.56.101, as the source address when
>> connecting over interface enp0s8. Some users,
>> however, want the ability to specify a different address (e.g.,
>> 192.168.56.103) to be used as the source address.
>> The option host_traddr can be used as-is to perform this function (I
>> tested it).
>>
> 
> No disagreement here.
> 
>> In conclusion, I believe that for TCP we need 2 options. One that can
>> be used to specify an interface. And one
>> that can be used to set the source address. And users should be
>> allowed to use one or the other, or both, or none.
>> Of course, the documentation for host_traddr will need some
>> clarification. It should state that when used for TCP
>> connection, this option only sets the source address. And the
>> documentation for host_iface should say that this
>> option only applies to TCP connections.
>>
> 
> I'm with James Smart here. I do fail to see the need for 'host_iface'
> _without_ 'host_traddr'; especially for IPv6 where several addresses are
> standard just specifying 'host_iface' simply is not enough, and one has
> to specify 'host_traddr' additionally.
> 
> So 'host_iface' should be contingent on 'host_traddr', meaning we can
> just expand the syntax of 'host_traddr'.
> One easy possibility would be to add ',nobind' to the host_traddr syntax
> which would indicate that we should _not_ bind to the underlying
> interface; I do think that binding to the respective interface should be
> the default.
> 
A-ha. Just spoke to our network folks, and they clarified the usage of
binding to an IP address vs binding to a network interface.
Apparently, binding to a source IP address does just that, setting the
source IP address of the outgoing packet. That packet will _still_ be
subjected to the normal routing table, as the routing table is just
influenced by the _destination_ IP address.
So if we want to have it routed via a specific interface (and thereby
influencing the routing table) we need to bind it to that interface.

The only valid scenario our network folks could come up with where we do
_not_ want to bind to an interface is for asymmetric flows, ie in cases
where the outgoing flow is routed to one interface and the incoming flow
is arriving on another interface. But even they admitted that it's not a
common scenario, and probably will be killed by anti-spoofing software
running on the core switches ...

But if we want to support _that_ then clearly binding to a specific
interface doesn't work.

So I would vote for making binding to the network interface holding the
IP address the default, and add an option ',nobind' to host_traddr to
skip it.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		        Kernel Storage Architect
hare@suse.de			               +49 911 74053 688
SUSE Software Solutions Germany GmbH, 90409 Nürnberg
GF: F. Imendörffer, HRB 36809 (AG Nürnberg)

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-06  7:00           ` Hannes Reinecke
@ 2021-05-06 15:46             ` Belanger, Martin
  2021-05-07 18:20               ` Sagi Grimberg
  0 siblings, 1 reply; 23+ messages in thread
From: Belanger, Martin @ 2021-05-06 15:46 UTC (permalink / raw)
  To: Hannes Reinecke, Sagi Grimberg, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

> On 5/6/21 8:05 AM, Hannes Reinecke wrote:
> > On 5/5/21 4:31 PM, Belanger, Martin wrote:
> [ .. ]
> >> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state
> UNKNOWN
> >> group default qlen 1000
> >>      link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> >>      inet 100.0.0.100/24 scope global lo
> >>         valid_lft forever preferred_lft forever
> >> 2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc
> fq_codel
> >> state UP group default qlen 1000
> >>      link/ether 08:00:27:21:65:ec brd ff:ff:ff:ff:ff:ff
> >>      inet 100.0.0.100/24 scope global enp0s3
> >>         valid_lft forever preferred_lft forever
> >> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc
> fq_codel
> >> state UP group default qlen 1000
> >>      link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
> >>      inet 100.0.0.100/24 scope global enp0s8
> >>         valid_lft forever preferred_lft forever
> >>
> >> The above is a VM that I configured with the same IP address
> >> (100.0.0.100) on all interfaces. Doing a reverse lookup to identify
> >> the unique interface associated with 100.0.0.100 would simply not
> >> work here. And this is why the option host_iface is required. I
> >> understand that the above config does not represent a standard host
> >> system, but I'm using this to prove a point: "we can never know how a
> >> user will configure their system and the above configuration is
> >> perfectly fine by Linux".
> >>
> >
> > ... and messing up any switch MAC address caching when doing so. I
> > guess the network admin will come down hard on you if you try that on
> > a production system.
> > And I sincerely question whether this is a valid use-case; I'm already
> > getting grief from our network admins if I dare to put two network
> > interfaces from the same machine in the same network.
> >
> >> The current TCP implementation for host_traddr uses
> >> bind()-before-connect(). This is a common construct to set the source
> >> IP address on the socket before connecting. This has no effect on how
> >> Linux will select the interface for the connection. That's because
> >> Linux uses the Weak End System model as described in RFC1122 [2].
> >> Setting the source address on a connection is a common requirement
> >> that linux-nvme needs to support. In fact, specifying the Source IP
> >> address is a mandatory FedGov requirement (e.g. connection to a
> >> RADIUS/TACACS+ server). Consider the following configuration.
> >>
> >> $ ip addr list dev enp0s8
> >> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc
> fq_codel
> >> state UP group default qlen 1000
> >>      link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
> >>      inet 192.168.56.101/24 brd 192.168.56.255 scope global dynamic
> >> noprefixroute enp0s8
> >>         valid_lft 426sec preferred_lft 426sec
> >>      inet 192.168.56.102/24 scope global secondary enp0s8
> >>         valid_lft forever preferred_lft forever
> >>      inet 192.168.56.103/24 scope global secondary enp0s8
> >>         valid_lft forever preferred_lft forever
> >>      inet 192.168.56.104/24 scope global secondary enp0s8
> >>         valid_lft forever preferred_lft forever
> >>
> >> Here we can see that several addresses are associated with interface
> >> enp0s8. By default, Linux will select the default IP address,
> >> 192.168.56.101, as the source address when connecting over interface
> >> enp0s8. Some users, however, want the ability to specify a different
> >> address (e.g.,
> >> 192.168.56.103) to be used as the source address.
> >> The option host_traddr can be used as-is to perform this function (I
> >> tested it).
> >>
> >
> > No disagreement here.
> >
> >> In conclusion, I believe that for TCP we need 2 options. One that can
> >> be used to specify an interface. And one that can be used to set the
> >> source address. And users should be allowed to use one or the other,
> >> or both, or none.
> >> Of course, the documentation for host_traddr will need some
> >> clarification. It should state that when used for TCP connection,
> >> this option only sets the source address. And the documentation for
> >> host_iface should say that this option only applies to TCP
> >> connections.
> >>
> >
> > I'm with James Smart here. I do fail to see the need for 'host_iface'
> > _without_ 'host_traddr'; especially for IPv6 where several addresses
> > are standard just specifying 'host_iface' simply is not enough, and
> > one has to specify 'host_traddr' additionally.
> >
> > So 'host_iface' should be contingent on 'host_traddr', meaning we can
> > just expand the syntax of 'host_traddr'.
> > One easy possibility would be to add ',nobind' to the host_traddr
> > syntax which would indicate that we should _not_ bind to the
> > underlying interface; I do think that binding to the respective
> > interface should be the default.
> >
> A-ha. Just spoke to our network folks, and they clarified the usage of binding
> to an IP address vs binding to a network interface.
> Apparently, binding to a source IP address does just that, setting the source
> IP address of the outgoing packet. That packet will _still_ be subjected to the
> normal routing table, as the routing table is just influenced by the
> _destination_ IP address.
> So if we want to have it routed via a specific interface (and thereby
> influencing the routing table) we need to bind it to that interface.
> 
> The only valid scenario our network folks could come up with where we do
> _not_ want to bind to an interface is for asymmetric flows, ie in cases where
> the outgoing flow is routed to one interface and the incoming flow is arriving
> on another interface. But even they admitted that it's not a common
> scenario, and probably will be killed by anti-spoofing software running on
> the core switches ...
> 
> But if we want to support _that_ then clearly binding to a specific interface
> doesn't work.
> 
> So I would vote for making binding to the network interface holding the IP
> address the default, and add an option ',nobind' to host_traddr to skip it.
> 
> Cheers,
> 
> Hannes
> --
> Dr. Hannes Reinecke		        Kernel Storage Architect
> hare@suse.de			               +49 911 74053 688
> SUSE Software Solutions Germany GmbH, 90409 Nürnberg
> GF: F. Imendörffer, HRB 36809 (AG Nürnberg)

Hi Hannes,

If the only concern here is the addition of yet another option (--host-iface), then may I suggest a simpler approach. What I'm proposing adheres to RFC4007 [1], which defines a way to specify an interface by using the '%' delimiter between the Destination IP address and the Interface. In fact, "ping" uses this approach [2]. With ping, one can force the connection to go a specific interface like this:

ping <dest-ip-addr>%<interface>

Extending this approach to nvme-cli we arrive to something like this:

nvme discover --traddr 100.64.29.2%enp0s8 --host-traddr 192.168.56.102 ....

This tells nvme to connect to 100.64.29.2 on interface enp0s8. We make no change to the --host-traddr option. It continues to be used to specify the Source IP address only (for the rare cases where users want to specify a Source Address other than the default). With this, the interface is specified by name and not by its associated address. This is not only more intuitive, but, as I stated before, eliminates the problem caused by mapping the same IP address to multiple interfaces (not to mention that doing a reverse lookup on an IP address to find the interface is extra work that we don’t need to do in kernel space).

This gives us full control. It allows us to set the "interface" and the "source address". We can set both, or only one of the two, or none. It follows standards and there is a precedent with "ping".

NOTE: RFC4007 specifically discusses specifying an Interface for IPv6, but I don’t see any issue extending the use of the '%' delimiter to IPv4 as well.

Ref:
[1] https://tools.ietf.org/html/rfc4007
[2] https://man7.org/linux/man-pages/man8/ping.8.html

Regards,

Martin Belanger
Dell EMC
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-05 20:32         ` Sagi Grimberg
@ 2021-05-06 18:27           ` Michael Christie
  0 siblings, 0 replies; 23+ messages in thread
From: Michael Christie @ 2021-05-06 18:27 UTC (permalink / raw)
  To: Sagi Grimberg
  Cc: Belanger, Martin, Hannes Reinecke, Martin Belanger, linux-nvme,
	kbusch, axboe, hch



> On May 5, 2021, at 3:32 PM, Sagi Grimberg <sagi@grimberg.me> wrote:
> 
> 
>>>> Given that this was the original intent for host_traddr, why not have
>>>> host_traddr resolve the iface from the address and set sockopt
>>>> SO_BINDTODEVICE on it?
>>>> 
>>> That was my question, too.
>>> 
>>> I would vastly prefer to not have another option to deal with (as it raises the
>>> question whether to add it eg during 'nvme connect-all') And one could
>>> argue that this was the intention of _having_ the host_traddr argument in
>>> the first place ...
>>> 
>>> Cheers,
>>> 
>>> Hannes
>>> --
>>> Dr. Hannes Reinecke		        Kernel Storage Architect
>>> hare@suse.de			               +49 911 74053 688
>>> SUSE Software Solutions Germany GmbH, 90409 Nürnberg
>>> GF: F. Imendörffer, HRB 36809 (AG Nürnberg)
>> Hi Sagi and Hannes,
>> Correct me if I'm wrong, but it sounds like host_traddr was primarily added for FC (at least it wasn't tested for TCP since it does not work in its current state). I'm not an expert on FC and maybe specifying an address is the right (and only) way to specify and interface for FC. For TCP, however, it's not advisable. Specifying an interface by its associated IP address is less intuitive than specifying the actual interface name and, in some cases, it simply won't work. That's because the association between interfaces and IP addresses is not predictable. IP addresses can be changed or can change by themselves over time (e.g. DHCP). Interface names are predictable [1] and will persist over time. Consider the following configuration.
>> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
>>     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>>     inet 100.0.0.100/24 scope global lo
>>        valid_lft forever preferred_lft forever
>> 2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
>>     link/ether 08:00:27:21:65:ec brd ff:ff:ff:ff:ff:ff
>>     inet 100.0.0.100/24 scope global enp0s3
>>        valid_lft forever preferred_lft forever
>> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
>>     link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>>     inet 100.0.0.100/24 scope global enp0s8
>>        valid_lft forever preferred_lft forever
>> The above is a VM that I configured with the same IP address (100.0.0.100) on all interfaces. Doing a reverse lookup to identify the unique interface associated with 100.0.0.100 would simply not work here. And this is why the option host_iface is required. I understand that the above config does not represent a standard host system, but I'm using this to prove a point: "we can never know how a user will configure their system and the above configuration is perfectly fine by Linux".
> 
> Is this a common setting? I'd say that we should probably see a real
> life need for it before adding a user interface for it.

For iscsi, people used SO_BINDTODEVICE because for whatver reason they
really like putting multiple interfaces on the same subnet. They
would then want to have connection1 use enp0s0 and connection2 use enp0s1,
etc.

I can’t remember why (it could be a bug or maybe the user can’t config the
routing table) but just doing a bind() will sometimes not work
because packets will come in on enp0s0 but when you do a send() it would
go out on enp0s. But enp0s1 might be connected in way it can only reach
certain target ports. The target would then not see the response.



_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-06 15:46             ` Belanger, Martin
@ 2021-05-07 18:20               ` Sagi Grimberg
  2021-05-10 13:49                 ` Belanger, Martin
  0 siblings, 1 reply; 23+ messages in thread
From: Sagi Grimberg @ 2021-05-07 18:20 UTC (permalink / raw)
  To: Belanger, Martin, Hannes Reinecke, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch



On 5/6/21 8:46 AM, Belanger, Martin wrote:
>> On 5/6/21 8:05 AM, Hannes Reinecke wrote:
>>> On 5/5/21 4:31 PM, Belanger, Martin wrote:
>> [ .. ]
>>>> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state
>> UNKNOWN
>>>> group default qlen 1000
>>>>       link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
>>>>       inet 100.0.0.100/24 scope global lo
>>>>          valid_lft forever preferred_lft forever
>>>> 2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc
>> fq_codel
>>>> state UP group default qlen 1000
>>>>       link/ether 08:00:27:21:65:ec brd ff:ff:ff:ff:ff:ff
>>>>       inet 100.0.0.100/24 scope global enp0s3
>>>>          valid_lft forever preferred_lft forever
>>>> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc
>> fq_codel
>>>> state UP group default qlen 1000
>>>>       link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>>>>       inet 100.0.0.100/24 scope global enp0s8
>>>>          valid_lft forever preferred_lft forever
>>>>
>>>> The above is a VM that I configured with the same IP address
>>>> (100.0.0.100) on all interfaces. Doing a reverse lookup to identify
>>>> the unique interface associated with 100.0.0.100 would simply not
>>>> work here. And this is why the option host_iface is required. I
>>>> understand that the above config does not represent a standard host
>>>> system, but I'm using this to prove a point: "we can never know how a
>>>> user will configure their system and the above configuration is
>>>> perfectly fine by Linux".
>>>>
>>>
>>> ... and messing up any switch MAC address caching when doing so. I
>>> guess the network admin will come down hard on you if you try that on
>>> a production system.
>>> And I sincerely question whether this is a valid use-case; I'm already
>>> getting grief from our network admins if I dare to put two network
>>> interfaces from the same machine in the same network.
>>>
>>>> The current TCP implementation for host_traddr uses
>>>> bind()-before-connect(). This is a common construct to set the source
>>>> IP address on the socket before connecting. This has no effect on how
>>>> Linux will select the interface for the connection. That's because
>>>> Linux uses the Weak End System model as described in RFC1122 [2].
>>>> Setting the source address on a connection is a common requirement
>>>> that linux-nvme needs to support. In fact, specifying the Source IP
>>>> address is a mandatory FedGov requirement (e.g. connection to a
>>>> RADIUS/TACACS+ server). Consider the following configuration.
>>>>
>>>> $ ip addr list dev enp0s8
>>>> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc
>> fq_codel
>>>> state UP group default qlen 1000
>>>>       link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>>>>       inet 192.168.56.101/24 brd 192.168.56.255 scope global dynamic
>>>> noprefixroute enp0s8
>>>>          valid_lft 426sec preferred_lft 426sec
>>>>       inet 192.168.56.102/24 scope global secondary enp0s8
>>>>          valid_lft forever preferred_lft forever
>>>>       inet 192.168.56.103/24 scope global secondary enp0s8
>>>>          valid_lft forever preferred_lft forever
>>>>       inet 192.168.56.104/24 scope global secondary enp0s8
>>>>          valid_lft forever preferred_lft forever
>>>>
>>>> Here we can see that several addresses are associated with interface
>>>> enp0s8. By default, Linux will select the default IP address,
>>>> 192.168.56.101, as the source address when connecting over interface
>>>> enp0s8. Some users, however, want the ability to specify a different
>>>> address (e.g.,
>>>> 192.168.56.103) to be used as the source address.
>>>> The option host_traddr can be used as-is to perform this function (I
>>>> tested it).
>>>>
>>>
>>> No disagreement here.
>>>
>>>> In conclusion, I believe that for TCP we need 2 options. One that can
>>>> be used to specify an interface. And one that can be used to set the
>>>> source address. And users should be allowed to use one or the other,
>>>> or both, or none.
>>>> Of course, the documentation for host_traddr will need some
>>>> clarification. It should state that when used for TCP connection,
>>>> this option only sets the source address. And the documentation for
>>>> host_iface should say that this option only applies to TCP
>>>> connections.
>>>>
>>>
>>> I'm with James Smart here. I do fail to see the need for 'host_iface'
>>> _without_ 'host_traddr'; especially for IPv6 where several addresses
>>> are standard just specifying 'host_iface' simply is not enough, and
>>> one has to specify 'host_traddr' additionally.
>>>
>>> So 'host_iface' should be contingent on 'host_traddr', meaning we can
>>> just expand the syntax of 'host_traddr'.
>>> One easy possibility would be to add ',nobind' to the host_traddr
>>> syntax which would indicate that we should _not_ bind to the
>>> underlying interface; I do think that binding to the respective
>>> interface should be the default.
>>>
>> A-ha. Just spoke to our network folks, and they clarified the usage of binding
>> to an IP address vs binding to a network interface.
>> Apparently, binding to a source IP address does just that, setting the source
>> IP address of the outgoing packet. That packet will _still_ be subjected to the
>> normal routing table, as the routing table is just influenced by the
>> _destination_ IP address.
>> So if we want to have it routed via a specific interface (and thereby
>> influencing the routing table) we need to bind it to that interface.
>>
>> The only valid scenario our network folks could come up with where we do
>> _not_ want to bind to an interface is for asymmetric flows, ie in cases where
>> the outgoing flow is routed to one interface and the incoming flow is arriving
>> on another interface. But even they admitted that it's not a common
>> scenario, and probably will be killed by anti-spoofing software running on
>> the core switches ...
>>
>> But if we want to support _that_ then clearly binding to a specific interface
>> doesn't work.
>>
>> So I would vote for making binding to the network interface holding the IP
>> address the default, and add an option ',nobind' to host_traddr to skip it.
>>
>> Cheers,
>>
>> Hannes
>> --
>> Dr. Hannes Reinecke		        Kernel Storage Architect
>> hare@suse.de			               +49 911 74053 688
>> SUSE Software Solutions Germany GmbH, 90409 Nürnberg
>> GF: F. Imendörffer, HRB 36809 (AG Nürnberg)
> 
> Hi Hannes,
> 
> If the only concern here is the addition of yet another option (--host-iface), then may I suggest a simpler approach. What I'm proposing adheres to RFC4007 [1], which defines a way to specify an interface by using the '%' delimiter between the Destination IP address and the Interface. In fact, "ping" uses this approach [2]. With ping, one can force the connection to go a specific interface like this:
> 
> ping <dest-ip-addr>%<interface>

Ping only supports this syntax for IPv6 no?

> Extending this approach to nvme-cli we arrive to something like this:
> 
> nvme discover --traddr 100.64.29.2%enp0s8 --host-traddr 192.168.56.102 ....

We already support this for IPv6, we can do that also for IPv4, but this
syntax may not be trivially expected for ipv4?

> This tells nvme to connect to 100.64.29.2 on interface enp0s8. We make no change to the --host-traddr option. It continues to be used to specify the Source IP address only (for the rare cases where users want to specify a Source Address other than the default). With this, the interface is specified by name and not by its associated address. This is not only more intuitive, but, as I stated before, eliminates the problem caused by mapping the same IP address to multiple interfaces (not to mention that doing a reverse lookup on an IP address to find the interface is extra work that we don’t need to do in kernel space).

Maybe we do something like ping -I for host_traddr, from ping man pages:

-I interface
            interface is either an address, an interface name or a VRF 
name. If interface is an address, it sets source address to specified 
interface address. If interface is an
            interface name, it sets source interface to specified 
interface. If interface is a VRF name, each packet is routed using the 
corresponding routing table; in this case, the -I
            option can be repeated to specify a source address. NOTE: 
For IPv6, when doing ping to a link-local scope address, link 
specification (by the '%'-notation in destination, or
            by this option) can be used but it is no longer required.


Without the repetition though, unless we need to support two interfaces
that share the same multiple addresses in the same subnet, which sounds
completely crazy to me...

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-07 18:20               ` Sagi Grimberg
@ 2021-05-10 13:49                 ` Belanger, Martin
  2021-05-10 18:13                   ` Sagi Grimberg
  0 siblings, 1 reply; 23+ messages in thread
From: Belanger, Martin @ 2021-05-10 13:49 UTC (permalink / raw)
  To: Sagi Grimberg, Hannes Reinecke, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

> On 5/6/21 8:46 AM, Belanger, Martin wrote:
> >> On 5/6/21 8:05 AM, Hannes Reinecke wrote:
> >>> On 5/5/21 4:31 PM, Belanger, Martin wrote:
> >> [ .. ]
> >>>> 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state
> >> UNKNOWN
> >>>> group default qlen 1000
> >>>>       link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> >>>>       inet 100.0.0.100/24 scope global lo
> >>>>          valid_lft forever preferred_lft forever
> >>>> 2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc
> >> fq_codel
> >>>> state UP group default qlen 1000
> >>>>       link/ether 08:00:27:21:65:ec brd ff:ff:ff:ff:ff:ff
> >>>>       inet 100.0.0.100/24 scope global enp0s3
> >>>>          valid_lft forever preferred_lft forever
> >>>> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc
> >> fq_codel
> >>>> state UP group default qlen 1000
> >>>>       link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
> >>>>       inet 100.0.0.100/24 scope global enp0s8
> >>>>          valid_lft forever preferred_lft forever
> >>>>
> >>>> The above is a VM that I configured with the same IP address
> >>>> (100.0.0.100) on all interfaces. Doing a reverse lookup to identify
> >>>> the unique interface associated with 100.0.0.100 would simply not
> >>>> work here. And this is why the option host_iface is required. I
> >>>> understand that the above config does not represent a standard host
> >>>> system, but I'm using this to prove a point: "we can never know how
> >>>> a user will configure their system and the above configuration is
> >>>> perfectly fine by Linux".
> >>>>
> >>>
> >>> ... and messing up any switch MAC address caching when doing so. I
> >>> guess the network admin will come down hard on you if you try that
> >>> on a production system.
> >>> And I sincerely question whether this is a valid use-case; I'm
> >>> already getting grief from our network admins if I dare to put two
> >>> network interfaces from the same machine in the same network.
> >>>
> >>>> The current TCP implementation for host_traddr uses
> >>>> bind()-before-connect(). This is a common construct to set the
> >>>> source IP address on the socket before connecting. This has no
> >>>> effect on how Linux will select the interface for the connection.
> >>>> That's because Linux uses the Weak End System model as described in
> RFC1122 [2].
> >>>> Setting the source address on a connection is a common requirement
> >>>> that linux-nvme needs to support. In fact, specifying the Source IP
> >>>> address is a mandatory FedGov requirement (e.g. connection to a
> >>>> RADIUS/TACACS+ server). Consider the following configuration.
> >>>>
> >>>> $ ip addr list dev enp0s8
> >>>> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc
> >> fq_codel
> >>>> state UP group default qlen 1000
> >>>>       link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
> >>>>       inet 192.168.56.101/24 brd 192.168.56.255 scope global
> >>>> dynamic noprefixroute enp0s8
> >>>>          valid_lft 426sec preferred_lft 426sec
> >>>>       inet 192.168.56.102/24 scope global secondary enp0s8
> >>>>          valid_lft forever preferred_lft forever
> >>>>       inet 192.168.56.103/24 scope global secondary enp0s8
> >>>>          valid_lft forever preferred_lft forever
> >>>>       inet 192.168.56.104/24 scope global secondary enp0s8
> >>>>          valid_lft forever preferred_lft forever
> >>>>
> >>>> Here we can see that several addresses are associated with
> >>>> interface enp0s8. By default, Linux will select the default IP
> >>>> address, 192.168.56.101, as the source address when connecting over
> >>>> interface enp0s8. Some users, however, want the ability to specify
> >>>> a different address (e.g.,
> >>>> 192.168.56.103) to be used as the source address.
> >>>> The option host_traddr can be used as-is to perform this function
> >>>> (I tested it).
> >>>>
> >>>
> >>> No disagreement here.
> >>>
> >>>> In conclusion, I believe that for TCP we need 2 options. One that
> >>>> can be used to specify an interface. And one that can be used to
> >>>> set the source address. And users should be allowed to use one or
> >>>> the other, or both, or none.
> >>>> Of course, the documentation for host_traddr will need some
> >>>> clarification. It should state that when used for TCP connection,
> >>>> this option only sets the source address. And the documentation for
> >>>> host_iface should say that this option only applies to TCP
> >>>> connections.
> >>>>
> >>>
> >>> I'm with James Smart here. I do fail to see the need for 'host_iface'
> >>> _without_ 'host_traddr'; especially for IPv6 where several addresses
> >>> are standard just specifying 'host_iface' simply is not enough, and
> >>> one has to specify 'host_traddr' additionally.
> >>>
> >>> So 'host_iface' should be contingent on 'host_traddr', meaning we
> >>> can just expand the syntax of 'host_traddr'.
> >>> One easy possibility would be to add ',nobind' to the host_traddr
> >>> syntax which would indicate that we should _not_ bind to the
> >>> underlying interface; I do think that binding to the respective
> >>> interface should be the default.
> >>>
> >> A-ha. Just spoke to our network folks, and they clarified the usage
> >> of binding to an IP address vs binding to a network interface.
> >> Apparently, binding to a source IP address does just that, setting
> >> the source IP address of the outgoing packet. That packet will
> >> _still_ be subjected to the normal routing table, as the routing
> >> table is just influenced by the _destination_ IP address.
> >> So if we want to have it routed via a specific interface (and thereby
> >> influencing the routing table) we need to bind it to that interface.
> >>
> >> The only valid scenario our network folks could come up with where we
> >> do _not_ want to bind to an interface is for asymmetric flows, ie in
> >> cases where the outgoing flow is routed to one interface and the
> >> incoming flow is arriving on another interface. But even they
> >> admitted that it's not a common scenario, and probably will be killed
> >> by anti-spoofing software running on the core switches ...
> >>
> >> But if we want to support _that_ then clearly binding to a specific
> >> interface doesn't work.
> >>
> >> So I would vote for making binding to the network interface holding
> >> the IP address the default, and add an option ',nobind' to host_traddr to
> skip it.
> >>
> >> Cheers,
> >>
> >> Hannes
> >> --
> >> Dr. Hannes Reinecke		        Kernel Storage Architect
> >> hare@suse.de			               +49 911 74053 688
> >> SUSE Software Solutions Germany GmbH, 90409 Nürnberg
> >> GF: F. Imendörffer, HRB 36809 (AG Nürnberg)
> >
> > Hi Hannes,
> >
> > If the only concern here is the addition of yet another option (--host-iface),
> then may I suggest a simpler approach. What I'm proposing adheres to
> RFC4007 [1], which defines a way to specify an interface by using the '%'
> delimiter between the Destination IP address and the Interface. In fact,
> "ping" uses this approach [2]. With ping, one can force the connection to go
> a specific interface like this:
> >
> > ping <dest-ip-addr>%<interface>
> 
> Ping only supports this syntax for IPv6 no?
> 
> > Extending this approach to nvme-cli we arrive to something like this:
> >
> > nvme discover --traddr 100.64.29.2%enp0s8 --host-traddr 192.168.56.102
> ....
> 
> We already support this for IPv6, we can do that also for IPv4, but this syntax
> may not be trivially expected for ipv4?

I tried this for IPv6 and it doesn't work. Here's what I get:
$ sudo nvme discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0
Failed to write to /dev/nvme-fabrics: Invalid argument
$ sudo nvme discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0%enp0s8
Failed to write to /dev/nvme-fabrics: Invalid argument
$ sudo nvme discover -g -G -t tcp -s 8009 -a [fe80::800:27ff:fe00:0]
failed to resolve host [fe80::800:27ff:fe00:0] info
$ sudo nvme discover -g -G -t tcp -s 8009 -a [fe80::800:27ff:fe00:0%enp0s8]
failed to resolve host [fe80::800:27ff:fe00:0%enp0s8] info

> 
> > This tells nvme to connect to 100.64.29.2 on interface enp0s8. We make no
> change to the --host-traddr option. It continues to be used to specify the
> Source IP address only (for the rare cases where users want to specify a
> Source Address other than the default). With this, the interface is specified
> by name and not by its associated address. This is not only more intuitive,
> but, as I stated before, eliminates the problem caused by mapping the same
> IP address to multiple interfaces (not to mention that doing a reverse lookup
> on an IP address to find the interface is extra work that we don’t need to do
> in kernel space).
> 
> Maybe we do something like ping -I for host_traddr, from ping man pages:
> 
> -I interface
>             interface is either an address, an interface name or a VRF name. If
> interface is an address, it sets source address to specified interface address.
> If interface is an
>             interface name, it sets source interface to specified interface. If
> interface is a VRF name, each packet is routed using the corresponding
> routing table; in this case, the -I
>             option can be repeated to specify a source address. NOTE:
> For IPv6, when doing ping to a link-local scope address, link specification (by
> the '%'-notation in destination, or
>             by this option) can be used but it is no longer required.
> 
> 
> Without the repetition though, unless we need to support two interfaces
> that share the same multiple addresses in the same subnet, which sounds
> completely crazy to me...

Hi Sagi,

If we want to follow ping as an example, the repetition is needed not to specify two interfaces, but to specify an interface and the source address. In a previous example (reproduced below), I described a configuration where an interface had several addresses assigned to it. By default, Linux always picks the same Source address (i.e. 192.168.56.101 in this example) when connecting. If a user wants a different source address they need a way to specify it (currently with --host-traddr). Users also need a way to specify an interface separately from the source address (either with a new option like --host-iface or by repeating --host-traddr). With the example below, if we wanted to force ping to use interface enp0s8 and source address 192.168.56.103, we would repeat the -I option, for example "ping -I enp0s8 -I 192.168.56.103". We need a way to do the same with nvme-cli. 

I thought that introducing a new option, "--host-iface", had the smallest impact since it requires less code changes, but that was turned down (not sure exactly why). I then suggested that we use the '%' delimiter for IPv4 and IPv6. I agree that it is not 100% the same as ping since ping only allows the '%' delimiter for IPv6 addresses (as per RFC4007). As you suggested, we could repeat the --host-traddr option (e.g. --host-traddr enp0s8 --host-traddr 192.168.56.103), but this is more impactful to the code than adding a separate --host-iface option.

EXAMPLE: Interface with several addresses assigned:
$ ip addr list dev enp0s8
3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 ...
      link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
      inet 192.168.56.101/24 brd 192.168.56.255 scope ...
         valid_lft 426sec preferred_lft 426sec
      inet 192.168.56.102/24 scope global secondary enp0s8
         valid_lft forever preferred_lft forever
      inet 192.168.56.103/24 scope global secondary enp0s8
         valid_lft forever preferred_lft forever
      inet 192.168.56.104/24 scope global secondary enp0s8
         valid_lft forever preferred_lft forever

In the end, it doesn't really matter (to me) how it is implemented. However, a solution that have little to no impact on existing code would be nice. Just like ping, we need a way to specify an interface by its **interface name** (and not by its associated IP address), and we need to allow users to select which Source IP address to use when there are multiple addresses associated with an interface.

Regards,
Martin
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-10 13:49                 ` Belanger, Martin
@ 2021-05-10 18:13                   ` Sagi Grimberg
  2021-05-10 19:18                     ` Belanger, Martin
  0 siblings, 1 reply; 23+ messages in thread
From: Sagi Grimberg @ 2021-05-10 18:13 UTC (permalink / raw)
  To: Belanger, Martin, Hannes Reinecke, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch


>>> ping <dest-ip-addr>%<interface>
>>
>> Ping only supports this syntax for IPv6 no?
>>
>>> Extending this approach to nvme-cli we arrive to something like this:
>>>
>>> nvme discover --traddr 100.64.29.2%enp0s8 --host-traddr 192.168.56.102
>> ....
>>
>> We already support this for IPv6, we can do that also for IPv4, but this syntax
>> may not be trivially expected for ipv4?
> 
> I tried this for IPv6 and it doesn't work. Here's what I get:
> $ sudo nvme discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0
> Failed to write to /dev/nvme-fabrics: Invalid argument
> $ sudo nvme discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0%enp0s8
> Failed to write to /dev/nvme-fabrics: Invalid argument
> $ sudo nvme discover -g -G -t tcp -s 8009 -a [fe80::800:27ff:fe00:0]
> failed to resolve host [fe80::800:27ff:fe00:0] info
> $ sudo nvme discover -g -G -t tcp -s 8009 -a [fe80::800:27ff:fe00:0%enp0s8]
> failed to resolve host [fe80::800:27ff:fe00:0%enp0s8] info

# nvme discover -t tcp -a fe80::5054:ff:fef1:9f3b -w 
fe80::5054:ff:fe28:5edb%enp6s0

Discovery Log Number of Records 1, Generation counter 5
=====Discovery Log Entry 0======
trtype:  tcp
adrfam:  ipv6
subtype: nvme subsystem
treq:    not specified, sq flow control disable supported
portid:  3
trsvcid: 8009
subnqn:  testnqn1
traddr:  fe80::5054:ff:fef1:9f3b%enp6s0
sectype: none

> 
>>
>>> This tells nvme to connect to 100.64.29.2 on interface enp0s8. We make no
>> change to the --host-traddr option. It continues to be used to specify the
>> Source IP address only (for the rare cases where users want to specify a
>> Source Address other than the default). With this, the interface is specified
>> by name and not by its associated address. This is not only more intuitive,
>> but, as I stated before, eliminates the problem caused by mapping the same
>> IP address to multiple interfaces (not to mention that doing a reverse lookup
>> on an IP address to find the interface is extra work that we don’t need to do
>> in kernel space).
>>
>> Maybe we do something like ping -I for host_traddr, from ping man pages:
>>
>> -I interface
>>              interface is either an address, an interface name or a VRF name. If
>> interface is an address, it sets source address to specified interface address.
>> If interface is an
>>              interface name, it sets source interface to specified interface. If
>> interface is a VRF name, each packet is routed using the corresponding
>> routing table; in this case, the -I
>>              option can be repeated to specify a source address. NOTE:
>> For IPv6, when doing ping to a link-local scope address, link specification (by
>> the '%'-notation in destination, or
>>              by this option) can be used but it is no longer required.
>>
>>
>> Without the repetition though, unless we need to support two interfaces
>> that share the same multiple addresses in the same subnet, which sounds
>> completely crazy to me...
> 
> Hi Sagi,
> 
> If we want to follow ping as an example, the repetition is needed not to specify two interfaces, but to specify an interface and the source address. In a previous example (reproduced below), I described a configuration where an interface had several addresses assigned to it. By default, Linux always picks the same Source address (i.e. 192.168.56.101 in this example) when connecting. If a user wants a different source address they need a way to specify it (currently with --host-traddr). Users also need a way to specify an interface separately from the source address (either with a new option like --host-iface or by repeating --host-traddr). With the example below, if we wanted to force ping to use interface enp0s8 and source address 192.168.56.103, we would repeat the -I option, for example "ping -I enp0s8 -I 192.168.56.103". We need a way to do the same with nvme-cli.
> 
> I thought that introducing a new option, "--host-iface", had the smallest impact since it requires less code changes, but that was turned down (not sure exactly why). I then suggested that we use the '%' delimiter for IPv4 and IPv6. I agree that it is not 100% the same as ping since ping only allows the '%' delimiter for IPv6 addresses (as per RFC4007). As you suggested, we could repeat the --host-traddr option (e.g. --host-traddr enp0s8 --host-traddr 192.168.56.103), but this is more impactful to the code than adding a separate --host-iface option.

It's less about code-changes and more on adding a new user ABI, that is
the reason why (at least I'm fully on board just yet).

> EXAMPLE: Interface with several addresses assigned:
> $ ip addr list dev enp0s8
> 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 ...
>        link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
>        inet 192.168.56.101/24 brd 192.168.56.255 scope ...
>           valid_lft 426sec preferred_lft 426sec
>        inet 192.168.56.102/24 scope global secondary enp0s8
>           valid_lft forever preferred_lft forever
>        inet 192.168.56.103/24 scope global secondary enp0s8
>           valid_lft forever preferred_lft forever
>        inet 192.168.56.104/24 scope global secondary enp0s8
>           valid_lft forever preferred_lft forever
> 
> In the end, it doesn't really matter (to me) how it is implemented. However, a solution that have little to no impact on existing code would be nice. Just like ping, we need a way to specify an interface by its **interface name** (and not by its associated IP address), and we need to allow users to select which Source IP address to use when there are multiple addresses associated with an interface.

The '%' may be confusing when it comes to other transports as well (e.g.
rdma/fc would have to either reject or ignore it, but regardless of how
we add it that would be the case). Having host-traddr accept either ip
or interface seems the most desirable, however that won't work if there
are 2 interfaces that share multiple ip addresses. So if this is a
requirement we'll probably need to add --host-iface as another option...

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-10 18:13                   ` Sagi Grimberg
@ 2021-05-10 19:18                     ` Belanger, Martin
  2021-05-11  0:28                       ` Sagi Grimberg
  0 siblings, 1 reply; 23+ messages in thread
From: Belanger, Martin @ 2021-05-10 19:18 UTC (permalink / raw)
  To: Sagi Grimberg, Hannes Reinecke, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

> >>> ping <dest-ip-addr>%<interface>
> >>
> >> Ping only supports this syntax for IPv6 no?
> >>
> >>> Extending this approach to nvme-cli we arrive to something like this:
> >>>
> >>> nvme discover --traddr 100.64.29.2%enp0s8 --host-traddr
> >>> 192.168.56.102
> >> ....
> >>
> >> We already support this for IPv6, we can do that also for IPv4, but
> >> this syntax may not be trivially expected for ipv4?
> >
> > I tried this for IPv6 and it doesn't work. Here's what I get:
> > $ sudo nvme discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0
> > Failed to write to /dev/nvme-fabrics: Invalid argument $ sudo nvme
> > discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0%enp0s8 Failed
> > to write to /dev/nvme-fabrics: Invalid argument $ sudo nvme discover
> > -g -G -t tcp -s 8009 -a [fe80::800:27ff:fe00:0] failed to resolve host
> > [fe80::800:27ff:fe00:0] info $ sudo nvme discover -g -G -t tcp -s 8009
> > -a [fe80::800:27ff:fe00:0%enp0s8] failed to resolve host
> > [fe80::800:27ff:fe00:0%enp0s8] info
> 
> # nvme discover -t tcp -a fe80::5054:ff:fef1:9f3b -w
> fe80::5054:ff:fe28:5edb%enp6s0

Thanks for clarifying the syntax. However, that doesn't work for me. 

# nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w fe80::9266:4855:6cf2:f7e9%enp0s8
Failed to write to /dev/nvme-fabrics: Connection refused

Note that the above syntax does not comply with RFC4007. The '%' delimiter is supposed to be appended to the Destination IP address and not the Source Address. In other words, to be RFC4007-compliant, the syntax should be (using your example):

# nvme discover -t tcp -a fe80::5054:ff:fef1:9f3b%enp6s0 -w fe80::5054:ff:fe28:5edb

This tells nvme-cli to connect to a controller at address fe80::5054:ff:fef1:9f3b using interface enp6s0 for the connection. And set the Source address to fe80::5054:ff:fe28:5edb.

> 
> Discovery Log Number of Records 1, Generation counter 5 =====Discovery
> Log Entry 0======
> trtype:  tcp
> adrfam:  ipv6
> subtype: nvme subsystem
> treq:    not specified, sq flow control disable supported
> portid:  3
> trsvcid: 8009
> subnqn:  testnqn1
> traddr:  fe80::5054:ff:fef1:9f3b%enp6s0
> sectype: none
> 
> >
> >>
> >>> This tells nvme to connect to 100.64.29.2 on interface enp0s8. We
> >>> make no
> >> change to the --host-traddr option. It continues to be used to
> >> specify the Source IP address only (for the rare cases where users
> >> want to specify a Source Address other than the default). With this,
> >> the interface is specified by name and not by its associated address.
> >> This is not only more intuitive, but, as I stated before, eliminates
> >> the problem caused by mapping the same IP address to multiple
> >> interfaces (not to mention that doing a reverse lookup on an IP
> >> address to find the interface is extra work that we don’t need to do in
> kernel space).
> >>
> >> Maybe we do something like ping -I for host_traddr, from ping man
> pages:
> >>
> >> -I interface
> >>              interface is either an address, an interface name or a
> >> VRF name. If interface is an address, it sets source address to specified
> interface address.
> >> If interface is an
> >>              interface name, it sets source interface to specified
> >> interface. If interface is a VRF name, each packet is routed using
> >> the corresponding routing table; in this case, the -I
> >>              option can be repeated to specify a source address. NOTE:
> >> For IPv6, when doing ping to a link-local scope address, link
> >> specification (by the '%'-notation in destination, or
> >>              by this option) can be used but it is no longer required.
> >>
> >>
> >> Without the repetition though, unless we need to support two
> >> interfaces that share the same multiple addresses in the same subnet,
> >> which sounds completely crazy to me...
> >
> > Hi Sagi,
> >
> > If we want to follow ping as an example, the repetition is needed not to
> specify two interfaces, but to specify an interface and the source address. In
> a previous example (reproduced below), I described a configuration where
> an interface had several addresses assigned to it. By default, Linux always
> picks the same Source address (i.e. 192.168.56.101 in this example) when
> connecting. If a user wants a different source address they need a way to
> specify it (currently with --host-traddr). Users also need a way to specify an
> interface separately from the source address (either with a new option like --
> host-iface or by repeating --host-traddr). With the example below, if we
> wanted to force ping to use interface enp0s8 and source address
> 192.168.56.103, we would repeat the -I option, for example "ping -I enp0s8 -I
> 192.168.56.103". We need a way to do the same with nvme-cli.
> >
> > I thought that introducing a new option, "--host-iface", had the smallest
> impact since it requires less code changes, but that was turned down (not
> sure exactly why). I then suggested that we use the '%' delimiter for IPv4 and
> IPv6. I agree that it is not 100% the same as ping since ping only allows the
> '%' delimiter for IPv6 addresses (as per RFC4007). As you suggested, we could
> repeat the --host-traddr option (e.g. --host-traddr enp0s8 --host-traddr
> 192.168.56.103), but this is more impactful to the code than adding a
> separate --host-iface option.
> 
> It's less about code-changes and more on adding a new user ABI, that is the
> reason why (at least I'm fully on board just yet).
> 
> > EXAMPLE: Interface with several addresses assigned:
> > $ ip addr list dev enp0s8
> > 3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 ...
> >        link/ether 08:00:27:4f:95:5c brd ff:ff:ff:ff:ff:ff
> >        inet 192.168.56.101/24 brd 192.168.56.255 scope ...
> >           valid_lft 426sec preferred_lft 426sec
> >        inet 192.168.56.102/24 scope global secondary enp0s8
> >           valid_lft forever preferred_lft forever
> >        inet 192.168.56.103/24 scope global secondary enp0s8
> >           valid_lft forever preferred_lft forever
> >        inet 192.168.56.104/24 scope global secondary enp0s8
> >           valid_lft forever preferred_lft forever
> >
> > In the end, it doesn't really matter (to me) how it is implemented.
> However, a solution that have little to no impact on existing code would be
> nice. Just like ping, we need a way to specify an interface by its **interface
> name** (and not by its associated IP address), and we need to allow users to
> select which Source IP address to use when there are multiple addresses
> associated with an interface.
> 
> The '%' may be confusing when it comes to other transports as well (e.g.
> rdma/fc would have to either reject or ignore it, but regardless of how we
> add it that would be the case). Having host-traddr accept either ip or
> interface seems the most desirable, however that won't work if there are 2
> interfaces that share multiple ip addresses. So if this is a requirement we'll
> probably need to add --host-iface as another option...

I don’t grok what you mean by "that won't work if there are 2 interfaces that share multiple ip addresses". Why not? If one specifies the interface by its name (e.g. enp0s8), there is no possible confusion even if multiple interfaces share the same IP addresses. 

The following are some examples of how nvme-cli should work to comply with RFC4007 and be consistent to the way ping operates.
Example 1 - IPv4, Specify Interface with -w and let Linux select Source address: 
nvme discover -t tcp -a 192.168.1.9 -w enp0s8

Example 2 - IPv4, Specify Interface and Source address with repeated -w:  
nvme discover -t tcp -a 192.168.1.9 -w enp0s8 -w 192.168.56.103

Example 3 - IPv6, Specify Interface with'%' delimiter and let Linux select Source address:
nvme discover -t tcp -a fe80::800:27ff:fe00:0%enp0s8

Example 4 - IPv6, Specify Interface with -w and let Linux select Source address:
nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w enp0s8

Example 5 - IPv6, Specify Interface with'%' delimiter and Source address with -w: 
nvme discover -t tcp -a fe80::800:27ff:fe00:0%enp0s8 -w fe80::9266:4855:6cf2:f7e9

Example 6 - IPv6, Specify Interface and Source address with repeated -w: 
nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w enp0s8 -w fe80::9266:4855:6cf2:f7e9

Martin
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-10 19:18                     ` Belanger, Martin
@ 2021-05-11  0:28                       ` Sagi Grimberg
  2021-05-11 13:41                         ` Belanger, Martin
  0 siblings, 1 reply; 23+ messages in thread
From: Sagi Grimberg @ 2021-05-11  0:28 UTC (permalink / raw)
  To: Belanger, Martin, Hannes Reinecke, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch


>>>> We already support this for IPv6, we can do that also for IPv4, but
>>>> this syntax may not be trivially expected for ipv4?
>>>
>>> I tried this for IPv6 and it doesn't work. Here's what I get:
>>> $ sudo nvme discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0
>>> Failed to write to /dev/nvme-fabrics: Invalid argument $ sudo nvme
>>> discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0%enp0s8 Failed
>>> to write to /dev/nvme-fabrics: Invalid argument $ sudo nvme discover
>>> -g -G -t tcp -s 8009 -a [fe80::800:27ff:fe00:0] failed to resolve host
>>> [fe80::800:27ff:fe00:0] info $ sudo nvme discover -g -G -t tcp -s 8009
>>> -a [fe80::800:27ff:fe00:0%enp0s8] failed to resolve host
>>> [fe80::800:27ff:fe00:0%enp0s8] info
>>
>> # nvme discover -t tcp -a fe80::5054:ff:fef1:9f3b -w
>> fe80::5054:ff:fe28:5edb%enp6s0
> 
> Thanks for clarifying the syntax. However, that doesn't work for me.
> 
> # nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w fe80::9266:4855:6cf2:f7e9%enp0s8
> Failed to write to /dev/nvme-fabrics: Connection refused

Are you using the linux target? connection refused means that
you don't have a listener on it, it's not a resolution error.

did you have the target listen on fe80::800:27ff:fe00:0%<intf> ?

> 
> Note that the above syntax does not comply with RFC4007. The '%' delimiter is supposed to be appended to the Destination IP address and not the Source Address. In other words, to be RFC4007-compliant, the syntax should be (using your example):
> 
> # nvme discover -t tcp -a fe80::5054:ff:fef1:9f3b%enp6s0 -w fe80::5054:ff:fe28:5edb
> 
> This tells nvme-cli to connect to a controller at address fe80::5054:ff:fef1:9f3b using interface enp6s0 for the connection. And set the Source address to fe80::5054:ff:fe28:5edb.

This also seems to work, not sure that it does what we want though...
nvme discover -t tcp -a fe80::5054:ff:fef1:9f3b%enp6s0 -w 
fe80::5054:ff:fe28:5edb%enp6s0

Discovery Log Number of Records 1, Generation counter 5
=====Discovery Log Entry 0======
trtype:  tcp
adrfam:  ipv6
subtype: nvme subsystem
treq:    not specified, sq flow control disable supported
portid:  3
trsvcid: 8009
subnqn:  testnqn1
traddr:  fe80::5054:ff:fef1:9f3b%enp6s0
sectype: none


>> The '%' may be confusing when it comes to other transports as well (e.g.
>> rdma/fc would have to either reject or ignore it, but regardless of how we
>> add it that would be the case). Having host-traddr accept either ip or
>> interface seems the most desirable, however that won't work if there are 2
>> interfaces that share multiple ip addresses. So if this is a requirement we'll
>> probably need to add --host-iface as another option...
> 
> I don’t grok what you mean by "that won't work if there are 2 interfaces that share multiple ip addresses". Why not? If one specifies the interface by its name (e.g. enp0s8), there is no possible confusion even if multiple interfaces share the same IP addresses.
> 
> The following are some examples of how nvme-cli should work to comply with RFC4007 and be consistent to the way ping operates.
> Example 1 - IPv4, Specify Interface with -w and let Linux select Source address:
> nvme discover -t tcp -a 192.168.1.9 -w enp0s8
> 
> Example 2 - IPv4, Specify Interface and Source address with repeated -w:
> nvme discover -t tcp -a 192.168.1.9 -w enp0s8 -w 192.168.56.103

I meant without the repetitions, which you only need if you have 2
devices that share more than one address, which again, is not a clear
use-case to me, but without repetitions we won't support that.

> Example 3 - IPv6, Specify Interface with'%' delimiter and let Linux select Source address:
> nvme discover -t tcp -a fe80::800:27ff:fe00:0%enp0s8
> 
> Example 4 - IPv6, Specify Interface with -w and let Linux select Source address:
> nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w enp0s8
> 
> Example 5 - IPv6, Specify Interface with'%' delimiter and Source address with -w:
> nvme discover -t tcp -a fe80::800:27ff:fe00:0%enp0s8 -w fe80::9266:4855:6cf2:f7e9
> 
> Example 6 - IPv6, Specify Interface and Source address with repeated -w:
> nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w enp0s8 -w fe80::9266:4855:6cf2:f7e9
> 
> Martin
> 

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-11  0:28                       ` Sagi Grimberg
@ 2021-05-11 13:41                         ` Belanger, Martin
  2021-05-11 17:13                           ` Sagi Grimberg
  0 siblings, 1 reply; 23+ messages in thread
From: Belanger, Martin @ 2021-05-11 13:41 UTC (permalink / raw)
  To: Sagi Grimberg, Hannes Reinecke, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

> >>>> We already support this for IPv6, we can do that also for IPv4, but
> >>>> this syntax may not be trivially expected for ipv4?
> >>>
> >>> I tried this for IPv6 and it doesn't work. Here's what I get:
> >>> $ sudo nvme discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0
> >>> Failed to write to /dev/nvme-fabrics: Invalid argument $ sudo nvme
> >>> discover -g -G -t tcp -s 8009 -a fe80::800:27ff:fe00:0%enp0s8 Failed
> >>> to write to /dev/nvme-fabrics: Invalid argument $ sudo nvme discover
> >>> -g -G -t tcp -s 8009 -a [fe80::800:27ff:fe00:0] failed to resolve
> >>> host [fe80::800:27ff:fe00:0] info $ sudo nvme discover -g -G -t tcp
> >>> -s 8009 -a [fe80::800:27ff:fe00:0%enp0s8] failed to resolve host
> >>> [fe80::800:27ff:fe00:0%enp0s8] info
> >>
> >> # nvme discover -t tcp -a fe80::5054:ff:fef1:9f3b -w
> >> fe80::5054:ff:fe28:5edb%enp6s0
> >
> > Thanks for clarifying the syntax. However, that doesn't work for me.
> >
> > # nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w
> > fe80::9266:4855:6cf2:f7e9%enp0s8 Failed to write to /dev/nvme-fabrics:
> > Connection refused
> 
> Are you using the linux target? connection refused means that you don't
> have a listener on it, it's not a resolution error.
> 
> did you have the target listen on fe80::800:27ff:fe00:0%<intf> ?

Doh! You are correct. In my setup, I run the nvme-cli client on a VM and I run the target (nvmet) on the host computer. I had nvmet configured for "0.0.0.0" instead of "::" (i.e. listen on all interfaces). 

After changing nvmet's configuration, I was able to query the discovery log pages, using this syntax:
nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w fe80::9266:4855:6cf2:f7ea%enp0s8

Note that it doesn't work when I append the interface to the Destination IP address as per RFC4007 (like ping) as follows.
nvme discover -t tcp -a fe80::800:27ff:fe00:0%enp0s8 -w fe80::9266:4855:6cf2:f7ea

> 
> >
> > Note that the above syntax does not comply with RFC4007. The '%'
> delimiter is supposed to be appended to the Destination IP address and not
> the Source Address. In other words, to be RFC4007-compliant, the syntax
> should be (using your example):
> >
> > # nvme discover -t tcp -a fe80::5054:ff:fef1:9f3b%enp6s0 -w
> > fe80::5054:ff:fe28:5edb
> >
> > This tells nvme-cli to connect to a controller at address
> fe80::5054:ff:fef1:9f3b using interface enp6s0 for the connection. And set the
> Source address to fe80::5054:ff:fe28:5edb.
> 
> This also seems to work, not sure that it does what we want though...
> nvme discover -t tcp -a fe80::5054:ff:fef1:9f3b%enp6s0 -w
> fe80::5054:ff:fe28:5edb%enp6s0
> 
> Discovery Log Number of Records 1, Generation counter 5 =====Discovery
> Log Entry 0======
> trtype:  tcp
> adrfam:  ipv6
> subtype: nvme subsystem
> treq:    not specified, sq flow control disable supported
> portid:  3
> trsvcid: 8009
> subnqn:  testnqn1
> traddr:  fe80::5054:ff:fef1:9f3b%enp6s0
> sectype: none
> 
> 
> >> The '%' may be confusing when it comes to other transports as well (e.g.
> >> rdma/fc would have to either reject or ignore it, but regardless of
> >> how we add it that would be the case). Having host-traddr accept
> >> either ip or interface seems the most desirable, however that won't
> >> work if there are 2 interfaces that share multiple ip addresses. So
> >> if this is a requirement we'll probably need to add --host-iface as another
> option...
> >
> > I don’t grok what you mean by "that won't work if there are 2 interfaces
> that share multiple ip addresses". Why not? If one specifies the interface by
> its name (e.g. enp0s8), there is no possible confusion even if multiple
> interfaces share the same IP addresses.
> >
> > The following are some examples of how nvme-cli should work to comply
> with RFC4007 and be consistent to the way ping operates.
> > Example 1 - IPv4, Specify Interface with -w and let Linux select Source
> address:
> > nvme discover -t tcp -a 192.168.1.9 -w enp0s8
> >
> > Example 2 - IPv4, Specify Interface and Source address with repeated -w:
> > nvme discover -t tcp -a 192.168.1.9 -w enp0s8 -w 192.168.56.103
> 
> I meant without the repetitions, which you only need if you have 2 devices
> that share more than one address, which again, is not a clear use-case to
> me, but without repetitions we won't support that.

I've been thinking about what you said regarding the need to repeat the -w option when two interfaces share the same IP address. I think we're looking at the problem from a different point of view. The current implementation uses an IP address to identify an interface. I, on the other hand, believe that the best way to identify an interface is by its "interface name or index". In previous emails, I provided examples of the problems that may occur when using an IP address to identify an interface. For example, one can assign the same IP address to different interfaces making it impossible to distinguish interfaces by their IP address alone. Another example is that the low level APIs (e.g. setsockopt(SO_BINDTODEVICE) don’t even require the source IP address. They only need the interface name/index. So, why go through the trouble of performing a reverse address lookup to retrieve the interface name/index when the address is not used at all? 

By the way, if nvme-cli/linux-nvme allowed specifying interfaces by name/index, then we would not really need to repeat the -w option unless we also wanted to set the source address at the same time. Setting the source address is a completely different thing from setting the interface. One should be allowed to set one independently from the other, or both, or none.

If you look at how ping is implemented, they do not infer the interface from the IP address. If one wants to force ping to go over an interface, then one must provide the interface by name/index using the -I option. If one wants to change the source IP address (without forcing a specific interface), then one provides the IP address to the -I option. It's simple and intuitive. And ping also supports appending the interface to the Destination IP using the '%' delimiter for IPv6-only as per RFC4007.

I think that nvme-cli/linux-nvme should follow the ping approach. Interfaces should never be inferred from source IP addresses, but instead be clearly identified by their name or index. And setting the source address should be independent from setting the interface.

Regards,
Martin

> 
> > Example 3 - IPv6, Specify Interface with'%' delimiter and let Linux select
> Source address:
> > nvme discover -t tcp -a fe80::800:27ff:fe00:0%enp0s8
> >
> > Example 4 - IPv6, Specify Interface with -w and let Linux select Source
> address:
> > nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w enp0s8
> >
> > Example 5 - IPv6, Specify Interface with'%' delimiter and Source address
> with -w:
> > nvme discover -t tcp -a fe80::800:27ff:fe00:0%enp0s8 -w
> > fe80::9266:4855:6cf2:f7e9
> >
> > Example 6 - IPv6, Specify Interface and Source address with repeated -w:
> > nvme discover -t tcp -a fe80::800:27ff:fe00:0 -w enp0s8 -w
> > fe80::9266:4855:6cf2:f7e9
> >
> > Martin
> >
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-11 13:41                         ` Belanger, Martin
@ 2021-05-11 17:13                           ` Sagi Grimberg
  2021-05-12  6:09                             ` Hannes Reinecke
  0 siblings, 1 reply; 23+ messages in thread
From: Sagi Grimberg @ 2021-05-11 17:13 UTC (permalink / raw)
  To: Belanger, Martin, Hannes Reinecke, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch


> I've been thinking about what you said regarding the need to repeat the -w option when two interfaces share the same IP address. I think we're looking at the problem from a different point of view. The current implementation uses an IP address to identify an interface. I, on the other hand, believe that the best way to identify an interface is by its "interface name or index". In previous emails, I provided examples of the problems that may occur when using an IP address to identify an interface. For example, one can assign the same IP address to different interfaces making it impossible to distinguish interfaces by their IP address alone. Another example is that the low level APIs (e.g. setsockopt(SO_BINDTODEVICE) don’t even require the source IP address. They only need the interface name/index. So, why go through the trouble of performing a reverse address lookup to retrieve the interface name/index when the address is not used at all?
> 
> By the way, if nvme-cli/linux-nvme allowed specifying interfaces by name/index, then we would not really need to repeat the -w option unless we also wanted to set the source address at the same time. Setting the source address is a completely different thing from setting the interface. One should be allowed to set one independently from the other, or both, or none.
> 
> If you look at how ping is implemented, they do not infer the interface from the IP address. If one wants to force ping to go over an interface, then one must provide the interface by name/index using the -I option. If one wants to change the source IP address (without forcing a specific interface), then one provides the IP address to the -I option. It's simple and intuitive. And ping also supports appending the interface to the Destination IP using the '%' delimiter for IPv6-only as per RFC4007.
> 
> I think that nvme-cli/linux-nvme should follow the ping approach. Interfaces should never be inferred from source IP addresses, but instead be clearly identified by their name or index. And setting the source address should be independent from setting the interface.

I'm starting to think that we are going in circles, I'm getting to
the point that having host_iface is the right way to go.

We can have nvme-cli convert <addr>%iface notation to
"..,host_traddr=<addr>,host_iface=<iface>,.." when creating the
controller string...

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-11 17:13                           ` Sagi Grimberg
@ 2021-05-12  6:09                             ` Hannes Reinecke
  2021-05-12 12:12                               ` Belanger, Martin
  0 siblings, 1 reply; 23+ messages in thread
From: Hannes Reinecke @ 2021-05-12  6:09 UTC (permalink / raw)
  To: Sagi Grimberg, Belanger, Martin, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

On 5/11/21 7:13 PM, Sagi Grimberg wrote:
> 
>> I've been thinking about what you said regarding the need to repeat 
>> the -w option when two interfaces share the same IP address. I think 
>> we're looking at the problem from a different point of view. The 
>> current implementation uses an IP address to identify an interface. I, 
>> on the other hand, believe that the best way to identify an interface 
>> is by its "interface name or index". In previous emails, I provided 
>> examples of the problems that may occur when using an IP address to 
>> identify an interface. For example, one can assign the same IP address 
>> to different interfaces making it impossible to distinguish interfaces 
>> by their IP address alone. Another example is that the low level APIs 
>> (e.g. setsockopt(SO_BINDTODEVICE) don’t even require the source IP 
>> address. They only need the interface name/index. So, why go through 
>> the trouble of performing a reverse address lookup to retrieve the 
>> interface name/index when the address is not used at all?
>>
>> By the way, if nvme-cli/linux-nvme allowed specifying interfaces by 
>> name/index, then we would not really need to repeat the -w option 
>> unless we also wanted to set the source address at the same time. 
>> Setting the source address is a completely different thing from 
>> setting the interface. One should be allowed to set one independently 
>> from the other, or both, or none.
>>
>> If you look at how ping is implemented, they do not infer the 
>> interface from the IP address. If one wants to force ping to go over 
>> an interface, then one must provide the interface by name/index using 
>> the -I option. If one wants to change the source IP address (without 
>> forcing a specific interface), then one provides the IP address to the 
>> -I option. It's simple and intuitive. And ping also supports appending 
>> the interface to the Destination IP using the '%' delimiter for 
>> IPv6-only as per RFC4007.
>>
>> I think that nvme-cli/linux-nvme should follow the ping approach. 
>> Interfaces should never be inferred from source IP addresses, but 
>> instead be clearly identified by their name or index. And setting the 
>> source address should be independent from setting the interface.
> 
> I'm starting to think that we are going in circles, I'm getting to
> the point that having host_iface is the right way to go.
> 
> We can have nvme-cli convert <addr>%iface notation to
> "..,host_traddr=<addr>,host_iface=<iface>,.." when creating the
> controller string...

That was my thinking, too; the only _other_ way would be to have a 
host_traddr _imply_ a binding to the underlying interface.
We could define that, but having a single option doing _two_ different 
things is not good software design.
So let's do the 'host_iface' thingie.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-12  6:09                             ` Hannes Reinecke
@ 2021-05-12 12:12                               ` Belanger, Martin
  2021-05-12 22:12                                 ` Sagi Grimberg
  0 siblings, 1 reply; 23+ messages in thread
From: Belanger, Martin @ 2021-05-12 12:12 UTC (permalink / raw)
  To: Hannes Reinecke, Sagi Grimberg, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

> On 5/11/21 7:13 PM, Sagi Grimberg wrote:
> >
> >> I've been thinking about what you said regarding the need to repeat
> >> the -w option when two interfaces share the same IP address. I think
> >> we're looking at the problem from a different point of view. The
> >> current implementation uses an IP address to identify an interface.
> >> I, on the other hand, believe that the best way to identify an
> >> interface is by its "interface name or index". In previous emails, I
> >> provided examples of the problems that may occur when using an IP
> >> address to identify an interface. For example, one can assign the
> >> same IP address to different interfaces making it impossible to
> >> distinguish interfaces by their IP address alone. Another example is
> >> that the low level APIs (e.g. setsockopt(SO_BINDTODEVICE) don’t even
> >> require the source IP address. They only need the interface
> >> name/index. So, why go through the trouble of performing a reverse
> >> address lookup to retrieve the interface name/index when the address is
> not used at all?
> >>
> >> By the way, if nvme-cli/linux-nvme allowed specifying interfaces by
> >> name/index, then we would not really need to repeat the -w option
> >> unless we also wanted to set the source address at the same time.
> >> Setting the source address is a completely different thing from
> >> setting the interface. One should be allowed to set one independently
> >> from the other, or both, or none.
> >>
> >> If you look at how ping is implemented, they do not infer the
> >> interface from the IP address. If one wants to force ping to go over
> >> an interface, then one must provide the interface by name/index using
> >> the -I option. If one wants to change the source IP address (without
> >> forcing a specific interface), then one provides the IP address to
> >> the -I option. It's simple and intuitive. And ping also supports
> >> appending the interface to the Destination IP using the '%' delimiter
> >> for IPv6-only as per RFC4007.
> >>
> >> I think that nvme-cli/linux-nvme should follow the ping approach.
> >> Interfaces should never be inferred from source IP addresses, but
> >> instead be clearly identified by their name or index. And setting the
> >> source address should be independent from setting the interface.
> >
> > I'm starting to think that we are going in circles, I'm getting to the
> > point that having host_iface is the right way to go.
> >
> > We can have nvme-cli convert <addr>%iface notation to
> > "..,host_traddr=<addr>,host_iface=<iface>,.." when creating the
> > controller string...
> 
> That was my thinking, too; the only _other_ way would be to have a
> host_traddr _imply_ a binding to the underlying interface.
> We could define that, but having a single option doing _two_ different
> things is not good software design.
> So let's do the 'host_iface' thingie.
> 
> Cheers,
> 
> Hannes
> --
> Dr. Hannes Reinecke                Kernel Storage Architect
> hare@suse.de                              +49 911 74053 688
> SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg HRB 36809
> (AG Nürnberg), Geschäftsführer: Felix Imendörffer

Thanks Hannes and Sagi. I will resubmit the 'host_iface' patch after running more IPv6 tests. Sagi made me realize that IPv6 has a few subtle differences and I need to double check that the current host_iface implementation works as well for IPv6 as it does for IPv4.
Martin
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
  2021-05-12 12:12                               ` Belanger, Martin
@ 2021-05-12 22:12                                 ` Sagi Grimberg
  0 siblings, 0 replies; 23+ messages in thread
From: Sagi Grimberg @ 2021-05-12 22:12 UTC (permalink / raw)
  To: Belanger, Martin, Hannes Reinecke, Martin Belanger, linux-nvme
  Cc: kbusch, axboe, hch

> Thanks Hannes and Sagi. I will resubmit the 'host_iface' patch after running more IPv6 tests. Sagi made me realize that IPv6 has a few subtle differences and I need to double check that the current host_iface implementation works as well for IPv6 as it does for IPv4.

Cool, thanks.

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, back to index

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20210415192848.962891-1-nitram_67@hotmail.com>
2021-04-15 19:28 ` [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting Martin Belanger
2021-05-01 11:34   ` Hannes Reinecke
2021-05-03 16:59     ` Belanger, Martin
2021-05-04 13:25       ` Hannes Reinecke
2021-05-04 19:56   ` Sagi Grimberg
2021-05-05  8:47     ` Hannes Reinecke
2021-05-05 14:31       ` Belanger, Martin
2021-05-05 18:33         ` James Smart
2021-05-05 20:32         ` Sagi Grimberg
2021-05-06 18:27           ` Michael Christie
2021-05-06  6:05         ` Hannes Reinecke
2021-05-06  7:00           ` Hannes Reinecke
2021-05-06 15:46             ` Belanger, Martin
2021-05-07 18:20               ` Sagi Grimberg
2021-05-10 13:49                 ` Belanger, Martin
2021-05-10 18:13                   ` Sagi Grimberg
2021-05-10 19:18                     ` Belanger, Martin
2021-05-11  0:28                       ` Sagi Grimberg
2021-05-11 13:41                         ` Belanger, Martin
2021-05-11 17:13                           ` Sagi Grimberg
2021-05-12  6:09                             ` Hannes Reinecke
2021-05-12 12:12                               ` Belanger, Martin
2021-05-12 22:12                                 ` Sagi Grimberg

Linux-NVME Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvme/0 linux-nvme/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvme linux-nvme/ https://lore.kernel.org/linux-nvme \
		linux-nvme@lists.infradead.org
	public-inbox-index linux-nvme

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-nvme


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git