Linux-NVME Archive on lore.kernel.org
 help / color / Atom feed
From: Hannes Reinecke <hare@suse.de>
To: Martin Belanger <nitram_67@hotmail.com>, linux-nvme@lists.infradead.org
Cc: kbusch@kernel.org, axboe@fb.com, hch@lst.de, sagi@grimberg.me,
	Martin Belanger <martin.belanger@dell.com>
Subject: Re: [PATCH 1/1] Add 'Transport Interface' (triface) option. This can be used to specify the IP interface to use for the connection. The driver uses that to set SO_BINDTODEVICE on the socket before connecting.
Date: Sat, 1 May 2021 13:34:25 +0200
Message-ID: <11e71590-de9b-7eca-7d10-bbd3600d650c@suse.de> (raw)
In-Reply-To: <BL0PR13MB42917AC33BFFC7DB2440D1149C4D9@BL0PR13MB4291.namprd13.prod.outlook.com>

On 4/15/21 9:28 PM, Martin Belanger wrote:
> From: Martin Belanger <martin.belanger@dell.com>
> 
Please fix up the subject and description.

> ---
>   drivers/nvme/host/core.c    |  5 +++++
>   drivers/nvme/host/fabrics.c | 14 +++++++++++++
>   drivers/nvme/host/fabrics.h |  6 +++++-
>   drivers/nvme/host/tcp.c     | 41 ++++++++++++++++++++++++++++++++++---
>   4 files changed, 62 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 288ac47ff5b4..91ae11a1ae26 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3961,6 +3961,11 @@ static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
>   
>   		ret = add_uevent_var(env, "NVME_HOST_TRADDR=%s",
>   				opts->host_traddr ?: "none");
> +		if (ret)
> +			return ret;
> +
> +		ret = add_uevent_var(env, "NVME_HOST_TRIFACE=%s",
> +				opts->host_triface ?: "none");
>   	}
>   	return ret;
>   }

Why not simply 'host_iface' ? 'triface' is a bit awkward.

> diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
> index 604ab0e5a2ad..f5d0d760b53b 100644
> --- a/drivers/nvme/host/fabrics.c
> +++ b/drivers/nvme/host/fabrics.c
> @@ -112,6 +112,9 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
>   	if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
>   		len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
>   				(len) ? "," : "", ctrl->opts->host_traddr);
> +	if (ctrl->opts->mask & NVMF_OPT_HOST_TRIFACE)
> +		len += scnprintf(buf + len, size - len, "%shost_triface=%s",
> +				(len) ? "," : "", ctrl->opts->host_triface);
>   	len += scnprintf(buf + len, size - len, "\n");
>   
>   	return len;
> @@ -604,6 +607,7 @@ static const match_table_t opt_tokens = {
>   	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
>   	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
>   	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
> +	{ NVMF_OPT_HOST_TRIFACE,	"host_triface=%s"	},
>   	{ NVMF_OPT_HOST_ID,		"hostid=%s"		},
>   	{ NVMF_OPT_DUP_CONNECT,		"duplicate_connect"	},
>   	{ NVMF_OPT_DISABLE_SQFLOW,	"disable_sqflow"	},
> @@ -813,6 +817,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
>   			kfree(opts->host_traddr);
>   			opts->host_traddr = p;
>   			break;
> +		case NVMF_OPT_HOST_TRIFACE:
> +			p = match_strdup(args);
> +			if (!p) {
> +				ret = -ENOMEM;
> +				goto out;
> +			}
> +			kfree(opts->host_triface);
> +			opts->host_triface = p;
> +			break;
>   		case NVMF_OPT_HOST_ID:
>   			p = match_strdup(args);
>   			if (!p) {
> @@ -997,6 +1010,7 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts)
>   	kfree(opts->trsvcid);
>   	kfree(opts->subsysnqn);
>   	kfree(opts->host_traddr);
> +	kfree(opts->host_triface);
>   	kfree(opts);
>   }
>   EXPORT_SYMBOL_GPL(nvmf_free_options);
> diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
> index 733010d2eafd..17c64ff4db8c 100644
> --- a/drivers/nvme/host/fabrics.h
> +++ b/drivers/nvme/host/fabrics.h
> @@ -59,6 +59,7 @@ enum {
>   	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
>   	NVMF_OPT_TOS		= 1 << 19,
>   	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
> +	NVMF_OPT_HOST_TRIFACE	= 1 << 21,
>   };
>   
>   /**
> @@ -76,7 +77,9 @@ enum {
>    * @trsvcid:	The transport-specific TRSVCID field for a port on the
>    *              subsystem which is adding a controller.
>    * @host_traddr: A transport-specific field identifying the NVME host port
> - *              to use for the connection to the controller.
> + *		to use for the connection to the controller.
> + * @host_triface: A transport-specific field identifying the NVME host
> + *		interface to use for the connection to the controller.
>    * @queue_size: Number of IO queue elements.
>    * @nr_io_queues: Number of controller IO queues that will be established.
>    * @reconnect_delay: Time between two consecutive reconnect attempts.
> @@ -101,6 +104,7 @@ struct nvmf_ctrl_options {
>   	char			*traddr;
>   	char			*trsvcid;
>   	char			*host_traddr;
> +	char			*host_triface;
>   	size_t			queue_size;
>   	unsigned int		nr_io_queues;
>   	unsigned int		reconnect_delay;
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 8e55d8bc0c50..28eb7f88b487 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -1447,6 +1447,20 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
>   		}
>   	}
>   
> +	if (nctrl->opts->mask & NVMF_OPT_HOST_TRIFACE) {
> +		char *iface = nctrl->opts->host_triface;
> +		sockptr_t optval = KERNEL_SOCKPTR(iface);
> +
> +		ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE,
> +				      optval, strlen(iface));
> +		if (ret) {
> +			dev_err(nctrl->device,
> +			  "failed to bind to interface %s queue %d err %d\n",
> +			  iface, qid, ret);
> +			goto err_sock;
> +		}
> +	}
> +
>   	queue->hdr_digest = nctrl->opts->hdr_digest;
>   	queue->data_digest = nctrl->opts->data_digest;
>   	if (queue->hdr_digest || queue->data_digest) {

Is this valid for all transports? I guess it would only work for 'tcp', 
and maybe 'rdma' if one would be running ROCE.
Shouldn't we error out on other transports like 'fc' or 'loop'?

> @@ -2457,6 +2471,10 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
>   static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>   		struct nvmf_ctrl_options *opts)
>   {
> +	const char *iface_key = "";
> +	const char *iface_val = "";
> +	const char *srce_key  = "";
> +	const char *srce_val  = "";
>   	struct nvme_tcp_ctrl *ctrl;
>   	int ret;
>   
> @@ -2502,6 +2520,22 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>   			       opts->host_traddr);
>   			goto out_free_ctrl;
>   		}
> +		srce_key = ", src-addr ";
> +		srce_val = opts->host_traddr;
> +	}
> +
> +	if (opts->mask & NVMF_OPT_HOST_TRIFACE) {
> +		struct net_device *ndev;
> +
> +		ndev = dev_get_by_name(&init_net, opts->host_triface);
> +		if (!ndev) {
> +			pr_err("invalid interface passed: %s\n",
> +			       opts->host_triface);
> +			ret = -ENODEV;
> +			goto out_free_ctrl;
> +		}
> +		iface_key = ", iface ";
> +		iface_val = opts->host_triface;
>   	}
>   
>   	if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {

Normally the options are just parts of the 'address' string; why didn't 
you use that approach here?

> @@ -2530,8 +2564,9 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>   	if (ret)
>   		goto out_uninit_ctrl;
>   
> -	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
> -		ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
> +	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp%s%s%s%s\n",
> +		 ctrl->ctrl.opts->subsysnqn, &ctrl->addr,
> +		 srce_key, srce_val, iface_key, iface_val);
>   
>   	mutex_lock(&nvme_tcp_ctrl_mutex);
>   	list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
> @@ -2560,7 +2595,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
>   			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
>   			  NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
>   			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
> -			  NVMF_OPT_TOS,
> +			  NVMF_OPT_TOS | NVMF_OPT_HOST_TRIFACE,
>   	.create_ctrl	= nvme_tcp_create_ctrl,
>   };
>   
> 
Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer

_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme

  reply index

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20210415192848.962891-1-nitram_67@hotmail.com>
2021-04-15 19:28 ` Martin Belanger
2021-05-01 11:34   ` Hannes Reinecke [this message]
2021-05-03 16:59     ` Belanger, Martin
2021-05-04 13:25       ` Hannes Reinecke
2021-05-04 19:56   ` Sagi Grimberg
2021-05-05  8:47     ` Hannes Reinecke
2021-05-05 14:31       ` Belanger, Martin
2021-05-05 18:33         ` James Smart
2021-05-05 20:32         ` Sagi Grimberg
2021-05-06 18:27           ` Michael Christie
2021-05-06  6:05         ` Hannes Reinecke
2021-05-06  7:00           ` Hannes Reinecke
2021-05-06 15:46             ` Belanger, Martin
2021-05-07 18:20               ` Sagi Grimberg
2021-05-10 13:49                 ` Belanger, Martin
2021-05-10 18:13                   ` Sagi Grimberg
2021-05-10 19:18                     ` Belanger, Martin
2021-05-11  0:28                       ` Sagi Grimberg
2021-05-11 13:41                         ` Belanger, Martin
2021-05-11 17:13                           ` Sagi Grimberg
2021-05-12  6:09                             ` Hannes Reinecke
2021-05-12 12:12                               ` Belanger, Martin
2021-05-12 22:12                                 ` Sagi Grimberg

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11e71590-de9b-7eca-7d10-bbd3600d650c@suse.de \
    --to=hare@suse.de \
    --cc=axboe@fb.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=martin.belanger@dell.com \
    --cc=nitram_67@hotmail.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-NVME Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-nvme/0 linux-nvme/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-nvme linux-nvme/ https://lore.kernel.org/linux-nvme \
		linux-nvme@lists.infradead.org
	public-inbox-index linux-nvme

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.infradead.lists.linux-nvme


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git