* [PATCH 01/18] nvme-keyring: register '.nvme' keyring
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 02/18] nvme-keyring: define a 'psk' keytype Hannes Reinecke
` (18 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
Register a '.nvme' keyring to hold keys for TLS and DH-HMAC-CHAP and
add a new config option NVME_KEYRING.
We need a separate keyring for NVMe as the configuration is done
via individual commands (eg for configfs), and the usual per-session
or per-process keyrings can't be used.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/common/Kconfig | 4 ++++
drivers/nvme/common/Makefile | 3 ++-
drivers/nvme/common/keyring.c | 40 +++++++++++++++++++++++++++++++++++
drivers/nvme/host/core.c | 10 +++++++--
include/linux/nvme-keyring.h | 28 ++++++++++++++++++++++++
5 files changed, 82 insertions(+), 3 deletions(-)
create mode 100644 drivers/nvme/common/keyring.c
create mode 100644 include/linux/nvme-keyring.h
diff --git a/drivers/nvme/common/Kconfig b/drivers/nvme/common/Kconfig
index 4514f44362dd..641b27adb047 100644
--- a/drivers/nvme/common/Kconfig
+++ b/drivers/nvme/common/Kconfig
@@ -2,3 +2,7 @@
config NVME_COMMON
tristate
+
+config NVME_KEYRING
+ bool
+ select KEYS
diff --git a/drivers/nvme/common/Makefile b/drivers/nvme/common/Makefile
index 720c625b8a52..0cbd0b0b8d49 100644
--- a/drivers/nvme/common/Makefile
+++ b/drivers/nvme/common/Makefile
@@ -4,4 +4,5 @@ ccflags-y += -I$(src)
obj-$(CONFIG_NVME_COMMON) += nvme-common.o
-nvme-common-y += auth.o
+nvme-common-$(CONFIG_NVME_AUTH) += auth.o
+nvme-common-$(CONFIG_NVME_KEYRING) += keyring.o
diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c
new file mode 100644
index 000000000000..5cf64b278119
--- /dev/null
+++ b/drivers/nvme/common/keyring.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2023 Hannes Reinecke, SUSE Labs
+ */
+
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
+#include <linux/nvme.h>
+
+static struct key *nvme_keyring;
+
+key_serial_t nvme_keyring_id(void)
+{
+ return nvme_keyring->serial;
+}
+EXPORT_SYMBOL_GPL(nvme_keyring_id);
+
+int nvme_keyring_init(void)
+{
+ nvme_keyring = keyring_alloc(".nvme",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+ current_cred(),
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ (KEY_USR_ALL & ~KEY_USR_SETATTR),
+ KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+ if (IS_ERR(nvme_keyring))
+ return PTR_ERR(nvme_keyring);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_keyring_init);
+
+void nvme_keyring_exit(void)
+{
+ key_revoke(nvme_keyring);
+ key_put(nvme_keyring);
+}
+EXPORT_SYMBOL_GPL(nvme_keyring_exit);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 37b6fa746662..dfc574d0f18d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -25,6 +25,7 @@
#include "nvme.h"
#include "fabrics.h"
#include <linux/nvme-auth.h>
+#include <linux/nvme-keyring.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -4703,12 +4704,16 @@ static int __init nvme_core_init(void)
result = PTR_ERR(nvme_ns_chr_class);
goto unregister_generic_ns;
}
-
- result = nvme_init_auth();
+ result = nvme_keyring_init();
if (result)
goto destroy_ns_chr;
+ result = nvme_init_auth();
+ if (result)
+ goto keyring_exit;
return 0;
+keyring_exit:
+ nvme_keyring_exit();
destroy_ns_chr:
class_destroy(nvme_ns_chr_class);
unregister_generic_ns:
@@ -4732,6 +4737,7 @@ static int __init nvme_core_init(void)
static void __exit nvme_core_exit(void)
{
nvme_exit_auth();
+ nvme_keyring_exit();
class_destroy(nvme_ns_chr_class);
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);
diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h
new file mode 100644
index 000000000000..32bd264a71e6
--- /dev/null
+++ b/include/linux/nvme-keyring.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Hannes Reinecke, SUSE Labs
+ */
+
+#ifndef _NVME_KEYRING_H
+#define _NVME_KEYRING_H
+
+#ifdef CONFIG_NVME_KEYRING
+
+key_serial_t nvme_keyring_id(void);
+int nvme_keyring_init(void);
+void nvme_keyring_exit(void);
+
+#else
+
+static inline key_serial_t nvme_keyring_id(void)
+{
+ return 0;
+}
+static inline int nvme_keyring_init(void)
+{
+ return 0;
+}
+static inline void nvme_keyring_exit(void) {}
+
+#endif /* !CONFIG_NVME_KEYRING */
+#endif /* _NVME_KEYRING_H */
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 02/18] nvme-keyring: define a 'psk' keytype
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
2023-08-24 14:39 ` [PATCH 01/18] nvme-keyring: register '.nvme' keyring Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 03/18] nvme: add TCP TSAS definitions Hannes Reinecke
` (17 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
Define a 'psk' keytype to hold the NVMe TLS PSKs.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/common/keyring.c | 94 +++++++++++++++++++++++++++++++++++
1 file changed, 94 insertions(+)
diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c
index 5cf64b278119..494dd365052e 100644
--- a/drivers/nvme/common/keyring.c
+++ b/drivers/nvme/common/keyring.c
@@ -8,6 +8,8 @@
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/nvme.h>
+#include <linux/nvme-tcp.h>
+#include <linux/nvme-keyring.h>
static struct key *nvme_keyring;
@@ -17,8 +19,94 @@ key_serial_t nvme_keyring_id(void)
}
EXPORT_SYMBOL_GPL(nvme_keyring_id);
+static void nvme_tls_psk_describe(const struct key *key, struct seq_file *m)
+{
+ seq_puts(m, key->description);
+ seq_printf(m, ": %u", key->datalen);
+}
+
+static bool nvme_tls_psk_match(const struct key *key,
+ const struct key_match_data *match_data)
+{
+ const char *match_id;
+ size_t match_len;
+
+ if (!key->description) {
+ pr_debug("%s: no key description\n", __func__);
+ return false;
+ }
+ match_len = strlen(key->description);
+ pr_debug("%s: id %s len %zd\n", __func__, key->description, match_len);
+
+ if (!match_data->raw_data) {
+ pr_debug("%s: no match data\n", __func__);
+ return false;
+ }
+ match_id = match_data->raw_data;
+ pr_debug("%s: match '%s' '%s' len %zd\n",
+ __func__, match_id, key->description, match_len);
+ return !memcmp(key->description, match_id, match_len);
+}
+
+static int nvme_tls_psk_match_preparse(struct key_match_data *match_data)
+{
+ match_data->lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE;
+ match_data->cmp = nvme_tls_psk_match;
+ return 0;
+}
+
+static struct key_type nvme_tls_psk_key_type = {
+ .name = "psk",
+ .flags = KEY_TYPE_NET_DOMAIN,
+ .preparse = user_preparse,
+ .free_preparse = user_free_preparse,
+ .match_preparse = nvme_tls_psk_match_preparse,
+ .instantiate = generic_key_instantiate,
+ .revoke = user_revoke,
+ .destroy = user_destroy,
+ .describe = nvme_tls_psk_describe,
+ .read = user_read,
+};
+
+static struct key *nvme_tls_psk_lookup(struct key *keyring,
+ const char *hostnqn, const char *subnqn,
+ int hmac, bool generated)
+{
+ char *identity;
+ size_t identity_len = (NVMF_NQN_SIZE) * 2 + 11;
+ key_ref_t keyref;
+ key_serial_t keyring_id;
+
+ identity = kzalloc(identity_len, GFP_KERNEL);
+ if (!identity)
+ return ERR_PTR(-ENOMEM);
+
+ snprintf(identity, identity_len, "NVMe0%c%02d %s %s",
+ generated ? 'G' : 'R', hmac, hostnqn, subnqn);
+
+ if (!keyring)
+ keyring = nvme_keyring;
+ keyring_id = key_serial(keyring);
+ pr_debug("keyring %x lookup tls psk '%s'\n",
+ keyring_id, identity);
+ keyref = keyring_search(make_key_ref(keyring, true),
+ &nvme_tls_psk_key_type,
+ identity, false);
+ if (IS_ERR(keyref)) {
+ pr_debug("lookup tls psk '%s' failed, error %ld\n",
+ identity, PTR_ERR(keyref));
+ kfree(identity);
+ return ERR_PTR(-ENOKEY);
+ }
+ kfree(identity);
+
+ return key_ref_to_ptr(keyref);
+}
+
int nvme_keyring_init(void)
{
+ int err;
+
nvme_keyring = keyring_alloc(".nvme",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
current_cred(),
@@ -28,12 +116,18 @@ int nvme_keyring_init(void)
if (IS_ERR(nvme_keyring))
return PTR_ERR(nvme_keyring);
+ err = register_key_type(&nvme_tls_psk_key_type);
+ if (err) {
+ key_put(nvme_keyring);
+ return err;
+ }
return 0;
}
EXPORT_SYMBOL_GPL(nvme_keyring_init);
void nvme_keyring_exit(void)
{
+ unregister_key_type(&nvme_tls_psk_key_type);
key_revoke(nvme_keyring);
key_put(nvme_keyring);
}
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 03/18] nvme: add TCP TSAS definitions
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
2023-08-24 14:39 ` [PATCH 01/18] nvme-keyring: register '.nvme' keyring Hannes Reinecke
2023-08-24 14:39 ` [PATCH 02/18] nvme-keyring: define a 'psk' keytype Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 04/18] nvme-tcp: add definitions for TLS cipher suites Hannes Reinecke
` (16 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
include/linux/nvme.h | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 26dd3f859d9d..a7ba74babad7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -108,6 +108,13 @@ enum {
NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */
};
+/* TSAS SECTYPE for TCP transport */
+enum {
+ NVMF_TCP_SECTYPE_NONE = 0, /* No Security */
+ NVMF_TCP_SECTYPE_TLS12 = 1, /* TLSv1.2, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
+ NVMF_TCP_SECTYPE_TLS13 = 2, /* TLSv1.3, NVMe-oF 1.1 and NVMe-TCP 3.6.1.1 */
+};
+
#define NVME_AQ_DEPTH 32
#define NVME_NR_AEN_COMMANDS 1
#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
@@ -1493,6 +1500,9 @@ struct nvmf_disc_rsp_page_entry {
__u16 pkey;
__u8 resv10[246];
} rdma;
+ struct tcp {
+ __u8 sectype;
+ } tcp;
} tsas;
};
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 04/18] nvme-tcp: add definitions for TLS cipher suites
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (2 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 03/18] nvme: add TCP TSAS definitions Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 05/18] nvme-keyring: implement nvme_tls_psk_default() Hannes Reinecke
` (15 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
include/linux/nvme-tcp.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
index 57ebe1267f7f..e07e8978d691 100644
--- a/include/linux/nvme-tcp.h
+++ b/include/linux/nvme-tcp.h
@@ -18,6 +18,12 @@ enum nvme_tcp_pfv {
NVME_TCP_PFV_1_0 = 0x0,
};
+enum nvme_tcp_tls_cipher {
+ NVME_TCP_TLS_CIPHER_INVALID = 0,
+ NVME_TCP_TLS_CIPHER_SHA256 = 1,
+ NVME_TCP_TLS_CIPHER_SHA384 = 2,
+};
+
enum nvme_tcp_fatal_error_status {
NVME_TCP_FES_INVALID_PDU_HDR = 0x01,
NVME_TCP_FES_PDU_SEQ_ERR = 0x02,
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 05/18] nvme-keyring: implement nvme_tls_psk_default()
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (3 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 04/18] nvme-tcp: add definitions for TLS cipher suites Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 06/18] security/keys: export key_lookup() Hannes Reinecke
` (14 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
Implement a function to select the preferred PSK for TLS.
A 'retained' PSK should be preferred over a 'generated' PSK,
and SHA-384 should be preferred to SHA-256.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/common/keyring.c | 48 +++++++++++++++++++++++++++++++++++
include/linux/nvme-keyring.h | 8 ++++++
2 files changed, 56 insertions(+)
diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c
index 494dd365052e..f8d9a208397b 100644
--- a/drivers/nvme/common/keyring.c
+++ b/drivers/nvme/common/keyring.c
@@ -5,6 +5,7 @@
#include <linux/module.h>
#include <linux/seq_file.h>
+#include <linux/key.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
#include <linux/nvme.h>
@@ -103,6 +104,53 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring,
return key_ref_to_ptr(keyref);
}
+/*
+ * NVMe PSK priority list
+ *
+ * 'Retained' PSKs (ie 'generated == false')
+ * should be preferred to 'generated' PSKs,
+ * and SHA-384 should be preferred to SHA-256.
+ */
+struct nvme_tls_psk_priority_list {
+ bool generated;
+ enum nvme_tcp_tls_cipher cipher;
+} nvme_tls_psk_prio[] = {
+ { .generated = false,
+ .cipher = NVME_TCP_TLS_CIPHER_SHA384, },
+ { .generated = false,
+ .cipher = NVME_TCP_TLS_CIPHER_SHA256, },
+ { .generated = true,
+ .cipher = NVME_TCP_TLS_CIPHER_SHA384, },
+ { .generated = true,
+ .cipher = NVME_TCP_TLS_CIPHER_SHA256, },
+};
+
+/*
+ * nvme_tls_psk_default - Return the preferred PSK to use for TLS ClientHello
+ */
+key_serial_t nvme_tls_psk_default(struct key *keyring,
+ const char *hostnqn, const char *subnqn)
+{
+ struct key *tls_key;
+ key_serial_t tls_key_id;
+ int prio;
+
+ for (prio = 0; prio < ARRAY_SIZE(nvme_tls_psk_prio); prio++) {
+ bool generated = nvme_tls_psk_prio[prio].generated;
+ enum nvme_tcp_tls_cipher cipher = nvme_tls_psk_prio[prio].cipher;
+
+ tls_key = nvme_tls_psk_lookup(keyring, hostnqn, subnqn,
+ cipher, generated);
+ if (!IS_ERR(tls_key)) {
+ tls_key_id = tls_key->serial;
+ key_put(tls_key);
+ return tls_key_id;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_tls_psk_default);
+
int nvme_keyring_init(void)
{
int err;
diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h
index 32bd264a71e6..4efea9dd967c 100644
--- a/include/linux/nvme-keyring.h
+++ b/include/linux/nvme-keyring.h
@@ -8,12 +8,20 @@
#ifdef CONFIG_NVME_KEYRING
+key_serial_t nvme_tls_psk_default(struct key *keyring,
+ const char *hostnqn, const char *subnqn);
+
key_serial_t nvme_keyring_id(void);
int nvme_keyring_init(void);
void nvme_keyring_exit(void);
#else
+static inline key_serial_t nvme_tls_psk_default(struct key *keyring,
+ const char *hostnqn, const char *subnqn)
+{
+ return 0;
+}
static inline key_serial_t nvme_keyring_id(void)
{
return 0;
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 06/18] security/keys: export key_lookup()
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (4 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 05/18] nvme-keyring: implement nvme_tls_psk_default() Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 07/18] nvme-tcp: allocate socket file Hannes Reinecke
` (13 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke,
David Howells
For in-kernel consumers one cannot readily assign a user (eg when
running from a workqueue), so the normal key search permissions
cannot be applied.
This patch exports the 'key_lookup()' function for a simple lookup
of keys without checking for permissions.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Acked-by: David Howells <dhowells@redhat.com>
---
include/linux/key.h | 1 +
security/keys/key.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/include/linux/key.h b/include/linux/key.h
index 938d7ecfb495..943a432da3ae 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -515,6 +515,7 @@ extern void key_init(void);
#define key_init() do { } while(0)
#define key_free_user_ns(ns) do { } while(0)
#define key_remove_domain(d) do { } while(0)
+#define key_lookup(k) NULL
#endif /* CONFIG_KEYS */
#endif /* __KERNEL__ */
diff --git a/security/keys/key.c b/security/keys/key.c
index 5c0c7df833f8..0260a1902922 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -693,6 +693,7 @@ struct key *key_lookup(key_serial_t id)
spin_unlock(&key_serial_lock);
return key;
}
+EXPORT_SYMBOL(key_lookup);
/*
* Find and lock the specified key type against removal.
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 07/18] nvme-tcp: allocate socket file
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (5 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 06/18] security/keys: export key_lookup() Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 08/18] nvme-tcp: enable TLS handshake upcall Hannes Reinecke
` (12 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
When using the TLS upcall we need to allocate a socket file such
that the userspace daemon is able to use the socket.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/tcp.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 9ce417cd32a7..324d3bce65b8 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1338,7 +1338,9 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
}
noreclaim_flag = memalloc_noreclaim_save();
- sock_release(queue->sock);
+ /* ->sock will be released by fput() */
+ fput(queue->sock->file);
+ queue->sock = NULL;
memalloc_noreclaim_restore(noreclaim_flag);
kfree(queue->pdu);
@@ -1512,6 +1514,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
int ret, rcv_pdu_size;
+ struct file *sock_file;
mutex_init(&queue->queue_lock);
queue->ctrl = ctrl;
@@ -1534,6 +1537,11 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
goto err_destroy_mutex;
}
+ sock_file = sock_alloc_file(queue->sock, O_CLOEXEC, NULL);
+ if (IS_ERR(sock_file)) {
+ ret = PTR_ERR(sock_file);
+ goto err_destroy_mutex;
+ }
nvme_tcp_reclassify_socket(queue->sock);
/* Single syn retry */
@@ -1640,7 +1648,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
if (queue->hdr_digest || queue->data_digest)
nvme_tcp_free_crypto(queue);
err_sock:
- sock_release(queue->sock);
+ /* ->sock will be released by fput() */
+ fput(queue->sock->file);
queue->sock = NULL;
err_destroy_mutex:
mutex_destroy(&queue->send_mutex);
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 08/18] nvme-tcp: enable TLS handshake upcall
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (6 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 07/18] nvme-tcp: allocate socket file Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 09/18] nvme-tcp: control message handling for recvmsg() Hannes Reinecke
` (11 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
Add a fabrics option 'tls' and start the TLS handshake upcall
with the default PSK. When TLS is started the PSK key serial
number is displayed in the sysfs attribute 'tls_key'
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/Kconfig | 15 ++++
drivers/nvme/host/core.c | 2 +-
drivers/nvme/host/fabrics.c | 12 ++++
drivers/nvme/host/fabrics.h | 3 +
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/sysfs.c | 21 ++++++
drivers/nvme/host/tcp.c | 138 ++++++++++++++++++++++++++++++++++--
7 files changed, 185 insertions(+), 7 deletions(-)
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 2f6a7f8c94e8..a517030d7d50 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -92,6 +92,21 @@ config NVME_TCP
If unsure, say N.
+config NVME_TCP_TLS
+ bool "NVMe over Fabrics TCP TLS encryption support"
+ depends on NVME_TCP
+ select NVME_COMMON
+ select NVME_KEYRING
+ select NET_HANDSHAKE
+ select KEYS
+ help
+ Enables TLS encryption for NVMe TCP using the netlink handshake API.
+
+ The TLS handshake daemon is availble at
+ https://github.com/oracle/ktls-utils.
+
+ If unsure, say N.
+
config NVME_AUTH
bool "NVM Express over Fabrics In-Band Authentication"
depends on NVME_CORE
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dfc574d0f18d..b52e9c9bffd6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4380,7 +4380,7 @@ static void nvme_free_ctrl(struct device *dev)
if (!subsys || ctrl->instance != subsys->instance)
ida_free(&nvme_instance_ida, ctrl->instance);
-
+ key_put(ctrl->tls_key);
nvme_free_cels(ctrl);
nvme_mpath_uninit(ctrl);
nvme_auth_stop(ctrl);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 8175d49f2909..ddad482c3537 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -647,6 +647,9 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_DISCOVERY, "discovery" },
{ NVMF_OPT_DHCHAP_SECRET, "dhchap_secret=%s" },
{ NVMF_OPT_DHCHAP_CTRL_SECRET, "dhchap_ctrl_secret=%s" },
+#ifdef CONFIG_NVME_TCP_TLS
+ { NVMF_OPT_TLS, "tls" },
+#endif
{ NVMF_OPT_ERR, NULL }
};
@@ -671,6 +674,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->hdr_digest = false;
opts->data_digest = false;
opts->tos = -1; /* < 0 == use transport default */
+ opts->tls = false;
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
@@ -955,6 +959,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
kfree(opts->dhchap_ctrl_secret);
opts->dhchap_ctrl_secret = p;
break;
+ case NVMF_OPT_TLS:
+ if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) {
+ pr_err("TLS is not supported\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ opts->tls = true;
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 82e7a27ffbde..dac17c3fee26 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -70,6 +70,7 @@ enum {
NVMF_OPT_DISCOVERY = 1 << 22,
NVMF_OPT_DHCHAP_SECRET = 1 << 23,
NVMF_OPT_DHCHAP_CTRL_SECRET = 1 << 24,
+ NVMF_OPT_TLS = 1 << 25,
};
/**
@@ -102,6 +103,7 @@ enum {
* @dhchap_secret: DH-HMAC-CHAP secret
* @dhchap_ctrl_secret: DH-HMAC-CHAP controller secret for bi-directional
* authentication
+ * @tls: Start TLS encrypted connections (TCP)
* @disable_sqflow: disable controller sq flow control
* @hdr_digest: generate/verify header digest (TCP)
* @data_digest: generate/verify data digest (TCP)
@@ -128,6 +130,7 @@ struct nvmf_ctrl_options {
struct nvmf_host *host;
char *dhchap_secret;
char *dhchap_ctrl_secret;
+ bool tls;
bool disable_sqflow;
bool hdr_digest;
bool data_digest;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f35647c470af..6fe7966f720b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -357,6 +357,7 @@ struct nvme_ctrl {
struct nvme_dhchap_key *ctrl_key;
u16 transaction;
#endif
+ struct key *tls_key;
/* Power saving configuration */
u64 ps_max_latency_us;
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 212e1b05d298..d0966159981c 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -527,6 +527,19 @@ static DEVICE_ATTR(dhchap_ctrl_secret, S_IRUGO | S_IWUSR,
nvme_ctrl_dhchap_ctrl_secret_show, nvme_ctrl_dhchap_ctrl_secret_store);
#endif
+#ifdef CONFIG_NVME_TCP_TLS
+static ssize_t tls_key_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (!ctrl->tls_key)
+ return 0;
+ return sysfs_emit(buf, "%08x", key_serial(ctrl->tls_key));
+}
+static DEVICE_ATTR_RO(tls_key);
+#endif
+
static struct attribute *nvme_dev_attrs[] = {
&dev_attr_reset_controller.attr,
&dev_attr_rescan_controller.attr,
@@ -553,6 +566,9 @@ static struct attribute *nvme_dev_attrs[] = {
#ifdef CONFIG_NVME_AUTH
&dev_attr_dhchap_secret.attr,
&dev_attr_dhchap_ctrl_secret.attr,
+#endif
+#ifdef CONFIG_NVME_TCP_TLS
+ &dev_attr_tls_key.attr,
#endif
NULL
};
@@ -583,6 +599,11 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts)
return 0;
#endif
+#ifdef CONFIG_NVME_TCP_TLS
+ if (a == &dev_attr_tls_key.attr &&
+ (!ctrl->opts || strcmp(ctrl->opts->transport, "tcp")))
+ return 0;
+#endif
return a->mode;
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 324d3bce65b8..fca18064f1fb 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -8,9 +8,13 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/key.h>
#include <linux/nvme-tcp.h>
+#include <linux/nvme-keyring.h>
#include <net/sock.h>
#include <net/tcp.h>
+#include <net/tls.h>
+#include <net/handshake.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
#include <net/busy_poll.h>
@@ -31,6 +35,16 @@ static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
+#ifdef CONFIG_NVME_TCP_TLS
+/*
+ * TLS handshake timeout
+ */
+static int tls_handshake_timeout = 10;
+module_param(tls_handshake_timeout, int, 0644);
+MODULE_PARM_DESC(tls_handshake_timeout,
+ "nvme TLS handshake timeout in seconds (default 10)");
+#endif
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* lockdep can detect a circular dependency of the form
* sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock
@@ -146,7 +160,10 @@ struct nvme_tcp_queue {
struct ahash_request *snd_hash;
__le32 exp_ddgst;
__le32 recv_ddgst;
-
+#ifdef CONFIG_NVME_TCP_TLS
+ struct completion tls_complete;
+ int tls_err;
+#endif
struct page_frag_cache pf_cache;
void (*state_change)(struct sock *);
@@ -1509,7 +1526,92 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
}
-static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
+#ifdef CONFIG_NVME_TCP_TLS
+static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
+{
+ struct nvme_tcp_queue *queue = data;
+ struct nvme_tcp_ctrl *ctrl = queue->ctrl;
+ int qid = nvme_tcp_queue_id(queue);
+ struct key *tls_key;
+
+ dev_dbg(ctrl->ctrl.device, "queue %d: TLS handshake done, key %x, status %d\n",
+ qid, pskid, status);
+
+ if (status) {
+ queue->tls_err = -status;
+ goto out_complete;
+ }
+
+ tls_key = key_lookup(pskid);
+ if (IS_ERR(tls_key)) {
+ dev_warn(ctrl->ctrl.device, "queue %d: Invalid key %x\n",
+ qid, pskid);
+ queue->tls_err = -ENOKEY;
+ } else {
+ ctrl->ctrl.tls_key = tls_key;
+ queue->tls_err = 0;
+ }
+
+out_complete:
+ complete(&queue->tls_complete);
+}
+
+static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
+ struct nvme_tcp_queue *queue,
+ key_serial_t pskid)
+{
+ int qid = nvme_tcp_queue_id(queue);
+ int ret;
+ struct tls_handshake_args args;
+ unsigned long tmo = tls_handshake_timeout * HZ;
+ key_serial_t keyring = nvme_keyring_id();
+
+ dev_dbg(nctrl->device, "queue %d: start TLS with key %x\n",
+ qid, pskid);
+ memset(&args, 0, sizeof(args));
+ args.ta_sock = queue->sock;
+ args.ta_done = nvme_tcp_tls_done;
+ args.ta_data = queue;
+ args.ta_my_peerids[0] = pskid;
+ args.ta_num_peerids = 1;
+ args.ta_keyring = keyring;
+ args.ta_timeout_ms = tls_handshake_timeout * 1000;
+ queue->tls_err = -EOPNOTSUPP;
+ init_completion(&queue->tls_complete);
+ ret = tls_client_hello_psk(&args, GFP_KERNEL);
+ if (ret) {
+ dev_err(nctrl->device, "queue %d: failed to start TLS: %d\n",
+ qid, ret);
+ return ret;
+ }
+ ret = wait_for_completion_interruptible_timeout(&queue->tls_complete, tmo);
+ if (ret <= 0) {
+ if (ret == 0)
+ ret = -ETIMEDOUT;
+
+ dev_err(nctrl->device,
+ "queue %d: TLS handshake failed, error %d\n",
+ qid, ret);
+ tls_handshake_cancel(queue->sock->sk);
+ } else {
+ dev_dbg(nctrl->device,
+ "queue %d: TLS handshake complete, error %d\n",
+ qid, queue->tls_err);
+ ret = queue->tls_err;
+ }
+ return ret;
+}
+#else
+static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
+ struct nvme_tcp_queue *queue,
+ key_serial_t pskid)
+{
+ return -EPROTONOSUPPORT;
+}
+#endif
+
+static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
+ key_serial_t pskid)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid];
@@ -1632,6 +1734,13 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
goto err_rcv_pdu;
}
+ /* If PSKs are configured try to start TLS */
+ if (pskid) {
+ ret = nvme_tcp_start_tls(nctrl, queue, pskid);
+ if (ret)
+ goto err_init_connect;
+ }
+
ret = nvme_tcp_init_connection(queue);
if (ret)
goto err_init_connect;
@@ -1781,10 +1890,22 @@ static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl,
static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
{
int ret;
+ key_serial_t pskid = 0;
+
+ if (ctrl->opts->tls) {
+ pskid = nvme_tls_psk_default(NULL,
+ ctrl->opts->host->nqn,
+ ctrl->opts->subsysnqn);
+ if (!pskid) {
+ dev_err(ctrl->device, "no valid PSK found\n");
+ ret = -ENOKEY;
+ goto out_free_queue;
+ }
+ }
- ret = nvme_tcp_alloc_queue(ctrl, 0);
+ ret = nvme_tcp_alloc_queue(ctrl, 0, pskid);
if (ret)
- return ret;
+ goto out_free_queue;
ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
if (ret)
@@ -1801,8 +1922,13 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
{
int i, ret;
+ if (ctrl->opts->tls && !ctrl->tls_key) {
+ dev_err(ctrl->device, "no PSK negotiated\n");
+ return -ENOKEY;
+ }
for (i = 1; i < ctrl->queue_count; i++) {
- ret = nvme_tcp_alloc_queue(ctrl, i);
+ ret = nvme_tcp_alloc_queue(ctrl, i,
+ key_serial(ctrl->tls_key));
if (ret)
goto out_free_queues;
}
@@ -2629,7 +2755,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
- NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE,
+ NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS,
.create_ctrl = nvme_tcp_create_ctrl,
};
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 09/18] nvme-tcp: control message handling for recvmsg()
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (7 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 08/18] nvme-tcp: enable TLS handshake upcall Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 10/18] nvme-tcp: improve icreq/icresp logging Hannes Reinecke
` (10 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
kTLS is sending TLS ALERT messages as control messages for recvmsg().
As we can't do anything sensible with it just abort the connection
and let the userspace agent to a re-negotiation.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/tcp.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index fca18064f1fb..e8e408dbb6ad 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -14,6 +14,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
+#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
@@ -1369,6 +1370,8 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
{
struct nvme_tcp_icreq_pdu *icreq;
struct nvme_tcp_icresp_pdu *icresp;
+ char cbuf[CMSG_LEN(sizeof(char))] = {};
+ u8 ctype;
struct msghdr msg = {};
struct kvec iov;
bool ctrl_hdgst, ctrl_ddgst;
@@ -1406,11 +1409,23 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
memset(&msg, 0, sizeof(msg));
iov.iov_base = icresp;
iov.iov_len = sizeof(*icresp);
+ if (queue->ctrl->ctrl.opts->tls) {
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+ }
ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
iov.iov_len, msg.msg_flags);
if (ret < 0)
goto free_icresp;
-
+ if (queue->ctrl->ctrl.opts->tls) {
+ ctype = tls_get_record_type(queue->sock->sk,
+ (struct cmsghdr *)cbuf);
+ if (ctype != TLS_RECORD_TYPE_DATA) {
+ pr_err("queue %d: unhandled TLS record %d\n",
+ nvme_tcp_queue_id(queue), ctype);
+ return -ENOTCONN;
+ }
+ }
ret = -EINVAL;
if (icresp->hdr.type != nvme_tcp_icresp) {
pr_err("queue %d: bad type returned %d\n",
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 10/18] nvme-tcp: improve icreq/icresp logging
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (8 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 09/18] nvme-tcp: control message handling for recvmsg() Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 11/18] nvme-fabrics: parse options 'keyring' and 'tls_key' Hannes Reinecke
` (9 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
When icreq/icresp fails we should be printing out a warning to
inform the user that the connection could not be established;
without it there won't be anything in the kernel message log,
just an error code returned to nvme-cli.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/tcp.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index e8e408dbb6ad..ef9cf8c7a113 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1403,8 +1403,11 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
iov.iov_base = icreq;
iov.iov_len = sizeof(*icreq);
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
- if (ret < 0)
+ if (ret < 0) {
+ pr_warn("queue %d: failed to send icreq, error %d\n",
+ nvme_tcp_queue_id(queue), ret);
goto free_icresp;
+ }
memset(&msg, 0, sizeof(msg));
iov.iov_base = icresp;
@@ -1415,8 +1418,11 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
}
ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
iov.iov_len, msg.msg_flags);
- if (ret < 0)
+ if (ret < 0) {
+ pr_warn("queue %d: failed to receive icresp, error %d\n",
+ nvme_tcp_queue_id(queue), ret);
goto free_icresp;
+ }
if (queue->ctrl->ctrl.opts->tls) {
ctype = tls_get_record_type(queue->sock->sk,
(struct cmsghdr *)cbuf);
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 11/18] nvme-fabrics: parse options 'keyring' and 'tls_key'
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (9 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 10/18] nvme-tcp: improve icreq/icresp logging Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-10-11 17:32 ` Keith Busch
2023-08-24 14:39 ` [PATCH 12/18] nvmet: make TCP sectype settable via configfs Hannes Reinecke
` (8 subsequent siblings)
19 siblings, 1 reply; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
Parse the fabrics options 'keyring' and 'tls_key' and store the
referenced keys in the options structure.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/host/fabrics.c | 55 ++++++++++++++++++++++++++++++++++++-
drivers/nvme/host/fabrics.h | 6 ++++
drivers/nvme/host/tcp.c | 14 +++++++---
3 files changed, 70 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index ddad482c3537..4673ead69c5f 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -12,6 +12,7 @@
#include <linux/seq_file.h>
#include "nvme.h"
#include "fabrics.h"
+#include <linux/nvme-keyring.h>
static LIST_HEAD(nvmf_transports);
static DECLARE_RWSEM(nvmf_transports_rwsem);
@@ -622,6 +623,23 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
return NULL;
}
+static struct key *nvmf_parse_key(int key_id)
+{
+ struct key *key;
+
+ if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) {
+ pr_err("TLS is not supported\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ key = key_lookup(key_id);
+ if (!IS_ERR(key))
+ pr_err("key id %08x not found\n", key_id);
+ else
+ pr_debug("Using key id %08x\n", key_id);
+ return key;
+}
+
static const match_table_t opt_tokens = {
{ NVMF_OPT_TRANSPORT, "transport=%s" },
{ NVMF_OPT_TRADDR, "traddr=%s" },
@@ -643,6 +661,10 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_NR_WRITE_QUEUES, "nr_write_queues=%d" },
{ NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" },
{ NVMF_OPT_TOS, "tos=%d" },
+#ifdef CONFIG_NVME_TCP_TLS
+ { NVMF_OPT_KEYRING, "keyring=%d" },
+ { NVMF_OPT_TLS_KEY, "tls_key=%d" },
+#endif
{ NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" },
{ NVMF_OPT_DISCOVERY, "discovery" },
{ NVMF_OPT_DHCHAP_SECRET, "dhchap_secret=%s" },
@@ -660,9 +682,10 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
char *options, *o, *p;
int token, ret = 0;
size_t nqnlen = 0;
- int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
+ int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO, key_id;
uuid_t hostid;
char hostnqn[NVMF_NQN_SIZE];
+ struct key *key;
/* Set defaults */
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
@@ -675,6 +698,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->data_digest = false;
opts->tos = -1; /* < 0 == use transport default */
opts->tls = false;
+ opts->tls_key = NULL;
+ opts->keyring = NULL;
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
@@ -928,6 +953,32 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
}
opts->tos = token;
break;
+ case NVMF_OPT_KEYRING:
+ if (match_int(args, &key_id) || key_id <= 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+ key = nvmf_parse_key(key_id);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto out;
+ }
+ key_put(opts->keyring);
+ opts->keyring = key;
+ break;
+ case NVMF_OPT_TLS_KEY:
+ if (match_int(args, &key_id) || key_id <= 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+ key = nvmf_parse_key(key_id);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto out;
+ }
+ key_put(opts->tls_key);
+ opts->tls_key = key;
+ break;
case NVMF_OPT_DISCOVERY:
opts->discovery_nqn = true;
break;
@@ -1168,6 +1219,8 @@ static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts,
void nvmf_free_options(struct nvmf_ctrl_options *opts)
{
nvmf_host_put(opts->host);
+ key_put(opts->keyring);
+ key_put(opts->tls_key);
kfree(opts->transport);
kfree(opts->traddr);
kfree(opts->trsvcid);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index dac17c3fee26..fbaee5a7be19 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -71,6 +71,8 @@ enum {
NVMF_OPT_DHCHAP_SECRET = 1 << 23,
NVMF_OPT_DHCHAP_CTRL_SECRET = 1 << 24,
NVMF_OPT_TLS = 1 << 25,
+ NVMF_OPT_KEYRING = 1 << 26,
+ NVMF_OPT_TLS_KEY = 1 << 27,
};
/**
@@ -103,6 +105,8 @@ enum {
* @dhchap_secret: DH-HMAC-CHAP secret
* @dhchap_ctrl_secret: DH-HMAC-CHAP controller secret for bi-directional
* authentication
+ * @keyring: Keyring to use for key lookups
+ * @tls_key: TLS key for encrypted connections (TCP)
* @tls: Start TLS encrypted connections (TCP)
* @disable_sqflow: disable controller sq flow control
* @hdr_digest: generate/verify header digest (TCP)
@@ -130,6 +134,8 @@ struct nvmf_ctrl_options {
struct nvmf_host *host;
char *dhchap_secret;
char *dhchap_ctrl_secret;
+ struct key *keyring;
+ struct key *tls_key;
bool tls;
bool disable_sqflow;
bool hdr_digest;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index ef9cf8c7a113..e44068315c61 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1595,6 +1595,8 @@ static int nvme_tcp_start_tls(struct nvme_ctrl *nctrl,
args.ta_data = queue;
args.ta_my_peerids[0] = pskid;
args.ta_num_peerids = 1;
+ if (nctrl->opts->keyring)
+ keyring = key_serial(nctrl->opts->keyring;
args.ta_keyring = keyring;
args.ta_timeout_ms = tls_handshake_timeout * 1000;
queue->tls_err = -EOPNOTSUPP;
@@ -1914,9 +1916,12 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
key_serial_t pskid = 0;
if (ctrl->opts->tls) {
- pskid = nvme_tls_psk_default(NULL,
- ctrl->opts->host->nqn,
- ctrl->opts->subsysnqn);
+ if (ctrl->opts->tls_key)
+ pskid = key_serial(ctrl->opts->tls_key);
+ else
+ pskid = nvme_tls_psk_default(ctrl->opts->keyring,
+ ctrl->opts->host->nqn,
+ ctrl->opts->subsysnqn);
if (!pskid) {
dev_err(ctrl->device, "no valid PSK found\n");
ret = -ENOKEY;
@@ -2776,7 +2781,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
- NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS,
+ NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS |
+ NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY,
.create_ctrl = nvme_tcp_create_ctrl,
};
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH 11/18] nvme-fabrics: parse options 'keyring' and 'tls_key'
2023-08-24 14:39 ` [PATCH 11/18] nvme-fabrics: parse options 'keyring' and 'tls_key' Hannes Reinecke
@ 2023-10-11 17:32 ` Keith Busch
2023-10-12 6:24 ` Hannes Reinecke
0 siblings, 1 reply; 40+ messages in thread
From: Keith Busch @ 2023-10-11 17:32 UTC (permalink / raw)
To: Hannes Reinecke
Cc: Christoph Hellwig, Sagi Grimberg, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev
On Thu, Aug 24, 2023 at 04:39:18PM +0200, Hannes Reinecke wrote:
> args.ta_data = queue;
> args.ta_my_peerids[0] = pskid;
> args.ta_num_peerids = 1;
> + if (nctrl->opts->keyring)
> + keyring = key_serial(nctrl->opts->keyring;
The key_serial call is missing the closing ')'. I fixed it up while
applying, but a little concerning. I'm guessing you tested with a
previous version?
Anyway, I've applied the series to nvme-6.7 since this otherwise looks
good. I'll let it sit there a day and send a pull request to Jens if the
build bots don't complain.
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH 11/18] nvme-fabrics: parse options 'keyring' and 'tls_key'
2023-10-11 17:32 ` Keith Busch
@ 2023-10-12 6:24 ` Hannes Reinecke
0 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-10-12 6:24 UTC (permalink / raw)
To: Keith Busch
Cc: Christoph Hellwig, Sagi Grimberg, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev
On 10/11/23 19:32, Keith Busch wrote:
> On Thu, Aug 24, 2023 at 04:39:18PM +0200, Hannes Reinecke wrote:
>> args.ta_data = queue;
>> args.ta_my_peerids[0] = pskid;
>> args.ta_num_peerids = 1;
>> + if (nctrl->opts->keyring)
>> + keyring = key_serial(nctrl->opts->keyring;
>
> The key_serial call is missing the closing ')'. I fixed it up while
> applying, but a little concerning. I'm guessing you tested with a
> previous version?
>
Yes, I did. I've tried in this series to implement the suggestion from
Sagi to store the key serial in the 'opts' structure; hence you wouldn't
need to call key_serial here. But then backed it out again as it didn't
make sense.
But that was apparently imperfectly done :-(
> Anyway, I've applied the series to nvme-6.7 since this otherwise looks
> good. I'll let it sit there a day and send a pull request to Jens if the
> build bots don't complain.
Thank you!
I had been wondering if I needed another resend...
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH 12/18] nvmet: make TCP sectype settable via configfs
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (10 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 11/18] nvme-fabrics: parse options 'keyring' and 'tls_key' Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 13/18] nvmet-tcp: make nvmet_tcp_alloc_queue() a void function Hannes Reinecke
` (7 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
Add a new configfs attribute 'addr_tsas' to make the TCP sectype
settable via configfs.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/target/configfs.c | 75 +++++++++++++++++++++++++++++++++-
1 file changed, 74 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 907143870da5..27a0f9c768e8 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -159,6 +159,11 @@ static const struct nvmet_type_name_map nvmet_addr_treq[] = {
{ NVMF_TREQ_NOT_REQUIRED, "not required" },
};
+static inline u8 nvmet_port_disc_addr_treq_mask(struct nvmet_port *port)
+{
+ return (port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK);
+}
+
static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
{
u8 treq = to_nvmet_port(item)->disc_addr.treq &
@@ -178,7 +183,7 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
- u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK;
+ u8 treq = nvmet_port_disc_addr_treq_mask(port);
int i;
if (nvmet_is_port_enabled(port, __func__))
@@ -303,6 +308,11 @@ static void nvmet_port_init_tsas_rdma(struct nvmet_port *port)
port->disc_addr.tsas.rdma.cms = NVMF_RDMA_CMS_RDMA_CM;
}
+static void nvmet_port_init_tsas_tcp(struct nvmet_port *port, int sectype)
+{
+ port->disc_addr.tsas.tcp.sectype = sectype;
+}
+
static ssize_t nvmet_addr_trtype_store(struct config_item *item,
const char *page, size_t count)
{
@@ -325,11 +335,73 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item,
port->disc_addr.trtype = nvmet_transport[i].type;
if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA)
nvmet_port_init_tsas_rdma(port);
+ else if (port->disc_addr.trtype == NVMF_TRTYPE_TCP)
+ nvmet_port_init_tsas_tcp(port, NVMF_TCP_SECTYPE_NONE);
return count;
}
CONFIGFS_ATTR(nvmet_, addr_trtype);
+static const struct nvmet_type_name_map nvmet_addr_tsas_tcp[] = {
+ { NVMF_TCP_SECTYPE_NONE, "none" },
+ { NVMF_TCP_SECTYPE_TLS13, "tls1.3" },
+};
+
+static const struct nvmet_type_name_map nvmet_addr_tsas_rdma[] = {
+ { NVMF_RDMA_QPTYPE_CONNECTED, "connected" },
+ { NVMF_RDMA_QPTYPE_DATAGRAM, "datagram" },
+};
+
+static ssize_t nvmet_addr_tsas_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+ int i;
+
+ if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) {
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) {
+ if (port->disc_addr.tsas.tcp.sectype == nvmet_addr_tsas_tcp[i].type)
+ return sprintf(page, "%s\n", nvmet_addr_tsas_tcp[i].name);
+ }
+ } else if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) {
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_rdma); i++) {
+ if (port->disc_addr.tsas.rdma.qptype == nvmet_addr_tsas_rdma[i].type)
+ return sprintf(page, "%s\n", nvmet_addr_tsas_rdma[i].name);
+ }
+ }
+ return sprintf(page, "reserved\n");
+}
+
+static ssize_t nvmet_addr_tsas_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+ u8 sectype;
+ int i;
+
+ if (nvmet_is_port_enabled(port, __func__))
+ return -EACCES;
+
+ if (port->disc_addr.trtype != NVMF_TRTYPE_TCP)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) {
+ if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name)) {
+ sectype = nvmet_addr_tsas_tcp[i].type;
+ goto found;
+ }
+ }
+
+ pr_err("Invalid value '%s' for tsas\n", page);
+ return -EINVAL;
+
+found:
+ nvmet_port_init_tsas_tcp(port, sectype);
+ return count;
+}
+
+CONFIGFS_ATTR(nvmet_, addr_tsas);
+
/*
* Namespace structures & file operation functions below
*/
@@ -1741,6 +1813,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_traddr,
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
+ &nvmet_attr_addr_tsas,
&nvmet_attr_param_inline_data_size,
#ifdef CONFIG_BLK_DEV_INTEGRITY
&nvmet_attr_param_pi_enable,
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 13/18] nvmet-tcp: make nvmet_tcp_alloc_queue() a void function
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (11 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 12/18] nvmet: make TCP sectype settable via configfs Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 14/18] nvmet-tcp: allocate socket file Hannes Reinecke
` (6 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke,
Nitesh Shetty
The return value from nvmet_tcp_alloc_queue() are just used to
figure out if sock_release() need to be called. So this patch
moves sock_release() into nvmet_tcp_alloc_queue() and make it
a void function.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
---
drivers/nvme/target/tcp.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 97d07488072d..d44e9051ddd9 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1621,15 +1621,17 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
return ret;
}
-static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
+static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
struct socket *newsock)
{
struct nvmet_tcp_queue *queue;
int ret;
queue = kzalloc(sizeof(*queue), GFP_KERNEL);
- if (!queue)
- return -ENOMEM;
+ if (!queue) {
+ ret = -ENOMEM;
+ goto out_release;
+ }
INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
@@ -1666,7 +1668,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
if (ret)
goto out_destroy_sq;
- return 0;
+ return;
out_destroy_sq:
mutex_lock(&nvmet_tcp_queue_mutex);
list_del_init(&queue->queue_list);
@@ -1678,7 +1680,9 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
ida_free(&nvmet_tcp_queue_ida, queue->idx);
out_free_queue:
kfree(queue);
- return ret;
+out_release:
+ pr_err("failed to allocate queue, error %d\n", ret);
+ sock_release(newsock);
}
static void nvmet_tcp_accept_work(struct work_struct *w)
@@ -1695,11 +1699,7 @@ static void nvmet_tcp_accept_work(struct work_struct *w)
pr_warn("failed to accept err=%d\n", ret);
return;
}
- ret = nvmet_tcp_alloc_queue(port, newsock);
- if (ret) {
- pr_err("failed to allocate queue\n");
- sock_release(newsock);
- }
+ nvmet_tcp_alloc_queue(port, newsock);
}
}
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 14/18] nvmet-tcp: allocate socket file
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (12 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 13/18] nvmet-tcp: make nvmet_tcp_alloc_queue() a void function Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 15/18] nvmet: Set 'TREQ' to 'required' when TLS is enabled Hannes Reinecke
` (5 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
For the TLS upcall we need to allocate a socket file such
that the userspace daemon is able to use the socket.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/target/tcp.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index d44e9051ddd9..f19ea9d923fd 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1493,12 +1493,12 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
nvmet_sq_destroy(&queue->nvme_sq);
cancel_work_sync(&queue->io_work);
nvmet_tcp_free_cmd_data_in_buffers(queue);
- sock_release(queue->sock);
+ /* ->sock will be released by fput() */
+ fput(queue->sock->file);
nvmet_tcp_free_cmds(queue);
if (queue->hdr_digest || queue->data_digest)
nvmet_tcp_free_crypto(queue);
ida_free(&nvmet_tcp_queue_ida, queue->idx);
-
page = virt_to_head_page(queue->pf_cache.va);
__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
kfree(queue);
@@ -1625,6 +1625,7 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
struct socket *newsock)
{
struct nvmet_tcp_queue *queue;
+ struct file *sock_file = NULL;
int ret;
queue = kzalloc(sizeof(*queue), GFP_KERNEL);
@@ -1644,10 +1645,16 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
init_llist_head(&queue->resp_list);
INIT_LIST_HEAD(&queue->resp_send_list);
+ sock_file = sock_alloc_file(queue->sock, O_CLOEXEC, NULL);
+ if (IS_ERR(sock_file)) {
+ ret = PTR_ERR(sock_file);
+ goto out_free_queue;
+ }
+
queue->idx = ida_alloc(&nvmet_tcp_queue_ida, GFP_KERNEL);
if (queue->idx < 0) {
ret = queue->idx;
- goto out_free_queue;
+ goto out_sock;
}
ret = nvmet_tcp_alloc_cmd(queue, &queue->connect);
@@ -1678,11 +1685,14 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
nvmet_tcp_free_cmd(&queue->connect);
out_ida_remove:
ida_free(&nvmet_tcp_queue_ida, queue->idx);
+out_sock:
+ fput(queue->sock->file);
out_free_queue:
kfree(queue);
out_release:
pr_err("failed to allocate queue, error %d\n", ret);
- sock_release(newsock);
+ if (!sock_file)
+ sock_release(newsock);
}
static void nvmet_tcp_accept_work(struct work_struct *w)
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 15/18] nvmet: Set 'TREQ' to 'required' when TLS is enabled
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (13 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 14/18] nvmet-tcp: allocate socket file Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 16/18] nvmet-tcp: enable TLS handshake upcall Hannes Reinecke
` (4 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
The current implementation does not support secure concatenation,
so 'TREQ' is always set to 'required' when TLS is enabled.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/target/configfs.c | 15 +++++++++++++--
drivers/nvme/target/nvmet.h | 5 +++++
2 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 27a0f9c768e8..483569c3f622 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -166,8 +166,7 @@ static inline u8 nvmet_port_disc_addr_treq_mask(struct nvmet_port *port)
static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
{
- u8 treq = to_nvmet_port(item)->disc_addr.treq &
- NVME_TREQ_SECURE_CHANNEL_MASK;
+ u8 treq = nvmet_port_disc_addr_treq_secure_channel(to_nvmet_port(item));
int i;
for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
@@ -376,6 +375,7 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
+ u8 treq = nvmet_port_disc_addr_treq_mask(port);
u8 sectype;
int i;
@@ -397,6 +397,17 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
found:
nvmet_port_init_tsas_tcp(port, sectype);
+ /*
+ * The TLS implementation currently does not support
+ * secure concatenation, so TREQ is always set to 'required'
+ * if TLS is enabled.
+ */
+ if (sectype == NVMF_TCP_SECTYPE_TLS13) {
+ treq |= NVMF_TREQ_REQUIRED;
+ } else {
+ treq |= NVMF_TREQ_NOT_SPECIFIED;
+ }
+ port->disc_addr.treq = treq;
return count;
}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 8cfd60f3b564..87da62e4b743 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -178,6 +178,11 @@ static inline struct nvmet_port *ana_groups_to_port(
ana_groups_group);
}
+static inline u8 nvmet_port_disc_addr_treq_secure_channel(struct nvmet_port *port)
+{
+ return (port->disc_addr.treq & NVME_TREQ_SECURE_CHANNEL_MASK);
+}
+
struct nvmet_ctrl {
struct nvmet_subsys *subsys;
struct nvmet_sq **sqs;
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 16/18] nvmet-tcp: enable TLS handshake upcall
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (14 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 15/18] nvmet: Set 'TREQ' to 'required' when TLS is enabled Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-09-12 11:32 ` Sagi Grimberg
2023-08-24 14:39 ` [PATCH 17/18] nvmet-tcp: control messages for recvmsg() Hannes Reinecke
` (3 subsequent siblings)
19 siblings, 1 reply; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
TLS handshake is handled in userspace with the
netlink tls handshake protocol.
The patch adds a function to start the TLS handshake
upcall for any incoming network connections if
the TCP TSAS sectype is set to 'tls1.3'.
A config option NVME_TARGET_TCP_TLS selects whether
the TLS handshake upcall should be compiled in.
The patch also adds reference counting to
struct nvmet_tcp_queue to ensure the queue is
always valid when the the TLS handshake completes.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/nvme/target/Kconfig | 15 ++++
drivers/nvme/target/configfs.c | 21 +++++
drivers/nvme/target/nvmet.h | 1 +
drivers/nvme/target/tcp.c | 154 ++++++++++++++++++++++++++++++++-
4 files changed, 187 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 79fc64035ee3..c56cb1005327 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -84,6 +84,21 @@ config NVME_TARGET_TCP
If unsure, say N.
+config NVME_TARGET_TCP_TLS
+ bool "NVMe over Fabrics TCP target TLS encryption support"
+ depends on NVME_TARGET_TCP
+ select NVME_COMMON
+ select NVME_KEYRING
+ select NET_HANDSHAKE
+ select KEYS
+ help
+ Enables TLS encryption for the NVMe TCP target using the netlink handshake API.
+
+ The TLS handshake daemon is available at
+ https://github.com/oracle/ktls-utils.
+
+ If unsure, say N.
+
config NVME_TARGET_AUTH
bool "NVMe over Fabrics In-band Authentication support"
depends on NVME_TARGET
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 483569c3f622..b780ce049163 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -15,6 +15,7 @@
#ifdef CONFIG_NVME_TARGET_AUTH
#include <linux/nvme-auth.h>
#endif
+#include <linux/nvme-keyring.h>
#include <crypto/hash.h>
#include <crypto/kpp.h>
@@ -396,6 +397,17 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
return -EINVAL;
found:
+ if (sectype == NVMF_TCP_SECTYPE_TLS13) {
+ if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) {
+ pr_err("TLS is not supported\n");
+ return -EINVAL;
+ }
+ if (!port->keyring) {
+ pr_err("TLS keyring not configured\n");
+ return -EINVAL;
+ }
+ }
+
nvmet_port_init_tsas_tcp(port, sectype);
/*
* The TLS implementation currently does not support
@@ -1814,6 +1826,7 @@ static void nvmet_port_release(struct config_item *item)
flush_workqueue(nvmet_wq);
list_del(&port->global_entry);
+ key_put(port->keyring);
kfree(port->ana_state);
kfree(port);
}
@@ -1863,6 +1876,14 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
return ERR_PTR(-ENOMEM);
}
+ if (nvme_keyring_id()) {
+ port->keyring = key_lookup(nvme_keyring_id());
+ if (IS_ERR(port->keyring)) {
+ pr_warn("NVMe keyring not available, disabling TLS\n");
+ port->keyring = NULL;
+ }
+ }
+
for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
if (i == NVMET_DEFAULT_ANA_GRPID)
port->ana_state[1] = NVME_ANA_OPTIMIZED;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 87da62e4b743..e35a03260f45 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -158,6 +158,7 @@ struct nvmet_port {
struct config_group ana_groups_group;
struct nvmet_ana_group ana_default_group;
enum nvme_ana_state *ana_state;
+ struct key *keyring;
void *priv;
bool enabled;
int inline_data_size;
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f19ea9d923fd..1139d40a9ef6 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -8,9 +8,13 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
+#include <linux/key.h>
#include <linux/nvme-tcp.h>
+#include <linux/nvme-keyring.h>
#include <net/sock.h>
#include <net/tcp.h>
+#include <net/tls.h>
+#include <net/handshake.h>
#include <linux/inet.h>
#include <linux/llist.h>
#include <crypto/hash.h>
@@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, &set_param_ops,
MODULE_PARM_DESC(idle_poll_period_usecs,
"nvmet tcp io_work poll till idle time period in usecs: Default 0");
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+/*
+ * TLS handshake timeout
+ */
+static int tls_handshake_timeout = 10;
+module_param(tls_handshake_timeout, int, 0644);
+MODULE_PARM_DESC(tls_handshake_timeout,
+ "nvme TLS handshake timeout in seconds (default 10)");
+#endif
+
#define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8
#define NVMET_TCP_IO_WORK_BUDGET 64
@@ -122,8 +136,10 @@ struct nvmet_tcp_cmd {
enum nvmet_tcp_queue_state {
NVMET_TCP_Q_CONNECTING,
+ NVMET_TCP_Q_TLS_HANDSHAKE,
NVMET_TCP_Q_LIVE,
NVMET_TCP_Q_DISCONNECTING,
+ NVMET_TCP_Q_FAILED,
};
struct nvmet_tcp_queue {
@@ -132,6 +148,7 @@ struct nvmet_tcp_queue {
struct work_struct io_work;
struct nvmet_cq nvme_cq;
struct nvmet_sq nvme_sq;
+ struct kref kref;
/* send state */
struct nvmet_tcp_cmd *cmds;
@@ -155,6 +172,10 @@ struct nvmet_tcp_queue {
struct ahash_request *snd_hash;
struct ahash_request *rcv_hash;
+ /* TLS state */
+ key_serial_t tls_pskid;
+ struct delayed_work tls_handshake_tmo_work;
+
unsigned long poll_end;
spinlock_t state_lock;
@@ -918,6 +939,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
free_crypto:
if (queue->hdr_digest || queue->data_digest)
nvmet_tcp_free_crypto(queue);
+ queue->state = NVMET_TCP_Q_FAILED;
return ret;
}
@@ -1283,12 +1305,25 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
return ret;
}
+static void nvmet_tcp_release_queue(struct kref *kref)
+{
+ struct nvmet_tcp_queue *queue =
+ container_of(kref, struct nvmet_tcp_queue, kref);
+
+ WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
+ queue_work(nvmet_wq, &queue->release_work);
+}
+
static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
{
spin_lock_bh(&queue->state_lock);
+ if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) {
+ /* Socket closed during handshake */
+ tls_handshake_cancel(queue->sock->sk);
+ }
if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
queue->state = NVMET_TCP_Q_DISCONNECTING;
- queue_work(nvmet_wq, &queue->release_work);
+ kref_put(&queue->kref, nvmet_tcp_release_queue);
}
spin_unlock_bh(&queue->state_lock);
}
@@ -1485,6 +1520,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
mutex_unlock(&nvmet_tcp_queue_mutex);
nvmet_tcp_restore_socket_callbacks(queue);
+ cancel_delayed_work_sync(&queue->tls_handshake_tmo_work);
cancel_work_sync(&queue->io_work);
/* stop accepting incoming data */
queue->rcv_state = NVMET_TCP_RECV_ERR;
@@ -1512,8 +1548,13 @@ static void nvmet_tcp_data_ready(struct sock *sk)
read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
- if (likely(queue))
- queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+ if (likely(queue)) {
+ if (queue->data_ready)
+ queue->data_ready(sk);
+ if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
+ queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
+ &queue->io_work);
+ }
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1621,6 +1662,87 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
return ret;
}
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+static void nvmet_tcp_tls_handshake_done(void *data, int status,
+ key_serial_t peerid)
+{
+ struct nvmet_tcp_queue *queue = data;
+
+ pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
+ queue->idx, peerid, status);
+ spin_lock_bh(&queue->state_lock);
+ if (WARN_ON(queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)) {
+ spin_unlock_bh(&queue->state_lock);
+ return;
+ }
+ if (!status) {
+ queue->tls_pskid = peerid;
+ queue->state = NVMET_TCP_Q_CONNECTING;
+ } else
+ queue->state = NVMET_TCP_Q_FAILED;
+ spin_unlock_bh(&queue->state_lock);
+
+ cancel_delayed_work_sync(&queue->tls_handshake_tmo_work);
+ if (status)
+ nvmet_tcp_schedule_release_queue(queue);
+ else
+ nvmet_tcp_set_queue_sock(queue);
+ kref_put(&queue->kref, nvmet_tcp_release_queue);
+}
+
+static void nvmet_tcp_tls_handshake_timeout(struct work_struct *w)
+{
+ struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w),
+ struct nvmet_tcp_queue, tls_handshake_tmo_work);
+
+ pr_warn("queue %d: TLS handshake timeout\n", queue->idx);
+ /*
+ * If tls_handshake_cancel() fails we've lost the race with
+ * nvmet_tcp_tls_handshake_done() */
+ if (!tls_handshake_cancel(queue->sock->sk))
+ return;
+ spin_lock_bh(&queue->state_lock);
+ if (WARN_ON(queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)) {
+ spin_unlock_bh(&queue->state_lock);
+ return;
+ }
+ queue->state = NVMET_TCP_Q_FAILED;
+ spin_unlock_bh(&queue->state_lock);
+ nvmet_tcp_schedule_release_queue(queue);
+ kref_put(&queue->kref, nvmet_tcp_release_queue);
+}
+
+static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue)
+{
+ int ret = -EOPNOTSUPP;
+ struct tls_handshake_args args;
+
+ if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
+ pr_warn("cannot start TLS in state %d\n", queue->state);
+ return -EINVAL;
+ }
+
+ kref_get(&queue->kref);
+ pr_debug("queue %d: TLS ServerHello\n", queue->idx);
+ memset(&args, 0, sizeof(args));
+ args.ta_sock = queue->sock;
+ args.ta_done = nvmet_tcp_tls_handshake_done;
+ args.ta_data = queue;
+ args.ta_keyring = key_serial(queue->port->nport->keyring);
+ args.ta_timeout_ms = tls_handshake_timeout * 1000;
+
+ ret = tls_server_hello_psk(&args, GFP_KERNEL);
+ if (ret) {
+ kref_put(&queue->kref, nvmet_tcp_release_queue);
+ pr_err("failed to start TLS, err=%d\n", ret);
+ } else {
+ queue_delayed_work(nvmet_wq, &queue->tls_handshake_tmo_work,
+ tls_handshake_timeout * HZ);
+ }
+ return ret;
+}
+#endif
+
static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
struct socket *newsock)
{
@@ -1636,11 +1758,16 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
+ kref_init(&queue->kref);
queue->sock = newsock;
queue->port = port;
queue->nr_cmds = 0;
spin_lock_init(&queue->state_lock);
- queue->state = NVMET_TCP_Q_CONNECTING;
+ if (queue->port->nport->disc_addr.tsas.tcp.sectype ==
+ NVMF_TCP_SECTYPE_TLS13)
+ queue->state = NVMET_TCP_Q_TLS_HANDSHAKE;
+ else
+ queue->state = NVMET_TCP_Q_CONNECTING;
INIT_LIST_HEAD(&queue->free_list);
init_llist_head(&queue->resp_list);
INIT_LIST_HEAD(&queue->resp_send_list);
@@ -1671,6 +1798,25 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
mutex_unlock(&nvmet_tcp_queue_mutex);
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+ INIT_DELAYED_WORK(&queue->tls_handshake_tmo_work,
+ nvmet_tcp_tls_handshake_timeout);
+ if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) {
+ struct sock *sk = queue->sock->sk;
+
+ /* Restore the default callbacks before starting upcall */
+ read_lock_bh(&sk->sk_callback_lock);
+ sk->sk_user_data = NULL;
+ sk->sk_data_ready = port->data_ready;
+ read_unlock_bh(&sk->sk_callback_lock);
+ if (!nvmet_tcp_tls_handshake(queue))
+ return;
+
+ /* TLS handshake failed, terminate the connection */
+ goto out_destroy_sq;
+ }
+#endif
+
ret = nvmet_tcp_set_queue_sock(queue);
if (ret)
goto out_destroy_sq;
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 17/18] nvmet-tcp: control messages for recvmsg()
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (15 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 16/18] nvmet-tcp: enable TLS handshake upcall Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-08-24 14:39 ` [PATCH 18/18] nvmet-tcp: peek icreq before starting TLS Hannes Reinecke
` (2 subsequent siblings)
19 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
kTLS requires control messages for recvmsg() to relay any out-of-band
TLS messages (eg TLS alerts) to the caller.
Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
drivers/nvme/target/tcp.c | 93 +++++++++++++++++++++++++++++++++------
1 file changed, 80 insertions(+), 13 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 1139d40a9ef6..67fffa2e1e4a 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -14,6 +14,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
+#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/inet.h>
#include <linux/llist.h>
@@ -118,6 +119,7 @@ struct nvmet_tcp_cmd {
u32 pdu_len;
u32 pdu_recv;
int sg_idx;
+ char recv_cbuf[CMSG_LEN(sizeof(char))];
struct msghdr recv_msg;
struct bio_vec *iov;
u32 flags;
@@ -1121,20 +1123,65 @@ static inline bool nvmet_tcp_pdu_valid(u8 type)
return false;
}
+static int nvmet_tcp_tls_record_ok(struct nvmet_tcp_queue *queue,
+ struct msghdr *msg, char *cbuf)
+{
+ struct cmsghdr *cmsg = (struct cmsghdr *)cbuf;
+ u8 ctype, level, description;
+ int ret = 0;
+
+ ctype = tls_get_record_type(queue->sock->sk, cmsg);
+ switch (ctype) {
+ case 0:
+ break;
+ case TLS_RECORD_TYPE_DATA:
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ tls_alert_recv(queue->sock->sk, msg, &level, &description);
+ if (level == TLS_ALERT_LEVEL_FATAL) {
+ pr_err("queue %d: TLS Alert desc %u\n",
+ queue->idx, description);
+ ret = -ENOTCONN;
+ } else {
+ pr_warn("queue %d: TLS Alert desc %u\n",
+ queue->idx, description);
+ ret = -EAGAIN;
+ }
+ break;
+ default:
+ /* discard this record type */
+ pr_err("queue %d: TLS record %d unhandled\n",
+ queue->idx, ctype);
+ ret = -EAGAIN;
+ break;
+ }
+ return ret;
+}
+
static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
{
struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
- int len;
+ int len, ret;
struct kvec iov;
+ char cbuf[CMSG_LEN(sizeof(char))] = {};
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
recv:
iov.iov_base = (void *)&queue->pdu + queue->offset;
iov.iov_len = queue->left;
+ if (queue->tls_pskid) {
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+ }
len = kernel_recvmsg(queue->sock, &msg, &iov, 1,
iov.iov_len, msg.msg_flags);
if (unlikely(len < 0))
return len;
+ if (queue->tls_pskid) {
+ ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf);
+ if (ret < 0)
+ return ret;
+ }
queue->offset += len;
queue->left -= len;
@@ -1187,16 +1234,22 @@ static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd)
static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
{
struct nvmet_tcp_cmd *cmd = queue->cmd;
- int ret;
+ int len, ret;
while (msg_data_left(&cmd->recv_msg)) {
- ret = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg,
+ len = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg,
cmd->recv_msg.msg_flags);
- if (ret <= 0)
- return ret;
+ if (len <= 0)
+ return len;
+ if (queue->tls_pskid) {
+ ret = nvmet_tcp_tls_record_ok(cmd->queue,
+ &cmd->recv_msg, cmd->recv_cbuf);
+ if (ret < 0)
+ return ret;
+ }
- cmd->pdu_recv += ret;
- cmd->rbytes_done += ret;
+ cmd->pdu_recv += len;
+ cmd->rbytes_done += len;
}
if (queue->data_digest) {
@@ -1214,20 +1267,30 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
{
struct nvmet_tcp_cmd *cmd = queue->cmd;
- int ret;
+ int ret, len;
+ char cbuf[CMSG_LEN(sizeof(char))] = {};
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = {
.iov_base = (void *)&cmd->recv_ddgst + queue->offset,
.iov_len = queue->left
};
- ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
+ if (queue->tls_pskid) {
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+ }
+ len = kernel_recvmsg(queue->sock, &msg, &iov, 1,
iov.iov_len, msg.msg_flags);
- if (unlikely(ret < 0))
- return ret;
+ if (unlikely(len < 0))
+ return len;
+ if (queue->tls_pskid) {
+ ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf);
+ if (ret < 0)
+ return ret;
+ }
- queue->offset += ret;
- queue->left -= ret;
+ queue->offset += len;
+ queue->left -= len;
if (queue->left)
return -EAGAIN;
@@ -1407,6 +1470,10 @@ static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
if (!c->r2t_pdu)
goto out_free_data;
+ if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) {
+ c->recv_msg.msg_control = c->recv_cbuf;
+ c->recv_msg.msg_controllen = sizeof(c->recv_cbuf);
+ }
c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
list_add_tail(&c->entry, &queue->free_list);
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH 18/18] nvmet-tcp: peek icreq before starting TLS
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (16 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 17/18] nvmet-tcp: control messages for recvmsg() Hannes Reinecke
@ 2023-08-24 14:39 ` Hannes Reinecke
2023-09-12 11:32 ` Sagi Grimberg
2023-09-06 8:48 ` [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Pawel Baldysiak
2023-09-12 11:34 ` Sagi Grimberg
19 siblings, 1 reply; 40+ messages in thread
From: Hannes Reinecke @ 2023-08-24 14:39 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Sagi Grimberg, Keith Busch, linux-nvme, Jakub Kicinski,
Eric Dumazet, Paolo Abeni, netdev, Hannes Reinecke
Incoming connection might be either 'normal' NVMe-TCP connections
starting with icreq or TLS handshakes. To ensure that 'normal'
connections can still be handled we need to peek the first packet
and only start TLS handshake if it's not an icreq.
With that we can lift the restriction to always set TREQ to
'required' when TLS1.3 is enabled.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/nvme/target/configfs.c | 25 +++++++++++---
drivers/nvme/target/nvmet.h | 5 +++
drivers/nvme/target/tcp.c | 61 +++++++++++++++++++++++++++++++---
3 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index b780ce049163..9eed6e6765ea 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -198,6 +198,20 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item,
return -EINVAL;
found:
+ if (port->disc_addr.trtype == NVMF_TRTYPE_TCP &&
+ port->disc_addr.tsas.tcp.sectype == NVMF_TCP_SECTYPE_TLS13) {
+ switch (nvmet_addr_treq[i].type) {
+ case NVMF_TREQ_NOT_SPECIFIED:
+ pr_debug("treq '%s' not allowed for TLS1.3\n",
+ nvmet_addr_treq[i].name);
+ return -EINVAL;
+ case NVMF_TREQ_NOT_REQUIRED:
+ pr_warn("Allow non-TLS connections while TLS1.3 is enabled\n");
+ break;
+ default:
+ break;
+ }
+ }
treq |= nvmet_addr_treq[i].type;
port->disc_addr.treq = treq;
return count;
@@ -410,12 +424,15 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
nvmet_port_init_tsas_tcp(port, sectype);
/*
- * The TLS implementation currently does not support
- * secure concatenation, so TREQ is always set to 'required'
- * if TLS is enabled.
+ * If TLS is enabled TREQ should be set to 'required' per default
*/
if (sectype == NVMF_TCP_SECTYPE_TLS13) {
- treq |= NVMF_TREQ_REQUIRED;
+ u8 sc = nvmet_port_disc_addr_treq_secure_channel(port);
+
+ if (sc == NVMF_TREQ_NOT_SPECIFIED)
+ treq |= NVMF_TREQ_REQUIRED;
+ else
+ treq |= sc;
} else {
treq |= NVMF_TREQ_NOT_SPECIFIED;
}
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index e35a03260f45..3e179019ca7c 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -184,6 +184,11 @@ static inline u8 nvmet_port_disc_addr_treq_secure_channel(struct nvmet_port *por
return (port->disc_addr.treq & NVME_TREQ_SECURE_CHANNEL_MASK);
}
+static inline bool nvmet_port_secure_channel_required(struct nvmet_port *port)
+{
+ return nvmet_port_disc_addr_treq_secure_channel(port) == NVMF_TREQ_REQUIRED;
+}
+
struct nvmet_ctrl {
struct nvmet_subsys *subsys;
struct nvmet_sq **sqs;
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 67fffa2e1e4a..5c1518a8bded 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1730,6 +1730,54 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
}
#ifdef CONFIG_NVME_TARGET_TCP_TLS
+static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue)
+{
+ struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
+ int len, ret;
+ struct kvec iov = {
+ .iov_base = (u8 *)&queue->pdu + queue->offset,
+ .iov_len = sizeof(struct nvme_tcp_hdr),
+ };
+ char cbuf[CMSG_LEN(sizeof(char))] = {};
+ struct msghdr msg = {
+ .msg_control = cbuf,
+ .msg_controllen = sizeof(cbuf),
+ .msg_flags = MSG_PEEK,
+ };
+
+ if (nvmet_port_secure_channel_required(queue->port->nport))
+ return 0;
+
+ len = kernel_recvmsg(queue->sock, &msg, &iov, 1,
+ iov.iov_len, msg.msg_flags);
+ if (unlikely(len < 0)) {
+ pr_debug("queue %d: peek error %d\n",
+ queue->idx, len);
+ return len;
+ }
+
+ ret = nvmet_tcp_tls_record_ok(queue, &msg, cbuf);
+ if (ret < 0)
+ return ret;
+
+ if (len < sizeof(struct nvme_tcp_hdr)) {
+ pr_debug("queue %d: short read, %d bytes missing\n",
+ queue->idx, (int)iov.iov_len - len);
+ return -EAGAIN;
+ }
+ pr_debug("queue %d: hdr type %d hlen %d plen %d size %d\n",
+ queue->idx, hdr->type, hdr->hlen, hdr->plen,
+ (int)sizeof(struct nvme_tcp_icreq_pdu));
+ if (hdr->type == nvme_tcp_icreq &&
+ hdr->hlen == sizeof(struct nvme_tcp_icreq_pdu) &&
+ hdr->plen == (__le32)sizeof(struct nvme_tcp_icreq_pdu)) {
+ pr_debug("queue %d: icreq detected\n",
+ queue->idx);
+ return len;
+ }
+ return 0;
+}
+
static void nvmet_tcp_tls_handshake_done(void *data, int status,
key_serial_t peerid)
{
@@ -1876,11 +1924,14 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
sk->sk_user_data = NULL;
sk->sk_data_ready = port->data_ready;
read_unlock_bh(&sk->sk_callback_lock);
- if (!nvmet_tcp_tls_handshake(queue))
- return;
-
- /* TLS handshake failed, terminate the connection */
- goto out_destroy_sq;
+ if (!nvmet_tcp_try_peek_pdu(queue)) {
+ if (!nvmet_tcp_tls_handshake(queue))
+ return;
+ /* TLS handshake failed, terminate the connection */
+ goto out_destroy_sq;
+ }
+ /* Not a TLS connection, continue with normal processing */
+ queue->state = NVMET_TCP_Q_CONNECTING;
}
#endif
--
2.35.3
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH 18/18] nvmet-tcp: peek icreq before starting TLS
2023-08-24 14:39 ` [PATCH 18/18] nvmet-tcp: peek icreq before starting TLS Hannes Reinecke
@ 2023-09-12 11:32 ` Sagi Grimberg
2023-09-12 11:48 ` Hannes Reinecke
0 siblings, 1 reply; 40+ messages in thread
From: Sagi Grimberg @ 2023-09-12 11:32 UTC (permalink / raw)
To: Hannes Reinecke, Christoph Hellwig
Cc: Keith Busch, linux-nvme, Jakub Kicinski, Eric Dumazet,
Paolo Abeni, netdev
On 8/24/23 17:39, Hannes Reinecke wrote:
> Incoming connection might be either 'normal' NVMe-TCP connections
> starting with icreq or TLS handshakes. To ensure that 'normal'
> connections can still be handled we need to peek the first packet
> and only start TLS handshake if it's not an icreq.
> With that we can lift the restriction to always set TREQ to
> 'required' when TLS1.3 is enabled.
>
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
> drivers/nvme/target/configfs.c | 25 +++++++++++---
> drivers/nvme/target/nvmet.h | 5 +++
> drivers/nvme/target/tcp.c | 61 +++++++++++++++++++++++++++++++---
> 3 files changed, 82 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
> index b780ce049163..9eed6e6765ea 100644
> --- a/drivers/nvme/target/configfs.c
> +++ b/drivers/nvme/target/configfs.c
> @@ -198,6 +198,20 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item,
> return -EINVAL;
>
> found:
> + if (port->disc_addr.trtype == NVMF_TRTYPE_TCP &&
> + port->disc_addr.tsas.tcp.sectype == NVMF_TCP_SECTYPE_TLS13) {
> + switch (nvmet_addr_treq[i].type) {
> + case NVMF_TREQ_NOT_SPECIFIED:
> + pr_debug("treq '%s' not allowed for TLS1.3\n",
> + nvmet_addr_treq[i].name);
> + return -EINVAL;
> + case NVMF_TREQ_NOT_REQUIRED:
> + pr_warn("Allow non-TLS connections while TLS1.3 is enabled\n");
> + break;
> + default:
> + break;
> + }
> + }
> treq |= nvmet_addr_treq[i].type;
> port->disc_addr.treq = treq;
> return count;
> @@ -410,12 +424,15 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
>
> nvmet_port_init_tsas_tcp(port, sectype);
> /*
> - * The TLS implementation currently does not support
> - * secure concatenation, so TREQ is always set to 'required'
> - * if TLS is enabled.
> + * If TLS is enabled TREQ should be set to 'required' per default
> */
> if (sectype == NVMF_TCP_SECTYPE_TLS13) {
> - treq |= NVMF_TREQ_REQUIRED;
> + u8 sc = nvmet_port_disc_addr_treq_secure_channel(port);
> +
> + if (sc == NVMF_TREQ_NOT_SPECIFIED)
> + treq |= NVMF_TREQ_REQUIRED;
> + else
> + treq |= sc;
> } else {
> treq |= NVMF_TREQ_NOT_SPECIFIED;
> }
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index e35a03260f45..3e179019ca7c 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -184,6 +184,11 @@ static inline u8 nvmet_port_disc_addr_treq_secure_channel(struct nvmet_port *por
> return (port->disc_addr.treq & NVME_TREQ_SECURE_CHANNEL_MASK);
> }
>
> +static inline bool nvmet_port_secure_channel_required(struct nvmet_port *port)
> +{
> + return nvmet_port_disc_addr_treq_secure_channel(port) == NVMF_TREQ_REQUIRED;
> +}
> +
> struct nvmet_ctrl {
> struct nvmet_subsys *subsys;
> struct nvmet_sq **sqs;
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index 67fffa2e1e4a..5c1518a8bded 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -1730,6 +1730,54 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
> }
>
> #ifdef CONFIG_NVME_TARGET_TCP_TLS
You need a stub for the other side of the ifdef?
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH 18/18] nvmet-tcp: peek icreq before starting TLS
2023-09-12 11:32 ` Sagi Grimberg
@ 2023-09-12 11:48 ` Hannes Reinecke
2023-09-12 12:55 ` Sagi Grimberg
0 siblings, 1 reply; 40+ messages in thread
From: Hannes Reinecke @ 2023-09-12 11:48 UTC (permalink / raw)
To: Sagi Grimberg, Christoph Hellwig
Cc: Keith Busch, linux-nvme, Jakub Kicinski, Eric Dumazet,
Paolo Abeni, netdev
On 9/12/23 13:32, Sagi Grimberg wrote:
>
>
> On 8/24/23 17:39, Hannes Reinecke wrote:
>> Incoming connection might be either 'normal' NVMe-TCP connections
>> starting with icreq or TLS handshakes. To ensure that 'normal'
>> connections can still be handled we need to peek the first packet
>> and only start TLS handshake if it's not an icreq.
>> With that we can lift the restriction to always set TREQ to
>> 'required' when TLS1.3 is enabled.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>> drivers/nvme/target/configfs.c | 25 +++++++++++---
>> drivers/nvme/target/nvmet.h | 5 +++
>> drivers/nvme/target/tcp.c | 61 +++++++++++++++++++++++++++++++---
>> 3 files changed, 82 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/nvme/target/configfs.c
>> b/drivers/nvme/target/configfs.c
>> index b780ce049163..9eed6e6765ea 100644
>> --- a/drivers/nvme/target/configfs.c
>> +++ b/drivers/nvme/target/configfs.c
>> @@ -198,6 +198,20 @@ static ssize_t nvmet_addr_treq_store(struct
>> config_item *item,
>> return -EINVAL;
>> found:
>> + if (port->disc_addr.trtype == NVMF_TRTYPE_TCP &&
>> + port->disc_addr.tsas.tcp.sectype == NVMF_TCP_SECTYPE_TLS13) {
>> + switch (nvmet_addr_treq[i].type) {
>> + case NVMF_TREQ_NOT_SPECIFIED:
>> + pr_debug("treq '%s' not allowed for TLS1.3\n",
>> + nvmet_addr_treq[i].name);
>> + return -EINVAL;
>> + case NVMF_TREQ_NOT_REQUIRED:
>> + pr_warn("Allow non-TLS connections while TLS1.3 is
>> enabled\n");
>> + break;
>> + default:
>> + break;
>> + }
>> + }
>> treq |= nvmet_addr_treq[i].type;
>> port->disc_addr.treq = treq;
>> return count;
>> @@ -410,12 +424,15 @@ static ssize_t nvmet_addr_tsas_store(struct
>> config_item *item,
>> nvmet_port_init_tsas_tcp(port, sectype);
>> /*
>> - * The TLS implementation currently does not support
>> - * secure concatenation, so TREQ is always set to 'required'
>> - * if TLS is enabled.
>> + * If TLS is enabled TREQ should be set to 'required' per default
>> */
>> if (sectype == NVMF_TCP_SECTYPE_TLS13) {
>> - treq |= NVMF_TREQ_REQUIRED;
>> + u8 sc = nvmet_port_disc_addr_treq_secure_channel(port);
>> +
>> + if (sc == NVMF_TREQ_NOT_SPECIFIED)
>> + treq |= NVMF_TREQ_REQUIRED;
>> + else
>> + treq |= sc;
>> } else {
>> treq |= NVMF_TREQ_NOT_SPECIFIED;
>> }
>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
>> index e35a03260f45..3e179019ca7c 100644
>> --- a/drivers/nvme/target/nvmet.h
>> +++ b/drivers/nvme/target/nvmet.h
>> @@ -184,6 +184,11 @@ static inline u8
>> nvmet_port_disc_addr_treq_secure_channel(struct nvmet_port *por
>> return (port->disc_addr.treq & NVME_TREQ_SECURE_CHANNEL_MASK);
>> }
>> +static inline bool nvmet_port_secure_channel_required(struct
>> nvmet_port *port)
>> +{
>> + return nvmet_port_disc_addr_treq_secure_channel(port) ==
>> NVMF_TREQ_REQUIRED;
>> +}
>> +
>> struct nvmet_ctrl {
>> struct nvmet_subsys *subsys;
>> struct nvmet_sq **sqs;
>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>> index 67fffa2e1e4a..5c1518a8bded 100644
>> --- a/drivers/nvme/target/tcp.c
>> +++ b/drivers/nvme/target/tcp.c
>> @@ -1730,6 +1730,54 @@ static int nvmet_tcp_set_queue_sock(struct
>> nvmet_tcp_queue *queue)
>> }
>> #ifdef CONFIG_NVME_TARGET_TCP_TLS
>
> You need a stub for the other side of the ifdef?
No. The new function is completely encapsulated by
CONFIG_NVME_TARGET_TCP_TLS, so no need to add a stub.
Cheers,
Hannes
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH 18/18] nvmet-tcp: peek icreq before starting TLS
2023-09-12 11:48 ` Hannes Reinecke
@ 2023-09-12 12:55 ` Sagi Grimberg
0 siblings, 0 replies; 40+ messages in thread
From: Sagi Grimberg @ 2023-09-12 12:55 UTC (permalink / raw)
To: Hannes Reinecke, Christoph Hellwig
Cc: Keith Busch, linux-nvme, Jakub Kicinski, Eric Dumazet,
Paolo Abeni, netdev
On 9/12/23 14:48, Hannes Reinecke wrote:
> On 9/12/23 13:32, Sagi Grimberg wrote:
>>
>>
>> On 8/24/23 17:39, Hannes Reinecke wrote:
>>> Incoming connection might be either 'normal' NVMe-TCP connections
>>> starting with icreq or TLS handshakes. To ensure that 'normal'
>>> connections can still be handled we need to peek the first packet
>>> and only start TLS handshake if it's not an icreq.
>>> With that we can lift the restriction to always set TREQ to
>>> 'required' when TLS1.3 is enabled.
>>>
>>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>>> ---
>>> drivers/nvme/target/configfs.c | 25 +++++++++++---
>>> drivers/nvme/target/nvmet.h | 5 +++
>>> drivers/nvme/target/tcp.c | 61 +++++++++++++++++++++++++++++++---
>>> 3 files changed, 82 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/nvme/target/configfs.c
>>> b/drivers/nvme/target/configfs.c
>>> index b780ce049163..9eed6e6765ea 100644
>>> --- a/drivers/nvme/target/configfs.c
>>> +++ b/drivers/nvme/target/configfs.c
>>> @@ -198,6 +198,20 @@ static ssize_t nvmet_addr_treq_store(struct
>>> config_item *item,
>>> return -EINVAL;
>>> found:
>>> + if (port->disc_addr.trtype == NVMF_TRTYPE_TCP &&
>>> + port->disc_addr.tsas.tcp.sectype == NVMF_TCP_SECTYPE_TLS13) {
>>> + switch (nvmet_addr_treq[i].type) {
>>> + case NVMF_TREQ_NOT_SPECIFIED:
>>> + pr_debug("treq '%s' not allowed for TLS1.3\n",
>>> + nvmet_addr_treq[i].name);
>>> + return -EINVAL;
>>> + case NVMF_TREQ_NOT_REQUIRED:
>>> + pr_warn("Allow non-TLS connections while TLS1.3 is
>>> enabled\n");
>>> + break;
>>> + default:
>>> + break;
>>> + }
>>> + }
>>> treq |= nvmet_addr_treq[i].type;
>>> port->disc_addr.treq = treq;
>>> return count;
>>> @@ -410,12 +424,15 @@ static ssize_t nvmet_addr_tsas_store(struct
>>> config_item *item,
>>> nvmet_port_init_tsas_tcp(port, sectype);
>>> /*
>>> - * The TLS implementation currently does not support
>>> - * secure concatenation, so TREQ is always set to 'required'
>>> - * if TLS is enabled.
>>> + * If TLS is enabled TREQ should be set to 'required' per default
>>> */
>>> if (sectype == NVMF_TCP_SECTYPE_TLS13) {
>>> - treq |= NVMF_TREQ_REQUIRED;
>>> + u8 sc = nvmet_port_disc_addr_treq_secure_channel(port);
>>> +
>>> + if (sc == NVMF_TREQ_NOT_SPECIFIED)
>>> + treq |= NVMF_TREQ_REQUIRED;
>>> + else
>>> + treq |= sc;
>>> } else {
>>> treq |= NVMF_TREQ_NOT_SPECIFIED;
>>> }
>>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
>>> index e35a03260f45..3e179019ca7c 100644
>>> --- a/drivers/nvme/target/nvmet.h
>>> +++ b/drivers/nvme/target/nvmet.h
>>> @@ -184,6 +184,11 @@ static inline u8
>>> nvmet_port_disc_addr_treq_secure_channel(struct nvmet_port *por
>>> return (port->disc_addr.treq & NVME_TREQ_SECURE_CHANNEL_MASK);
>>> }
>>> +static inline bool nvmet_port_secure_channel_required(struct
>>> nvmet_port *port)
>>> +{
>>> + return nvmet_port_disc_addr_treq_secure_channel(port) ==
>>> NVMF_TREQ_REQUIRED;
>>> +}
>>> +
>>> struct nvmet_ctrl {
>>> struct nvmet_subsys *subsys;
>>> struct nvmet_sq **sqs;
>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>>> index 67fffa2e1e4a..5c1518a8bded 100644
>>> --- a/drivers/nvme/target/tcp.c
>>> +++ b/drivers/nvme/target/tcp.c
>>> @@ -1730,6 +1730,54 @@ static int nvmet_tcp_set_queue_sock(struct
>>> nvmet_tcp_queue *queue)
>>> }
>>> #ifdef CONFIG_NVME_TARGET_TCP_TLS
>>
>> You need a stub for the other side of the ifdef?
>
> No. The new function is completely encapsulated by
> CONFIG_NVME_TARGET_TCP_TLS, so no need to add a stub.
Ok.
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCHv11 00/18] nvme: In-kernel TLS support for TCP
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (17 preceding siblings ...)
2023-08-24 14:39 ` [PATCH 18/18] nvmet-tcp: peek icreq before starting TLS Hannes Reinecke
@ 2023-09-06 8:48 ` Pawel Baldysiak
2023-09-06 10:04 ` Hannes Reinecke
2023-09-12 11:34 ` Sagi Grimberg
19 siblings, 1 reply; 40+ messages in thread
From: Pawel Baldysiak @ 2023-09-06 8:48 UTC (permalink / raw)
To: hare; +Cc: linux-nvme
Hi Hannes,
I'm testing v15 patchset in extensive way and observing that it is not stable.
I'm running it on two connected machines - one is used as a host, second as target.
Once connected I'm running I/Os with dd.
Eventually I got I/O timeouts from nvme layer on host,
causing connect/disconnect loops and I/O hung.
Does not matter if the workload is reads or writes.
Second test is connect/disconnect loops.
On disconnect I've got ocasional delays due to nvme commands timeouts.
The disconnect happens eventually, but first it need to hit timeout.
It is not happening if TLS is disable.
Are you performing similar tests on you setup? Are you observing the same?
Thanks
Pawel Baldysiak
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCHv11 00/18] nvme: In-kernel TLS support for TCP
2023-09-06 8:48 ` [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Pawel Baldysiak
@ 2023-09-06 10:04 ` Hannes Reinecke
2023-09-11 12:09 ` Hannes Reinecke
0 siblings, 1 reply; 40+ messages in thread
From: Hannes Reinecke @ 2023-09-06 10:04 UTC (permalink / raw)
To: Pawel Baldysiak; +Cc: linux-nvme
On 9/6/23 10:48, Pawel Baldysiak wrote:
> Hi Hannes,
>
> I'm testing v15 patchset in extensive way and observing that it is not stable.
> I'm running it on two connected machines - one is used as a host, second as target.
> Once connected I'm running I/Os with dd.
> Eventually I got I/O timeouts from nvme layer on host,
> causing connect/disconnect loops and I/O hung.
> Does not matter if the workload is reads or writes.
>
Do you have the kernel message log here?
It _might_ be that we're overloading the TLS stack, such that processing
of individual packets is taking too long and triggering a KATO.
> Second test is connect/disconnect loops.
> On disconnect I've got ocasional delays due to nvme commands timeouts.
> The disconnect happens eventually, but first it need to hit timeout.
>
Again, do you have message logs?
> It is not happening if TLS is disable.
>
> Are you performing similar tests on you setup? Are you observing the same?
>
Yes, but probably not as exhaustive as your tests.
Need to create the testcases for blktests, now that the cleanup from
Daniel is nearing completion.
Cheers,
Hannes
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCHv11 00/18] nvme: In-kernel TLS support for TCP
2023-09-06 10:04 ` Hannes Reinecke
@ 2023-09-11 12:09 ` Hannes Reinecke
2023-09-14 18:10 ` Sagi Grimberg
0 siblings, 1 reply; 40+ messages in thread
From: Hannes Reinecke @ 2023-09-11 12:09 UTC (permalink / raw)
To: Pawel Baldysiak; +Cc: linux-nvme
On 9/6/23 12:04, Hannes Reinecke wrote:
> On 9/6/23 10:48, Pawel Baldysiak wrote:
>> Hi Hannes,
>>
>> I'm testing v15 patchset in extensive way and observing that it is not
>> stable.
>> I'm running it on two connected machines - one is used as a host,
>> second as target.
>> Once connected I'm running I/Os with dd.
>> Eventually I got I/O timeouts from nvme layer on host,
>> causing connect/disconnect loops and I/O hung.
>> Does not matter if the workload is reads or writes.
>>
> Do you have the kernel message log here?
> It _might_ be that we're overloading the TLS stack, such that processing
> of individual packets is taking too long and triggering a KATO.
>
>> Second test is connect/disconnect loops.
>> On disconnect I've got ocasional delays due to nvme commands timeouts.
>> The disconnect happens eventually, but first it need to hit timeout.
>>
> Again, do you have message logs?
>
Managed to validate it with a larger setup.
Message log is:
nvme nvme0: mapped 16/0/0 default/read/poll queues.
nvme nvme0: new ctrl: NQN "nqn.exuma", addr 10.168.136.124:4420
nvme nvme0: Removing ctrl: NQN "nqn.exuma"
nvme nvme0: queue 1: timeout cid 0x3071 type 4 opcode 0x2 (Read)
nvme0c0n1: Read(0x2) @ LBA 15999927, 9 blocks, Host Aborted Command (sct
0x3 / sc 0x71)
nvme nvme0: queue 1: timeout cid 0x2078 type 4 opcode 0x2 (Read)
nvme0c0n1: Read(0x2) @ LBA 15999872, 4 blocks, Host Aborted Command (sct
0x3 / sc 0x71)
nvme nvme0: queue 1: timeout cid 0x2079 type 4 opcode 0x2 (Read)
nvme0c0n1: Read(0x2) @ LBA 15999877, 1 blocks, Host Aborted Command (sct
0x3 / sc 0x71)
nvme nvme0: queue 1: timeout cid 0x207a type 4 opcode 0x2 (Read)
nvme0c0n1: Read(0x2) @ LBA 15999879, 2 blocks, Host Aborted Command (sct
0x3 / sc 0x71)
nvme nvme0: queue 1: timeout cid 0x207b type 4 opcode 0x2 (Read)
nvme0c0n1: Read(0x2) @ LBA 15999882, 4 blocks, Host Aborted Command (sct
0x3 / sc 0x71)
nvme nvme0: queue 1: timeout cid 0x207c type 4 opcode 0x2 (Read)
nvme0c0n1: Read(0x2) @ LBA 15999887, 21 blocks, Host Aborted Command
(sct 0x3 / sc 0x71)
nvme nvme0: queue 1: timeout cid 0x207d type 4 opcode 0x2 (Read)
nvme0c0n1: Read(0x2) @ LBA 15999909, 6 blocks, Host Aborted Command (sct
0x3 / sc 0x71)
nvme nvme0: queue 1: timeout cid 0x207e type 4 opcode 0x2 (Read)
nvme0c0n1: Read(0x2) @ LBA 15999916, 10 blocks, Host Aborted Command
(sct 0x3 / sc 0x71)
block nvme0n1: no available path - failing I/O
Buffer I/O error on dev nvme0n1, logical block 15999872, async page read
nvme nvme0: failed to send request -32
nvme nvme0: Property Set error: 880, offset 0x14
Which actually points to an issue in the teardown flow.
nvme_do_delete_ctrl() does this:
nvme_stop_ctrl(ctrl);
nvme_remove_namespaces(ctrl);
ctrl->ops->delete_ctrl(ctrl);
and ->delete_ctrl()
->nvme_tcp_delete_ctrl()
->nvme_tcp_teardown_ctrl()
->nvme_tcp_teardown_io_queues()
has:
nvme_quiesce_admin_queue(ctrl);
nvme_quiesce_io_queues(ctrl);
nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
Now, if you look at 'nvme_sync_io_queues()' you'll find
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
blk_sync_queue(ns->queue);
up_read(&ctrl->namespaces_rwsem);
but we've removed all namespaces right at the start in
nvme_do_delete_ctr(), so this is a no-op and we'll never
catch any I/O in flight causing the timeouts to occur :-(
Cheers,
Hannes
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCHv11 00/18] nvme: In-kernel TLS support for TCP
2023-09-11 12:09 ` Hannes Reinecke
@ 2023-09-14 18:10 ` Sagi Grimberg
2023-09-18 12:15 ` Hannes Reinecke
0 siblings, 1 reply; 40+ messages in thread
From: Sagi Grimberg @ 2023-09-14 18:10 UTC (permalink / raw)
To: Hannes Reinecke, Pawel Baldysiak; +Cc: linux-nvme
On 9/11/23 15:09, Hannes Reinecke wrote:
> On 9/6/23 12:04, Hannes Reinecke wrote:
>> On 9/6/23 10:48, Pawel Baldysiak wrote:
>>> Hi Hannes,
>>>
>>> I'm testing v15 patchset in extensive way and observing that it is
>>> not stable.
>>> I'm running it on two connected machines - one is used as a host,
>>> second as target.
>>> Once connected I'm running I/Os with dd.
>>> Eventually I got I/O timeouts from nvme layer on host,
>>> causing connect/disconnect loops and I/O hung.
>>> Does not matter if the workload is reads or writes.
>>>
>> Do you have the kernel message log here?
>> It _might_ be that we're overloading the TLS stack, such that
>> processing of individual packets is taking too long and triggering a
>> KATO.
>>
>>> Second test is connect/disconnect loops.
>>> On disconnect I've got ocasional delays due to nvme commands timeouts.
>>> The disconnect happens eventually, but first it need to hit timeout.
>>>
>> Again, do you have message logs?
>>
> Managed to validate it with a larger setup.
> Message log is:
> nvme nvme0: mapped 16/0/0 default/read/poll queues.
> nvme nvme0: new ctrl: NQN "nqn.exuma", addr 10.168.136.124:4420
> nvme nvme0: Removing ctrl: NQN "nqn.exuma"
> nvme nvme0: queue 1: timeout cid 0x3071 type 4 opcode 0x2 (Read)
> nvme0c0n1: Read(0x2) @ LBA 15999927, 9 blocks, Host Aborted Command
> (sct 0x3 / sc 0x71)
> nvme nvme0: queue 1: timeout cid 0x2078 type 4 opcode 0x2 (Read)
> nvme0c0n1: Read(0x2) @ LBA 15999872, 4 blocks, Host Aborted Command
> (sct 0x3 / sc 0x71)
> nvme nvme0: queue 1: timeout cid 0x2079 type 4 opcode 0x2 (Read)
> nvme0c0n1: Read(0x2) @ LBA 15999877, 1 blocks, Host Aborted Command
> (sct 0x3 / sc 0x71)
> nvme nvme0: queue 1: timeout cid 0x207a type 4 opcode 0x2 (Read)
> nvme0c0n1: Read(0x2) @ LBA 15999879, 2 blocks, Host Aborted Command
> (sct 0x3 / sc 0x71)
> nvme nvme0: queue 1: timeout cid 0x207b type 4 opcode 0x2 (Read)
> nvme0c0n1: Read(0x2) @ LBA 15999882, 4 blocks, Host Aborted Command
> (sct 0x3 / sc 0x71)
> nvme nvme0: queue 1: timeout cid 0x207c type 4 opcode 0x2 (Read)
> nvme0c0n1: Read(0x2) @ LBA 15999887, 21 blocks, Host Aborted Command
> (sct 0x3 / sc 0x71)
> nvme nvme0: queue 1: timeout cid 0x207d type 4 opcode 0x2 (Read)
> nvme0c0n1: Read(0x2) @ LBA 15999909, 6 blocks, Host Aborted Command
> (sct 0x3 / sc 0x71)
> nvme nvme0: queue 1: timeout cid 0x207e type 4 opcode 0x2 (Read)
> nvme0c0n1: Read(0x2) @ LBA 15999916, 10 blocks, Host Aborted Command
> (sct 0x3 / sc 0x71)
> block nvme0n1: no available path - failing I/O
> Buffer I/O error on dev nvme0n1, logical block 15999872, async page read
> nvme nvme0: failed to send request -32
> nvme nvme0: Property Set error: 880, offset 0x14
Can you add timestamps to the log trace?
>
> Which actually points to an issue in the teardown flow.
> nvme_do_delete_ctrl() does this:
>
> nvme_stop_ctrl(ctrl);
> nvme_remove_namespaces(ctrl);
> ctrl->ops->delete_ctrl(ctrl);
>
> and ->delete_ctrl()
> ->nvme_tcp_delete_ctrl()
> ->nvme_tcp_teardown_ctrl()
> ->nvme_tcp_teardown_io_queues()
>
> has:
> nvme_quiesce_admin_queue(ctrl);
> nvme_quiesce_io_queues(ctrl);
> nvme_sync_io_queues(ctrl);
> nvme_tcp_stop_io_queues(ctrl);
>
> Now, if you look at 'nvme_sync_io_queues()' you'll find
>
> down_read(&ctrl->namespaces_rwsem);
> list_for_each_entry(ns, &ctrl->namespaces, list)
> blk_sync_queue(ns->queue);
> up_read(&ctrl->namespaces_rwsem);
>
> but we've removed all namespaces right at the start in
> nvme_do_delete_ctr(), so this is a no-op and we'll never
> catch any I/O in flight causing the timeouts to occur :-(
Hannes, its not clear to me what you're saying here. What is different
in TLS that makes the teardown go that slow?
Also, what is being blocked here? Are there still I/Os to an already
deleted namespace?
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCHv11 00/18] nvme: In-kernel TLS support for TCP
2023-09-14 18:10 ` Sagi Grimberg
@ 2023-09-18 12:15 ` Hannes Reinecke
2023-09-18 12:42 ` Sagi Grimberg
0 siblings, 1 reply; 40+ messages in thread
From: Hannes Reinecke @ 2023-09-18 12:15 UTC (permalink / raw)
To: Sagi Grimberg, Pawel Baldysiak; +Cc: linux-nvme
On 9/14/23 20:10, Sagi Grimberg wrote:
>
>
> On 9/11/23 15:09, Hannes Reinecke wrote:
>> On 9/6/23 12:04, Hannes Reinecke wrote:
>>> On 9/6/23 10:48, Pawel Baldysiak wrote:
>>>> Hi Hannes,
>>>>
>>>> I'm testing v15 patchset in extensive way and observing that it is
>>>> not stable.
>>>> I'm running it on two connected machines - one is used as a host,
>>>> second as target.
>>>> Once connected I'm running I/Os with dd.
>>>> Eventually I got I/O timeouts from nvme layer on host,
>>>> causing connect/disconnect loops and I/O hung.
>>>> Does not matter if the workload is reads or writes.
>>>>
>>> Do you have the kernel message log here?
>>> It _might_ be that we're overloading the TLS stack, such that
>>> processing of individual packets is taking too long and triggering a
>>> KATO.
>>>
>>>> Second test is connect/disconnect loops.
>>>> On disconnect I've got ocasional delays due to nvme commands timeouts.
>>>> The disconnect happens eventually, but first it need to hit timeout.
>>>>
>>> Again, do you have message logs?
>>>
>> Managed to validate it with a larger setup.
>> Message log is:
>> nvme nvme0: mapped 16/0/0 default/read/poll queues.
>> nvme nvme0: new ctrl: NQN "nqn.exuma", addr 10.168.136.124:4420
>> nvme nvme0: Removing ctrl: NQN "nqn.exuma"
>> nvme nvme0: queue 1: timeout cid 0x3071 type 4 opcode 0x2 (Read)
>> nvme0c0n1: Read(0x2) @ LBA 15999927, 9 blocks, Host Aborted Command
>> (sct 0x3 / sc 0x71)
>> nvme nvme0: queue 1: timeout cid 0x2078 type 4 opcode 0x2 (Read)
>> nvme0c0n1: Read(0x2) @ LBA 15999872, 4 blocks, Host Aborted Command
>> (sct 0x3 / sc 0x71)
>> nvme nvme0: queue 1: timeout cid 0x2079 type 4 opcode 0x2 (Read)
>> nvme0c0n1: Read(0x2) @ LBA 15999877, 1 blocks, Host Aborted Command
>> (sct 0x3 / sc 0x71)
>> nvme nvme0: queue 1: timeout cid 0x207a type 4 opcode 0x2 (Read)
>> nvme0c0n1: Read(0x2) @ LBA 15999879, 2 blocks, Host Aborted Command
>> (sct 0x3 / sc 0x71)
>> nvme nvme0: queue 1: timeout cid 0x207b type 4 opcode 0x2 (Read)
>> nvme0c0n1: Read(0x2) @ LBA 15999882, 4 blocks, Host Aborted Command
>> (sct 0x3 / sc 0x71)
>> nvme nvme0: queue 1: timeout cid 0x207c type 4 opcode 0x2 (Read)
>> nvme0c0n1: Read(0x2) @ LBA 15999887, 21 blocks, Host Aborted Command
>> (sct 0x3 / sc 0x71)
>> nvme nvme0: queue 1: timeout cid 0x207d type 4 opcode 0x2 (Read)
>> nvme0c0n1: Read(0x2) @ LBA 15999909, 6 blocks, Host Aborted Command
>> (sct 0x3 / sc 0x71)
>> nvme nvme0: queue 1: timeout cid 0x207e type 4 opcode 0x2 (Read)
>> nvme0c0n1: Read(0x2) @ LBA 15999916, 10 blocks, Host Aborted Command
>> (sct 0x3 / sc 0x71)
>> block nvme0n1: no available path - failing I/O
>> Buffer I/O error on dev nvme0n1, logical block 15999872, async page read
>> nvme nvme0: failed to send request -32
>> nvme nvme0: Property Set error: 880, offset 0x14
>
> Can you add timestamps to the log trace?
>
>>
>> Which actually points to an issue in the teardown flow.
>> nvme_do_delete_ctrl() does this:
>>
>> nvme_stop_ctrl(ctrl);
>> nvme_remove_namespaces(ctrl);
>> ctrl->ops->delete_ctrl(ctrl);
>>
>> and ->delete_ctrl()
>> ->nvme_tcp_delete_ctrl()
>> ->nvme_tcp_teardown_ctrl()
>> ->nvme_tcp_teardown_io_queues()
>>
>> has:
>> nvme_quiesce_admin_queue(ctrl);
>> nvme_quiesce_io_queues(ctrl);
>> nvme_sync_io_queues(ctrl);
>> nvme_tcp_stop_io_queues(ctrl);
>>
>> Now, if you look at 'nvme_sync_io_queues()' you'll find
>>
>> down_read(&ctrl->namespaces_rwsem);
>> list_for_each_entry(ns, &ctrl->namespaces, list)
>> blk_sync_queue(ns->queue);
>> up_read(&ctrl->namespaces_rwsem);
>>
>> but we've removed all namespaces right at the start in
>> nvme_do_delete_ctr(), so this is a no-op and we'll never
>> catch any I/O in flight causing the timeouts to occur :-(
>
> Hannes, its not clear to me what you're saying here. What is different
> in TLS that makes the teardown go that slow?
>
> Also, what is being blocked here? Are there still I/Os to an already
> deleted namespace?
Hmm. Seems to be an interrelated issue. Problem is that TLS sessions
occasionally time out right after initiating the connection, and it'll
take some reconnect attempts to get things sorts.
Still debugging; seems like an issue with the network stack.
And while reconnection is underway of course the I/O is aborted, but
depending on the timing namespace scanning might be underway, so we're
blocked in 'flush_work(&ctrl->scan_work)' in nvme_remove_namespaces()
and have to wait for command timeout on namespace identify et al.
So that is probably expected, and I'll be concentrating on figuring out
why we're seeing reconnections.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCHv11 00/18] nvme: In-kernel TLS support for TCP
2023-09-18 12:15 ` Hannes Reinecke
@ 2023-09-18 12:42 ` Sagi Grimberg
0 siblings, 0 replies; 40+ messages in thread
From: Sagi Grimberg @ 2023-09-18 12:42 UTC (permalink / raw)
To: Hannes Reinecke, Pawel Baldysiak; +Cc: linux-nvme
On 9/18/23 15:15, Hannes Reinecke wrote:
> On 9/14/23 20:10, Sagi Grimberg wrote:
>>
>>
>> On 9/11/23 15:09, Hannes Reinecke wrote:
>>> On 9/6/23 12:04, Hannes Reinecke wrote:
>>>> On 9/6/23 10:48, Pawel Baldysiak wrote:
>>>>> Hi Hannes,
>>>>>
>>>>> I'm testing v15 patchset in extensive way and observing that it is
>>>>> not stable.
>>>>> I'm running it on two connected machines - one is used as a host,
>>>>> second as target.
>>>>> Once connected I'm running I/Os with dd.
>>>>> Eventually I got I/O timeouts from nvme layer on host,
>>>>> causing connect/disconnect loops and I/O hung.
>>>>> Does not matter if the workload is reads or writes.
>>>>>
>>>> Do you have the kernel message log here?
>>>> It _might_ be that we're overloading the TLS stack, such that
>>>> processing of individual packets is taking too long and triggering a
>>>> KATO.
>>>>
>>>>> Second test is connect/disconnect loops.
>>>>> On disconnect I've got ocasional delays due to nvme commands timeouts.
>>>>> The disconnect happens eventually, but first it need to hit timeout.
>>>>>
>>>> Again, do you have message logs?
>>>>
>>> Managed to validate it with a larger setup.
>>> Message log is:
>>> nvme nvme0: mapped 16/0/0 default/read/poll queues.
>>> nvme nvme0: new ctrl: NQN "nqn.exuma", addr 10.168.136.124:4420
>>> nvme nvme0: Removing ctrl: NQN "nqn.exuma"
>>> nvme nvme0: queue 1: timeout cid 0x3071 type 4 opcode 0x2 (Read)
>>> nvme0c0n1: Read(0x2) @ LBA 15999927, 9 blocks, Host Aborted Command
>>> (sct 0x3 / sc 0x71)
>>> nvme nvme0: queue 1: timeout cid 0x2078 type 4 opcode 0x2 (Read)
>>> nvme0c0n1: Read(0x2) @ LBA 15999872, 4 blocks, Host Aborted Command
>>> (sct 0x3 / sc 0x71)
>>> nvme nvme0: queue 1: timeout cid 0x2079 type 4 opcode 0x2 (Read)
>>> nvme0c0n1: Read(0x2) @ LBA 15999877, 1 blocks, Host Aborted Command
>>> (sct 0x3 / sc 0x71)
>>> nvme nvme0: queue 1: timeout cid 0x207a type 4 opcode 0x2 (Read)
>>> nvme0c0n1: Read(0x2) @ LBA 15999879, 2 blocks, Host Aborted Command
>>> (sct 0x3 / sc 0x71)
>>> nvme nvme0: queue 1: timeout cid 0x207b type 4 opcode 0x2 (Read)
>>> nvme0c0n1: Read(0x2) @ LBA 15999882, 4 blocks, Host Aborted Command
>>> (sct 0x3 / sc 0x71)
>>> nvme nvme0: queue 1: timeout cid 0x207c type 4 opcode 0x2 (Read)
>>> nvme0c0n1: Read(0x2) @ LBA 15999887, 21 blocks, Host Aborted Command
>>> (sct 0x3 / sc 0x71)
>>> nvme nvme0: queue 1: timeout cid 0x207d type 4 opcode 0x2 (Read)
>>> nvme0c0n1: Read(0x2) @ LBA 15999909, 6 blocks, Host Aborted Command
>>> (sct 0x3 / sc 0x71)
>>> nvme nvme0: queue 1: timeout cid 0x207e type 4 opcode 0x2 (Read)
>>> nvme0c0n1: Read(0x2) @ LBA 15999916, 10 blocks, Host Aborted Command
>>> (sct 0x3 / sc 0x71)
>>> block nvme0n1: no available path - failing I/O
>>> Buffer I/O error on dev nvme0n1, logical block 15999872, async page read
>>> nvme nvme0: failed to send request -32
>>> nvme nvme0: Property Set error: 880, offset 0x14
>>
>> Can you add timestamps to the log trace?
>>
>>>
>>> Which actually points to an issue in the teardown flow.
>>> nvme_do_delete_ctrl() does this:
>>>
>>> nvme_stop_ctrl(ctrl);
>>> nvme_remove_namespaces(ctrl);
>>> ctrl->ops->delete_ctrl(ctrl);
>>>
>>> and ->delete_ctrl()
>>> ->nvme_tcp_delete_ctrl()
>>> ->nvme_tcp_teardown_ctrl()
>>> ->nvme_tcp_teardown_io_queues()
>>>
>>> has:
>>> nvme_quiesce_admin_queue(ctrl);
>>> nvme_quiesce_io_queues(ctrl);
>>> nvme_sync_io_queues(ctrl);
>>> nvme_tcp_stop_io_queues(ctrl);
>>>
>>> Now, if you look at 'nvme_sync_io_queues()' you'll find
>>>
>>> down_read(&ctrl->namespaces_rwsem);
>>> list_for_each_entry(ns, &ctrl->namespaces, list)
>>> blk_sync_queue(ns->queue);
>>> up_read(&ctrl->namespaces_rwsem);
>>>
>>> but we've removed all namespaces right at the start in
>>> nvme_do_delete_ctr(), so this is a no-op and we'll never
>>> catch any I/O in flight causing the timeouts to occur :-(
>>
>> Hannes, its not clear to me what you're saying here. What is different
>> in TLS that makes the teardown go that slow?
>>
>> Also, what is being blocked here? Are there still I/Os to an already
>> deleted namespace?
>
> Hmm. Seems to be an interrelated issue. Problem is that TLS sessions
> occasionally time out right after initiating the connection, and it'll
> take some reconnect attempts to get things sorts.
Sounds like the trigger to the problem. If there is some I/O stalls
that would explain why there are resets due to either an I/O or
the keep-alive timing out.
> Still debugging; seems like an issue with the network stack.
Interesting, please do share when you have some further insights.
> And while reconnection is underway of course the I/O is aborted, but
> depending on the timing namespace scanning might be underway, so we're
> blocked in 'flush_work(&ctrl->scan_work)' in nvme_remove_namespaces()
> and have to wait for command timeout on namespace identify et al.
> So that is probably expected,
The scan_work is fine to block if there is an ongoing admin command
that needs to timeout first. However this should be bound to the admin
timeout, not forever. After that any reconnection attempts should not
continue as the ctrl state is DELETING.
Is this a permanent phenomenon? or just stuck and gets released at some
point?
BTW, in your tests you are triggering a disconnect (delete), was that
step included as part of the report from Pawel?
> and I'll be concentrating on figuring out why we're seeing reconnections.
Sure, it'll also be good to align that you are doing the same tests.
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCHv11 00/18] nvme: In-kernel TLS support for TCP
2023-08-24 14:39 [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Hannes Reinecke
` (18 preceding siblings ...)
2023-09-06 8:48 ` [PATCHv11 00/18] nvme: In-kernel TLS support for TCP Pawel Baldysiak
@ 2023-09-12 11:34 ` Sagi Grimberg
2023-09-12 11:46 ` Hannes Reinecke
19 siblings, 1 reply; 40+ messages in thread
From: Sagi Grimberg @ 2023-09-12 11:34 UTC (permalink / raw)
To: Hannes Reinecke, Christoph Hellwig
Cc: Keith Busch, linux-nvme, Jakub Kicinski, Eric Dumazet,
Paolo Abeni, netdev
> Hi all,
Hannes, I think that this is in decent shape. Assuming that
the recent reports on the tls tests are resolved, I think this
is ready for inclusion.
I also want to give it some time on the nvme tree.
>
> with the merge of Chuck Levers handshake upcall mechanism and
> my tls_read_sock() implementation finally merged with net-next
> it's now time to restart on the actual issue, namely implementing
> in-kernel TLS support for nvme-tcp.
>
> The patchset is based on the recent net-next git tree;
> everything after commit ba4a734e1aa0 ("net/tls: avoid TCP window
> full during ->read_sock()") should work.
> You might want to add the patch
> 'nvmet-tcp: use 'spin_lock_bh' for state_lock()'
> before applying this patchset; otherwise results might be ...
> interesting.
>
> It also requires the 'tlshd' userspace daemon
> (https://github.com/oracle/ktls-utils)
> for the actual TLS handshake.
> Changes for nvme-cli are already included in the upstream repository.
>
> Theory of operation:
> A dedicated '.nvme' keyring is created to hold the pre-shared keys (PSKs)
> for the TLS handshake. Keys will have to be provisioned before TLS handshake
> is attempted; that can be done with the 'nvme gen-tls-key' command for nvme-cli
> (patches are already merged upstream).
> After connection to the remote TCP port the client side will use the
> 'best' PSK (as inferred from the NVMe TCP spec) or the PSK specified
> by the '--tls_key' option to nvme-cli and call the TLS userspace daemon
> to initiate a TLS handshake.
> The server side will then invoke the TLS userspace daemon to run the TLS
> handshake.
> If the TLS handshake succeeds the userspace daemon will be activating
> kTLS on the socket, and control is passed back to the kernel.
>
> This implementation currently does not implement the so-called
> 'secure concatenation' mode from NVMe-TCP; a TPAR is still pending
> with fixes for it, so I'll wait until it's published before
> posting patches for that.
>
> Patchset can be found at:
> git.kernel.org/pub/scm/linux/kernel/git/hare/nvme.git
> branch tls.v16
>
> For testing I'm using this script, running on a nvme target
> with NQN 'nqn.test' and using 127.0.0.1 as a port; the port
> has to set 'addr_tsas' to 'tls1.3':
>
> modprobe nvmet-tcp
> nvmetcli restore
> modprobe nvme-tcp
> ./nvme gen-tls-key --subsysnqn=nqn.test -i
> ./nvme gen-tls-key --subsysnqn=nqn.2014-08.org.nvmexpress.discovery -i
> tlshd -c /etc/tlshd.conf
>
> and then one can do a simple:
> # nvme connect -t tcp -a 127.0.0.1 -s 4420 -n nqn.test --tls
> to start the connection.
>
> As usual, comments and reviews are welcome.
>
> Changes to v10:
> - Include reviews from Sagi
>
> Changes to v9:
> - Close race between done() and timeout()
> - Add logging message for icreq/icresp failure
> - Sparse fixes
> - Restrict TREQ setting to 'not required' or 'required'
> when TLS is enabled
>
> Changes to v8:
> - Simplify reference counting as suggested by Sagi
> - Implement nvmf_parse_key() to simplify options parsing
> - Add patch to peek at icreq to figure out whether TLS
> should be started
>
> Changes to v7:
> - Include reviews from Simon
> - Do not call sock_release() when sock_alloc_file() fails
>
> Changes to v6:
> - More reviews from Sagi
> - Fixup non-tls connections
> - Fixup nvme options handling
> - Add reference counting to nvmet-tcp queues
>
> Changes to v5:
> - Include reviews from Sagi
> - Split off nvmet tsas/treq handling
> - Sanitize sock_file handling
>
> Changes to v4:
> - Split off network patches into a separate
> patchset
> - Handle TLS Alert notifications
>
> Changes to v3:
> - Really handle MSG_EOR for TLS
> - Fixup MSG_SENDPAGE_NOTLAST handling
> - Conditionally disable fabric option
>
> Changes to v2:
> - Included reviews from Sagi
> - Removed MSG_SENDPAGE_NOTLAST
> - Improved MSG_EOR handling for TLS
> - Add config options NVME_TCP_TLS
> and NVME_TARGET_TCP_TLS
>
> Changes to the original RFC:
> - Add a CONFIG_NVME_TLS config option
> - Use a single PSK for the TLS handshake
> - Make TLS connections mandatory
> - Do not peek messages for the server
> - Simplify data_ready callback
> - Implement read_sock() for TLS
>
> Hannes Reinecke (18):
> nvme-keyring: register '.nvme' keyring
> nvme-keyring: define a 'psk' keytype
> nvme: add TCP TSAS definitions
> nvme-tcp: add definitions for TLS cipher suites
> nvme-keyring: implement nvme_tls_psk_default()
> security/keys: export key_lookup()
> nvme-tcp: allocate socket file
> nvme-tcp: enable TLS handshake upcall
> nvme-tcp: control message handling for recvmsg()
> nvme-tcp: improve icreq/icresp logging
> nvme-fabrics: parse options 'keyring' and 'tls_key'
> nvmet: make TCP sectype settable via configfs
> nvmet-tcp: make nvmet_tcp_alloc_queue() a void function
> nvmet-tcp: allocate socket file
> nvmet: Set 'TREQ' to 'required' when TLS is enabled
> nvmet-tcp: enable TLS handshake upcall
> nvmet-tcp: control messages for recvmsg()
> nvmet-tcp: peek icreq before starting TLS
>
> drivers/nvme/common/Kconfig | 4 +
> drivers/nvme/common/Makefile | 3 +-
> drivers/nvme/common/keyring.c | 182 ++++++++++++++++++
> drivers/nvme/host/Kconfig | 15 ++
> drivers/nvme/host/core.c | 12 +-
> drivers/nvme/host/fabrics.c | 67 ++++++-
> drivers/nvme/host/fabrics.h | 9 +
> drivers/nvme/host/nvme.h | 1 +
> drivers/nvme/host/sysfs.c | 21 +++
> drivers/nvme/host/tcp.c | 184 ++++++++++++++++--
> drivers/nvme/target/Kconfig | 15 ++
> drivers/nvme/target/configfs.c | 128 ++++++++++++-
> drivers/nvme/target/nvmet.h | 11 ++
> drivers/nvme/target/tcp.c | 334 ++++++++++++++++++++++++++++++---
> include/linux/key.h | 1 +
> include/linux/nvme-keyring.h | 36 ++++
> include/linux/nvme-tcp.h | 6 +
> include/linux/nvme.h | 10 +
> security/keys/key.c | 1 +
> 19 files changed, 991 insertions(+), 49 deletions(-)
> create mode 100644 drivers/nvme/common/keyring.c
> create mode 100644 include/linux/nvme-keyring.h
>
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCHv11 00/18] nvme: In-kernel TLS support for TCP
2023-09-12 11:34 ` Sagi Grimberg
@ 2023-09-12 11:46 ` Hannes Reinecke
0 siblings, 0 replies; 40+ messages in thread
From: Hannes Reinecke @ 2023-09-12 11:46 UTC (permalink / raw)
To: Sagi Grimberg, Christoph Hellwig
Cc: Keith Busch, linux-nvme, Jakub Kicinski, Eric Dumazet,
Paolo Abeni, netdev
On 9/12/23 13:34, Sagi Grimberg wrote:
>
>> Hi all,
>
> Hannes, I think that this is in decent shape. Assuming that
> the recent reports on the tls tests are resolved, I think this
> is ready for inclusion.
>
Thanks for that.
> I also want to give it some time on the nvme tree.
>
I guess that will be a good idea.
I had a look at the report for I/O timeouts with TLS, and currently
it seems not to be TLS specific but rather generic issues with the NVMe
stack, just being visible now due to delayed timing.
Cheers,
Hannes
^ permalink raw reply [flat|nested] 40+ messages in thread