linux-cifs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [SMB3][PATCHes] parallelizing decryption of large read responses
@ 2019-09-09  4:31 Steve French
  2019-09-09  4:34 ` Steve French
  0 siblings, 1 reply; 5+ messages in thread
From: Steve French @ 2019-09-09  4:31 UTC (permalink / raw)
  To: CIFS

[-- Attachment #1: Type: text/plain, Size: 417 bytes --]

I am seeing very good performance benefit from offload of decryption
of encrypted SMB3 read responses to a pool of worker threads
(optionally).  See attached patches.

I plan to add another patch to only offload when number of requests in
flight is > 1 (since there is no point offloading and doing a thread
switch if no other responses would overlap in the cifsd thread reading
from the socket).

-- 
Thanks,

Steve

[-- Attachment #2: 0002-smb3-enable-offload-of-decryption-of-large-reads-via.patch --]
[-- Type: text/x-patch, Size: 4590 bytes --]

From e5de72d8b6920fd6397512c7fed59780fa273ada Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sun, 8 Sep 2019 23:22:02 -0500
Subject: [PATCH 2/2] smb3: enable offload of decryption of large reads via
 mount option

Disable offload of the decryption of encrypted read responses
by default (equivalent to setting this new mount option "esize=0").

Allow setting the minimum encrypted read response size that we
will choose to offload to a worker thread - it is now configurable
via on a new mount option "esize="

Depending on which encryption mechanism (GCM vs. CCM) and
the number of reads that will be issued in parallel and the
performance of the network and CPU on the client, it may make
sense to enable this since it can provide substantial benefit when
multiple large reads are in flight at the same time.

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.c   |  2 ++
 fs/cifs/cifsglob.h |  2 ++
 fs/cifs/connect.c  | 13 +++++++++++++
 fs/cifs/smb2ops.c  |  4 ++--
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index b0ea332af35c..ebf85a5d95e4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -559,6 +559,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 	seq_printf(s, ",rsize=%u", cifs_sb->rsize);
 	seq_printf(s, ",wsize=%u", cifs_sb->wsize);
 	seq_printf(s, ",bsize=%u", cifs_sb->bsize);
+	if (tcon->ses->server->min_offload)
+		seq_printf(s, ",esize=%u", tcon->ses->server->min_offload);
 	seq_printf(s, ",echo_interval=%lu",
 			tcon->ses->server->echo_interval / HZ);
 
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d66106ac031a..6987fbc5a24a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -592,6 +592,7 @@ struct smb_vol {
 	unsigned int bsize;
 	unsigned int rsize;
 	unsigned int wsize;
+	unsigned int min_offload;
 	bool sockopt_tcp_nodelay:1;
 	unsigned long actimeo; /* attribute cache timeout (jiffies) */
 	struct smb_version_operations *ops;
@@ -745,6 +746,7 @@ struct TCP_Server_Info {
 #endif /* STATS2 */
 	unsigned int	max_read;
 	unsigned int	max_write;
+	unsigned int	min_offload;
 	__le16	compress_algorithm;
 	__le16	cipher_type;
 	 /* save initital negprot hash */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 17882cede197..403b5cebc152 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -103,6 +103,7 @@ enum {
 	Opt_backupuid, Opt_backupgid, Opt_uid,
 	Opt_cruid, Opt_gid, Opt_file_mode,
 	Opt_dirmode, Opt_port,
+	Opt_min_enc_offload,
 	Opt_blocksize, Opt_rsize, Opt_wsize, Opt_actimeo,
 	Opt_echo_interval, Opt_max_credits, Opt_handletimeout,
 	Opt_snapshot,
@@ -207,6 +208,7 @@ static const match_table_t cifs_mount_option_tokens = {
 	{ Opt_dirmode, "dirmode=%s" },
 	{ Opt_dirmode, "dir_mode=%s" },
 	{ Opt_port, "port=%s" },
+	{ Opt_min_enc_offload, "esize=%s" },
 	{ Opt_blocksize, "bsize=%s" },
 	{ Opt_rsize, "rsize=%s" },
 	{ Opt_wsize, "wsize=%s" },
@@ -2016,6 +2018,13 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 			}
 			port = (unsigned short)option;
 			break;
+		case Opt_min_enc_offload:
+			if (get_option_ul(args, &option)) {
+				cifs_dbg(VFS, "%s: Invalid minimum encrypted read offload size (esize)\n");
+				goto cifs_parse_mount_err;
+			}
+			vol->min_offload = option;
+			break;
 		case Opt_blocksize:
 			if (get_option_ul(args, &option)) {
 				cifs_dbg(VFS, "%s: Invalid blocksize value\n",
@@ -2616,6 +2625,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
 	if (server->ignore_signature != vol->ignore_signature)
 		return 0;
 
+	if (server->min_offload != vol->min_offload)
+		return 0;
+
 	return 1;
 }
 
@@ -2790,6 +2802,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		module_put(THIS_MODULE);
 		goto out_err_crypto_release;
 	}
+	tcp_ses->min_offload = volume_info->min_offload;
 	tcp_ses->tcpStatus = CifsNeedNegotiate;
 
 	tcp_ses->nr_targets = 1;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 64fe0d93c442..047066493832 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -4125,8 +4125,8 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid,
 	 * use more cores decrypting which can be expensive
 	 */
 
-	/* TODO: make the size limit to enable decrypt offload configurable */
-	if (server->pdu_size > (512 * 1024)) {
+	if ((server->min_offload) &&
+	    (server->pdu_size >= server->min_offload)) {
 		dw = kmalloc(sizeof(struct smb2_decrypt_work), GFP_KERNEL);
 		if (dw == NULL)
 			goto non_offloaded_decrypt;
-- 
2.20.1


[-- Attachment #3: 0001-smb3-allow-parallelizing-decryption-of-reads.patch --]
[-- Type: text/x-patch, Size: 6365 bytes --]

From 03b90249fa35fae25401f5f06f33ce9f36834c86 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 7 Sep 2019 01:09:49 -0500
Subject: [PATCH 1/2] smb3: allow parallelizing decryption of reads

decrypting large reads on encrypted shares can be slow (e.g. adding
multiple milliseconds per-read on non-GCM capable servers or
when mounting with dialects prior to SMB3.1.1) - allow parallelizing
of read decryption by launching worker threads.

Testing to Samba on localhost showed 25% improvement.
Testing to remote server showed very large improvement when
doing more than one 'cp' command was called at one time.

Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.c   | 17 ++++++++-
 fs/cifs/cifsglob.h |  1 +
 fs/cifs/smb2ops.c  | 87 ++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index de90e665ef11..b0ea332af35c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -109,6 +109,7 @@ extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
 
 struct workqueue_struct	*cifsiod_wq;
+struct workqueue_struct	*decrypt_wq;
 struct workqueue_struct	*cifsoplockd_wq;
 __u32 cifs_lock_secret;
 
@@ -1499,11 +1500,22 @@ init_cifs(void)
 		goto out_clean_proc;
 	}
 
+	/*
+	 * BB Consider setting limit!=0 maybe to min(num_of_cores - 1, 3) so we
+	 * don't launch too many worker threads
+	 */
+	decrypt_wq = alloc_workqueue("smb3decryptd",
+				     WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+	if (!decrypt_wq) {
+		rc = -ENOMEM;
+		goto out_destroy_cifsiod_wq;
+	}
+
 	cifsoplockd_wq = alloc_workqueue("cifsoplockd",
 					 WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 	if (!cifsoplockd_wq) {
 		rc = -ENOMEM;
-		goto out_destroy_cifsiod_wq;
+		goto out_destroy_decrypt_wq;
 	}
 
 	rc = cifs_fscache_register();
@@ -1569,6 +1581,8 @@ init_cifs(void)
 	cifs_fscache_unregister();
 out_destroy_cifsoplockd_wq:
 	destroy_workqueue(cifsoplockd_wq);
+out_destroy_decrypt_wq:
+	destroy_workqueue(decrypt_wq);
 out_destroy_cifsiod_wq:
 	destroy_workqueue(cifsiod_wq);
 out_clean_proc:
@@ -1595,6 +1609,7 @@ exit_cifs(void)
 	cifs_destroy_inodecache();
 	cifs_fscache_unregister();
 	destroy_workqueue(cifsoplockd_wq);
+	destroy_workqueue(decrypt_wq);
 	destroy_workqueue(cifsiod_wq);
 	cifs_proc_clean();
 }
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1f53dee211d8..d66106ac031a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1892,6 +1892,7 @@ void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
 
 extern const struct slow_work_ops cifs_oplock_break_ops;
 extern struct workqueue_struct *cifsiod_wq;
+extern struct workqueue_struct *decrypt_wq;
 extern struct workqueue_struct *cifsoplockd_wq;
 extern __u32 cifs_lock_secret;
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 83b02d74d48e..64fe0d93c442 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -4017,8 +4017,62 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
 	return length;
 }
 
+struct smb2_decrypt_work {
+	struct work_struct decrypt;
+	struct TCP_Server_Info *server;
+	struct page **ppages;
+	char *buf;
+	unsigned int npages;
+	unsigned int len;
+};
+
+
+static void smb2_decrypt_offload(struct work_struct *work)
+{
+	struct smb2_decrypt_work *dw = container_of(work,
+				struct smb2_decrypt_work, decrypt);
+	int i, rc;
+	struct mid_q_entry *mid;
+
+	rc = decrypt_raw_data(dw->server, dw->buf, dw->server->vals->read_rsp_size,
+			      dw->ppages, dw->npages, dw->len);
+	if (rc) {
+		cifs_dbg(VFS, "error decrypting rc=%d\n", rc);
+		goto free_pages;
+	}
+
+	mid = smb2_find_mid(dw->server, dw->buf);
+	if (mid == NULL)
+		cifs_dbg(VFS, "mid not found\n"); /* BB FIXME? change to FYI? */
+	else {
+		mid->decrypted = true;
+		rc = handle_read_data(dw->server, mid, dw->buf,
+				      dw->server->vals->read_rsp_size,
+				      dw->ppages, dw->npages, dw->len);
+	}
+
+	dw->server->lstrp = jiffies;
+
+	mid->callback(mid);
+
+	cifs_mid_q_entry_release(mid);
+
+free_pages:
+	for (i = dw->npages-1; i >= 0; i--)
+		put_page(dw->ppages[i]);
+
+	kfree(dw->ppages);
+	cifs_small_buf_release(dw->buf);
+
+/* FIXME Do we need the equivalent of this? */
+/* discard_data:
+	cifs_discard_remaining_data(server); */
+}
+
+
 static int
-receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
+receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid,
+		       int *num_mids)
 {
 	char *buf = server->smallbuf;
 	struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
@@ -4028,7 +4082,9 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	unsigned int buflen = server->pdu_size;
 	int rc;
 	int i = 0;
+	struct smb2_decrypt_work *dw;
 
+	*num_mids = 1;
 	len = min_t(unsigned int, buflen, server->vals->read_rsp_size +
 		sizeof(struct smb2_transform_hdr)) - HEADER_SIZE(server) + 1;
 
@@ -4064,6 +4120,32 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	if (rc)
 		goto free_pages;
 
+	/*
+	 * For large reads, offload to different thread for better performance,
+	 * use more cores decrypting which can be expensive
+	 */
+
+	/* TODO: make the size limit to enable decrypt offload configurable */
+	if (server->pdu_size > (512 * 1024)) {
+		dw = kmalloc(sizeof(struct smb2_decrypt_work), GFP_KERNEL);
+		if (dw == NULL)
+			goto non_offloaded_decrypt;
+
+		dw->buf = server->smallbuf;
+		server->smallbuf = (char *)cifs_small_buf_get();
+
+		INIT_WORK(&dw->decrypt, smb2_decrypt_offload);
+
+		dw->npages = npages;
+		dw->server = server;
+		dw->ppages = pages;
+		dw->len = len;
+		queue_work(cifsiod_wq, &dw->decrypt);
+		*num_mids = 0; /* worker thread takes care of finding mid */
+		return -1;
+	}
+
+non_offloaded_decrypt:
 	rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size,
 			      pages, npages, len);
 	if (rc)
@@ -4210,8 +4292,7 @@ smb3_receive_transform(struct TCP_Server_Info *server,
 
 	/* TODO: add support for compounds containing READ. */
 	if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server)) {
-		*num_mids = 1;
-		return receive_encrypted_read(server, &mids[0]);
+		return receive_encrypted_read(server, &mids[0], num_mids);
 	}
 
 	return receive_encrypted_standard(server, mids, bufs, num_mids);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-09-10 21:10 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-09  4:31 [SMB3][PATCHes] parallelizing decryption of large read responses Steve French
2019-09-09  4:34 ` Steve French
2019-09-09 18:33   ` Pavel Shilovsky
2019-09-10  9:19   ` ronnie sahlberg
2019-09-10 21:10     ` Steve French

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).