From 03b90249fa35fae25401f5f06f33ce9f36834c86 Mon Sep 17 00:00:00 2001
From: Steve French
Date: Sat, 7 Sep 2019 01:09:49 -0500
Subject: [PATCH 1/2] smb3: allow parallelizing decryption of reads

decrypting large reads on encrypted shares can be slow (e.g. adding
multiple milliseconds per-read on non-GCM capable servers or when
mounting with dialects prior to SMB3.1.1) - allow parallelizing of
read decryption by launching worker threads.

Testing to Samba on localhost showed 25% improvement.
Testing to a remote server showed a very large improvement when
more than one 'cp' command was run at the same time.

Signed-off-by: Steve French
---
 fs/cifs/cifsfs.c   | 17 ++++++++-
 fs/cifs/cifsglob.h |  1 +
 fs/cifs/smb2ops.c  | 87 ++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index de90e665ef11..b0ea332af35c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -109,6 +109,7 @@ extern mempool_t *cifs_req_poolp;
 extern mempool_t *cifs_mid_poolp;
 
 struct workqueue_struct	*cifsiod_wq;
+struct workqueue_struct	*decrypt_wq;
 struct workqueue_struct	*cifsoplockd_wq;
 __u32 cifs_lock_secret;
 
@@ -1499,11 +1500,22 @@ init_cifs(void)
 		goto out_clean_proc;
 	}
 
+	/*
+	 * BB Consider setting limit!=0 maybe to min(num_of_cores - 1, 3) so we
+	 * don't launch too many worker threads
+	 */
+	decrypt_wq = alloc_workqueue("smb3decryptd",
+				     WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+	if (!decrypt_wq) {
+		rc = -ENOMEM;
+		goto out_destroy_cifsiod_wq;
+	}
+
 	cifsoplockd_wq = alloc_workqueue("cifsoplockd",
 					 WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
 	if (!cifsoplockd_wq) {
 		rc = -ENOMEM;
-		goto out_destroy_cifsiod_wq;
+		goto out_destroy_decrypt_wq;
 	}
 
 	rc = cifs_fscache_register();
@@ -1569,6 +1581,8 @@ init_cifs(void)
 	cifs_fscache_unregister();
 out_destroy_cifsoplockd_wq:
 	destroy_workqueue(cifsoplockd_wq);
+out_destroy_decrypt_wq:
+	destroy_workqueue(decrypt_wq);
 out_destroy_cifsiod_wq:
 	destroy_workqueue(cifsiod_wq);
 out_clean_proc:
@@ -1595,6 +1609,7 @@ exit_cifs(void)
 	cifs_destroy_inodecache();
 	cifs_fscache_unregister();
 	destroy_workqueue(cifsoplockd_wq);
+	destroy_workqueue(decrypt_wq);
 	destroy_workqueue(cifsiod_wq);
 	cifs_proc_clean();
 }
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1f53dee211d8..d66106ac031a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1892,6 +1892,7 @@ void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
 
 extern const struct slow_work_ops cifs_oplock_break_ops;
 extern struct workqueue_struct *cifsiod_wq;
+extern struct workqueue_struct *decrypt_wq;
 extern struct workqueue_struct *cifsoplockd_wq;
 extern __u32 cifs_lock_secret;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 83b02d74d48e..64fe0d93c442 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -4017,8 +4017,62 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
 	return length;
 }
 
+struct smb2_decrypt_work {
+	struct work_struct decrypt;
+	struct TCP_Server_Info *server;
+	struct page **ppages;
+	char *buf;
+	unsigned int npages;
+	unsigned int len;
+};
+
+
+static void smb2_decrypt_offload(struct work_struct *work)
+{
+	struct smb2_decrypt_work *dw = container_of(work,
+				struct smb2_decrypt_work, decrypt);
+	int i, rc;
+	struct mid_q_entry *mid;
+
+	rc = decrypt_raw_data(dw->server, dw->buf, dw->server->vals->read_rsp_size,
+			      dw->ppages, dw->npages, dw->len);
+	if (rc) {
+		cifs_dbg(VFS, "error decrypting rc=%d\n", rc);
+		goto free_pages;
+	}
+
+	mid = smb2_find_mid(dw->server, dw->buf);
+	if (mid == NULL)
+		cifs_dbg(VFS, "mid not found\n"); /* BB FIXME? change to FYI? */
+	else {
+		mid->decrypted = true;
+		rc = handle_read_data(dw->server, mid, dw->buf,
+				      dw->server->vals->read_rsp_size,
+				      dw->ppages, dw->npages, dw->len);
+	}
+
+	dw->server->lstrp = jiffies;
+	if (mid != NULL) {
+		mid->callback(mid);
+		cifs_mid_q_entry_release(mid);
+	}
+
+free_pages:
+	for (i = dw->npages-1; i >= 0; i--)
+		put_page(dw->ppages[i]);
+
+	kfree(dw->ppages);
+	cifs_small_buf_release(dw->buf);
+	kfree(dw);
+/* FIXME Do we need the equivalent of this? */
+/* discard_data:
+	cifs_discard_remaining_data(server); */
+}
+
+
 static int
-receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
+receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid,
+		       int *num_mids)
 {
 	char *buf = server->smallbuf;
 	struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
@@ -4028,7 +4082,9 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	unsigned int buflen = server->pdu_size;
 	int rc;
 	int i = 0;
+	struct smb2_decrypt_work *dw;
 
+	*num_mids = 1;
 	len = min_t(unsigned int, buflen, server->vals->read_rsp_size +
 		sizeof(struct smb2_transform_hdr)) - HEADER_SIZE(server) + 1;
 
@@ -4064,6 +4120,32 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid)
 	if (rc)
 		goto free_pages;
 
+	/*
+	 * For large reads, offload to different thread for better performance,
+	 * use more cores decrypting which can be expensive
+	 */
+
+	/* TODO: make the size limit to enable decrypt offload configurable */
+	if (server->pdu_size > (512 * 1024)) {
+		dw = kmalloc(sizeof(struct smb2_decrypt_work), GFP_KERNEL);
+		if (dw == NULL)
+			goto non_offloaded_decrypt;
+
+		dw->buf = server->smallbuf;
+		server->smallbuf = (char *)cifs_small_buf_get();
+
+		INIT_WORK(&dw->decrypt, smb2_decrypt_offload);
+
+		dw->npages = npages;
+		dw->server = server;
+		dw->ppages = pages;
+		dw->len = len;
+		queue_work(decrypt_wq, &dw->decrypt);
+		*num_mids = 0; /* worker thread takes care of finding mid */
+		return -1;
+	}
+
+non_offloaded_decrypt:
 	rc = decrypt_raw_data(server, buf, server->vals->read_rsp_size,
 			      pages, npages, len);
 	if (rc)
@@ -4210,8 +4292,7 @@ smb3_receive_transform(struct TCP_Server_Info *server,
 
 	/* TODO: add support for compounds containing READ. */
 	if (pdu_length > CIFSMaxBufSize + MAX_HEADER_SIZE(server)) {
-		*num_mids = 1;
-		return receive_encrypted_read(server, &mids[0]);
+		return receive_encrypted_read(server, &mids[0], num_mids);
 	}
 
 	return receive_encrypted_standard(server, mids, bufs, num_mids);
-- 
2.20.1
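
Note (follow-up to the BB comment added in init_cifs() above, not part of the patch):
the comment suggests capping the number of decrypt worker threads to roughly
min(num_of_cores - 1, 3). One possible shape for that is sketched below; the helper
name smb3_alloc_decrypt_wq and the exact limit calculation are hypothetical, and the
only real mechanism relied on is that alloc_workqueue()'s third argument (max_active)
bounds how many work items execute concurrently.

	/* hypothetical sketch only, would live next to init_cifs() in cifsfs.c */
	static int smb3_alloc_decrypt_wq(void)
	{
		/* cap concurrent decrypt workers: min(num_of_cores - 1, 3), at least 1 */
		unsigned int limit = min_t(unsigned int,
					   max_t(int, num_online_cpus() - 1, 1), 3);

		decrypt_wq = alloc_workqueue("smb3decryptd",
					     WQ_FREEZABLE | WQ_MEM_RECLAIM, limit);
		return decrypt_wq ? 0 : -ENOMEM;
	}

With max_active left at 0, as in the patch itself, the workqueue falls back to the
default concurrency limit rather than one derived from the core count.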