* [PATCH] CIFS: Fix retry mid list corruption on reconnects
@ 2019-10-22 22:35 Pavel Shilovsky
2019-10-23 2:56 ` Steve French
0 siblings, 1 reply; 2+ messages in thread
From: Pavel Shilovsky @ 2019-10-22 22:35 UTC (permalink / raw)
To: Steve French, Ronnie Sahlberg, David Wysochanski
Cc: linux-cifs, Pavel Shilovskiy
When the client hits reconnect it iterates over the mid
pending queue marking entries for retry and moving them
to a temporary list to issue callbacks later without holding
GlobalMid_Lock. In the same time there is no guarantee that
mids can't be removed from the temporary list or even
freed completely by another thread. It may cause a temporary
list corruption:
[ 430.454897] list_del corruption. prev->next should be ffff98d3a8f316c0, but was 2e885cb266355469
[ 430.464668] ------------[ cut here ]------------
[ 430.466569] kernel BUG at lib/list_debug.c:51!
[ 430.468476] invalid opcode: 0000 [#1] SMP PTI
[ 430.470286] CPU: 0 PID: 13267 Comm: cifsd Kdump: loaded Not tainted 5.4.0-rc3+ #19
[ 430.473472] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 430.475872] RIP: 0010:__list_del_entry_valid.cold+0x31/0x55
...
[ 430.510426] Call Trace:
[ 430.511500] cifs_reconnect+0x25e/0x610 [cifs]
[ 430.513350] cifs_readv_from_socket+0x220/0x250 [cifs]
[ 430.515464] cifs_read_from_socket+0x4a/0x70 [cifs]
[ 430.517452] ? try_to_wake_up+0x212/0x650
[ 430.519122] ? cifs_small_buf_get+0x16/0x30 [cifs]
[ 430.521086] ? allocate_buffers+0x66/0x120 [cifs]
[ 430.523019] cifs_demultiplex_thread+0xdc/0xc30 [cifs]
[ 430.525116] kthread+0xfb/0x130
[ 430.526421] ? cifs_handle_standard+0x190/0x190 [cifs]
[ 430.528514] ? kthread_park+0x90/0x90
[ 430.530019] ret_from_fork+0x35/0x40
Fix this by obtaining extra references for mids being retried
and marking them as MID_DELETED which indicates that such a mid
has been dequeued from the pending list.
Also move mid cleanup logic from DeleteMidQEntry to
_cifs_mid_q_entry_release which is called when the last reference
to a particular mid is put. This allows to avoid any use-after-free
of response buffers.
The patch needs to be backported to stable kernels. A stable tag
is not mentioned below because the patch doesn't apply cleanly
to any actively maintained stable kernel.
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Reviewed-and-tested-by: David Wysochanski <dwysocha@redhat.com>
Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
---
fs/cifs/connect.c | 10 +++++++++-
fs/cifs/transport.c | 42 +++++++++++++++++++++++-------------------
2 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b724227b853c..ac43c79ebdf5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -564,9 +564,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
spin_lock(&GlobalMid_Lock);
list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
+ kref_get(&mid_entry->refcount);
if (mid_entry->mid_state == MID_REQUEST_SUBMITTED)
mid_entry->mid_state = MID_RETRY_NEEDED;
list_move(&mid_entry->qhead, &retry_list);
+ mid_entry->mid_flags |= MID_DELETED;
}
spin_unlock(&GlobalMid_Lock);
mutex_unlock(&server->srv_mutex);
@@ -576,6 +578,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
list_del_init(&mid_entry->qhead);
mid_entry->callback(mid_entry);
+ cifs_mid_q_entry_release(mid_entry);
}
if (cifs_rdma_enabled(server)) {
@@ -895,8 +898,10 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
if (mid->mid_flags & MID_DELETED)
printk_once(KERN_WARNING
"trying to dequeue a deleted mid\n");
- else
+ else {
list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
spin_unlock(&GlobalMid_Lock);
}
@@ -966,8 +971,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid);
+ kref_get(&mid_entry->refcount);
mid_entry->mid_state = MID_SHUTDOWN;
list_move(&mid_entry->qhead, &dispose_list);
+ mid_entry->mid_flags |= MID_DELETED;
}
spin_unlock(&GlobalMid_Lock);
@@ -977,6 +984,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid);
list_del_init(&mid_entry->qhead);
mid_entry->callback(mid_entry);
+ cifs_mid_q_entry_release(mid_entry);
}
/* 1/8th of sec is more than enough time for them to exit */
msleep(125);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 308ad0f495e1..ca3de62688d6 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -86,22 +86,8 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
static void _cifs_mid_q_entry_release(struct kref *refcount)
{
- struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry,
- refcount);
-
- mempool_free(mid, cifs_mid_poolp);
-}
-
-void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
-{
- spin_lock(&GlobalMid_Lock);
- kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
- spin_unlock(&GlobalMid_Lock);
-}
-
-void
-DeleteMidQEntry(struct mid_q_entry *midEntry)
-{
+ struct mid_q_entry *midEntry =
+ container_of(refcount, struct mid_q_entry, refcount);
#ifdef CONFIG_CIFS_STATS2
__le16 command = midEntry->server->vals->lock_cmd;
__u16 smb_cmd = le16_to_cpu(midEntry->command);
@@ -166,6 +152,19 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
}
}
#endif
+
+ mempool_free(midEntry, cifs_mid_poolp);
+}
+
+void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
+{
+ spin_lock(&GlobalMid_Lock);
+ kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
+ spin_unlock(&GlobalMid_Lock);
+}
+
+void DeleteMidQEntry(struct mid_q_entry *midEntry)
+{
cifs_mid_q_entry_release(midEntry);
}
@@ -173,8 +172,10 @@ void
cifs_delete_mid(struct mid_q_entry *mid)
{
spin_lock(&GlobalMid_Lock);
- list_del_init(&mid->qhead);
- mid->mid_flags |= MID_DELETED;
+ if (!(mid->mid_flags & MID_DELETED)) {
+ list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
spin_unlock(&GlobalMid_Lock);
DeleteMidQEntry(mid);
@@ -872,7 +873,10 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
rc = -EHOSTDOWN;
break;
default:
- list_del_init(&mid->qhead);
+ if (!(mid->mid_flags & MID_DELETED)) {
+ list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
__func__, mid->mid, mid->mid_state);
rc = -EIO;
--
2.17.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] CIFS: Fix retry mid list corruption on reconnects
2019-10-22 22:35 [PATCH] CIFS: Fix retry mid list corruption on reconnects Pavel Shilovsky
@ 2019-10-23 2:56 ` Steve French
0 siblings, 0 replies; 2+ messages in thread
From: Steve French @ 2019-10-23 2:56 UTC (permalink / raw)
To: Pavel Shilovsky
Cc: Ronnie Sahlberg, David Wysochanski, linux-cifs, Pavel Shilovskiy
tentatively merged into cifs-2.6.git for-next
running buildbot now ...
http://smb3-test-rhel-75.southcentralus.cloudapp.azure.com/#/builders/2/builds/268
On Tue, Oct 22, 2019 at 5:35 PM Pavel Shilovsky <piastryyy@gmail.com> wrote:
>
> When the client hits reconnect it iterates over the mid
> pending queue marking entries for retry and moving them
> to a temporary list to issue callbacks later without holding
> GlobalMid_Lock. In the same time there is no guarantee that
> mids can't be removed from the temporary list or even
> freed completely by another thread. It may cause a temporary
> list corruption:
>
> [ 430.454897] list_del corruption. prev->next should be ffff98d3a8f316c0, but was 2e885cb266355469
> [ 430.464668] ------------[ cut here ]------------
> [ 430.466569] kernel BUG at lib/list_debug.c:51!
> [ 430.468476] invalid opcode: 0000 [#1] SMP PTI
> [ 430.470286] CPU: 0 PID: 13267 Comm: cifsd Kdump: loaded Not tainted 5.4.0-rc3+ #19
> [ 430.473472] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
> [ 430.475872] RIP: 0010:__list_del_entry_valid.cold+0x31/0x55
> ...
> [ 430.510426] Call Trace:
> [ 430.511500] cifs_reconnect+0x25e/0x610 [cifs]
> [ 430.513350] cifs_readv_from_socket+0x220/0x250 [cifs]
> [ 430.515464] cifs_read_from_socket+0x4a/0x70 [cifs]
> [ 430.517452] ? try_to_wake_up+0x212/0x650
> [ 430.519122] ? cifs_small_buf_get+0x16/0x30 [cifs]
> [ 430.521086] ? allocate_buffers+0x66/0x120 [cifs]
> [ 430.523019] cifs_demultiplex_thread+0xdc/0xc30 [cifs]
> [ 430.525116] kthread+0xfb/0x130
> [ 430.526421] ? cifs_handle_standard+0x190/0x190 [cifs]
> [ 430.528514] ? kthread_park+0x90/0x90
> [ 430.530019] ret_from_fork+0x35/0x40
>
> Fix this by obtaining extra references for mids being retried
> and marking them as MID_DELETED which indicates that such a mid
> has been dequeued from the pending list.
>
> Also move mid cleanup logic from DeleteMidQEntry to
> _cifs_mid_q_entry_release which is called when the last reference
> to a particular mid is put. This allows to avoid any use-after-free
> of response buffers.
>
> The patch needs to be backported to stable kernels. A stable tag
> is not mentioned below because the patch doesn't apply cleanly
> to any actively maintained stable kernel.
>
> Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
> Reviewed-and-tested-by: David Wysochanski <dwysocha@redhat.com>
> Signed-off-by: Pavel Shilovsky <pshilov@microsoft.com>
> ---
> fs/cifs/connect.c | 10 +++++++++-
> fs/cifs/transport.c | 42 +++++++++++++++++++++++-------------------
> 2 files changed, 32 insertions(+), 20 deletions(-)
>
> diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
> index b724227b853c..ac43c79ebdf5 100644
> --- a/fs/cifs/connect.c
> +++ b/fs/cifs/connect.c
> @@ -564,9 +564,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
> spin_lock(&GlobalMid_Lock);
> list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
> mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
> + kref_get(&mid_entry->refcount);
> if (mid_entry->mid_state == MID_REQUEST_SUBMITTED)
> mid_entry->mid_state = MID_RETRY_NEEDED;
> list_move(&mid_entry->qhead, &retry_list);
> + mid_entry->mid_flags |= MID_DELETED;
> }
> spin_unlock(&GlobalMid_Lock);
> mutex_unlock(&server->srv_mutex);
> @@ -576,6 +578,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
> mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
> list_del_init(&mid_entry->qhead);
> mid_entry->callback(mid_entry);
> + cifs_mid_q_entry_release(mid_entry);
> }
>
> if (cifs_rdma_enabled(server)) {
> @@ -895,8 +898,10 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
> if (mid->mid_flags & MID_DELETED)
> printk_once(KERN_WARNING
> "trying to dequeue a deleted mid\n");
> - else
> + else {
> list_del_init(&mid->qhead);
> + mid->mid_flags |= MID_DELETED;
> + }
> spin_unlock(&GlobalMid_Lock);
> }
>
> @@ -966,8 +971,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
> list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
> mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
> cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid);
> + kref_get(&mid_entry->refcount);
> mid_entry->mid_state = MID_SHUTDOWN;
> list_move(&mid_entry->qhead, &dispose_list);
> + mid_entry->mid_flags |= MID_DELETED;
> }
> spin_unlock(&GlobalMid_Lock);
>
> @@ -977,6 +984,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
> cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid);
> list_del_init(&mid_entry->qhead);
> mid_entry->callback(mid_entry);
> + cifs_mid_q_entry_release(mid_entry);
> }
> /* 1/8th of sec is more than enough time for them to exit */
> msleep(125);
> diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
> index 308ad0f495e1..ca3de62688d6 100644
> --- a/fs/cifs/transport.c
> +++ b/fs/cifs/transport.c
> @@ -86,22 +86,8 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
>
> static void _cifs_mid_q_entry_release(struct kref *refcount)
> {
> - struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry,
> - refcount);
> -
> - mempool_free(mid, cifs_mid_poolp);
> -}
> -
> -void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
> -{
> - spin_lock(&GlobalMid_Lock);
> - kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
> - spin_unlock(&GlobalMid_Lock);
> -}
> -
> -void
> -DeleteMidQEntry(struct mid_q_entry *midEntry)
> -{
> + struct mid_q_entry *midEntry =
> + container_of(refcount, struct mid_q_entry, refcount);
> #ifdef CONFIG_CIFS_STATS2
> __le16 command = midEntry->server->vals->lock_cmd;
> __u16 smb_cmd = le16_to_cpu(midEntry->command);
> @@ -166,6 +152,19 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
> }
> }
> #endif
> +
> + mempool_free(midEntry, cifs_mid_poolp);
> +}
> +
> +void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
> +{
> + spin_lock(&GlobalMid_Lock);
> + kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
> + spin_unlock(&GlobalMid_Lock);
> +}
> +
> +void DeleteMidQEntry(struct mid_q_entry *midEntry)
> +{
> cifs_mid_q_entry_release(midEntry);
> }
>
> @@ -173,8 +172,10 @@ void
> cifs_delete_mid(struct mid_q_entry *mid)
> {
> spin_lock(&GlobalMid_Lock);
> - list_del_init(&mid->qhead);
> - mid->mid_flags |= MID_DELETED;
> + if (!(mid->mid_flags & MID_DELETED)) {
> + list_del_init(&mid->qhead);
> + mid->mid_flags |= MID_DELETED;
> + }
> spin_unlock(&GlobalMid_Lock);
>
> DeleteMidQEntry(mid);
> @@ -872,7 +873,10 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
> rc = -EHOSTDOWN;
> break;
> default:
> - list_del_init(&mid->qhead);
> + if (!(mid->mid_flags & MID_DELETED)) {
> + list_del_init(&mid->qhead);
> + mid->mid_flags |= MID_DELETED;
> + }
> cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
> __func__, mid->mid, mid->mid_state);
> rc = -EIO;
> --
> 2.17.1
>
--
Thanks,
Steve
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-10-23 2:56 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-22 22:35 [PATCH] CIFS: Fix retry mid list corruption on reconnects Pavel Shilovsky
2019-10-23 2:56 ` Steve French
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).