All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] scsi: target: tcmu: Fix memory leak
@ 2021-02-18 17:50 Bodo Stroesser
  2021-02-18 17:50 ` [PATCH 1/2] scsi: target: tcmu: Move some functions without code change Bodo Stroesser
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Bodo Stroesser @ 2021-02-18 17:50 UTC (permalink / raw)
  To: linux-scsi, target-devel, Martin K. Petersen
  Cc: Bodo Stroesser, Mike Christie

This small series applies to Martin's for-next.

This is the third attempt to fix a severe memory leak in tcmu.
Previous patches:
  https://lore.kernel.org/linux-scsi/20201218141534.9918-1-bostroesser@gmail.com/
  and
  https://lore.kernel.org/linux-scsi/20210210194031.7422-1-bostroesser@gmail.com/

Tcmu's refcounting relies on tcmu_open and tcmu_release being
called symmetrically by uio. But that is not true if userspace
daemon holds the uio device open or mmap'ed while tcmu calls
uio_unregister device. So refcount can stay above 0 for ever,
which means that tcmu does not free resources of a tcmu device.
In extreme cases the amount of memory leaked can be > 1 GB for
a single destroyed tcmu device.

This new patch series fixes the problem by moving refcounting from
tcmu_open/tcmu_release to new vm_operations_struct::open/*::close
handlers, which are called under all conditions.

Bodo Stroesser (2):
  scsi: target: tcmu: Move some functions without code change
  scsi: target: tcmu: Fix memory leak caused by wrong uio usage

 drivers/target/target_core_user.c | 189 +++++++++++++++++++++-----------------
 1 file changed, 106 insertions(+), 83 deletions(-)

-- 
2.12.3


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/2] scsi: target: tcmu: Move some functions without code change
  2021-02-18 17:50 [PATCH 0/2] scsi: target: tcmu: Fix memory leak Bodo Stroesser
@ 2021-02-18 17:50 ` Bodo Stroesser
  2021-02-18 17:50 ` [PATCH 2/2] scsi: target: tcmu: Fix memory leak caused by wrong uio usage Bodo Stroesser
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Bodo Stroesser @ 2021-02-18 17:50 UTC (permalink / raw)
  To: linux-scsi, target-devel, Martin K. Petersen
  Cc: Bodo Stroesser, Mike Christie

This patch just moves one block of code containing some functions
inside target_core_user.c to avoid adding prototypes in next patch.

Signed-off-by: Bodo Stroesser <bostroesser@gmail.com>
---
 drivers/target/target_core_user.c | 160 +++++++++++++++++++-------------------
 1 file changed, 80 insertions(+), 80 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index a5991df23581..6d010cf22879 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1566,6 +1566,86 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
 	return &udev->se_dev;
 }
 
+static void tcmu_dev_call_rcu(struct rcu_head *p)
+{
+	struct se_device *dev = container_of(p, struct se_device, rcu_head);
+	struct tcmu_dev *udev = TCMU_DEV(dev);
+
+	kfree(udev->uio_info.name);
+	kfree(udev->name);
+	kfree(udev);
+}
+
+static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
+{
+	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
+		kmem_cache_free(tcmu_cmd_cache, cmd);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static void tcmu_blocks_release(struct radix_tree_root *blocks,
+				int start, int end)
+{
+	int i;
+	struct page *page;
+
+	for (i = start; i < end; i++) {
+		page = radix_tree_delete(blocks, i);
+		if (page) {
+			__free_page(page);
+			atomic_dec(&global_db_count);
+		}
+	}
+}
+
+static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev)
+{
+	struct tcmu_tmr *tmr, *tmp;
+
+	list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) {
+		list_del_init(&tmr->queue_entry);
+		kfree(tmr);
+	}
+}
+
+static void tcmu_dev_kref_release(struct kref *kref)
+{
+	struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
+	struct se_device *dev = &udev->se_dev;
+	struct tcmu_cmd *cmd;
+	bool all_expired = true;
+	int i;
+
+	vfree(udev->mb_addr);
+	udev->mb_addr = NULL;
+
+	spin_lock_bh(&timed_out_udevs_lock);
+	if (!list_empty(&udev->timedout_entry))
+		list_del(&udev->timedout_entry);
+	spin_unlock_bh(&timed_out_udevs_lock);
+
+	/* Upper layer should drain all requests before calling this */
+	mutex_lock(&udev->cmdr_lock);
+	idr_for_each_entry(&udev->commands, cmd, i) {
+		if (tcmu_check_and_free_pending_cmd(cmd) != 0)
+			all_expired = false;
+	}
+	/* There can be left over TMR cmds. Remove them. */
+	tcmu_remove_all_queued_tmr(udev);
+	if (!list_empty(&udev->qfull_queue))
+		all_expired = false;
+	idr_destroy(&udev->commands);
+	WARN_ON(!all_expired);
+
+	tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
+	bitmap_free(udev->data_bitmap);
+	mutex_unlock(&udev->cmdr_lock);
+
+	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
+}
+
 static void run_qfull_queue(struct tcmu_dev *udev, bool fail)
 {
 	struct tcmu_cmd *tcmu_cmd, *tmp_cmd;
@@ -1751,86 +1831,6 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 	return 0;
 }
 
-static void tcmu_dev_call_rcu(struct rcu_head *p)
-{
-	struct se_device *dev = container_of(p, struct se_device, rcu_head);
-	struct tcmu_dev *udev = TCMU_DEV(dev);
-
-	kfree(udev->uio_info.name);
-	kfree(udev->name);
-	kfree(udev);
-}
-
-static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
-{
-	if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
-		kmem_cache_free(tcmu_cmd_cache, cmd);
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static void tcmu_blocks_release(struct radix_tree_root *blocks,
-				int start, int end)
-{
-	int i;
-	struct page *page;
-
-	for (i = start; i < end; i++) {
-		page = radix_tree_delete(blocks, i);
-		if (page) {
-			__free_page(page);
-			atomic_dec(&global_db_count);
-		}
-	}
-}
-
-static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev)
-{
-	struct tcmu_tmr *tmr, *tmp;
-
-	list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) {
-		list_del_init(&tmr->queue_entry);
-		kfree(tmr);
-	}
-}
-
-static void tcmu_dev_kref_release(struct kref *kref)
-{
-	struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
-	struct se_device *dev = &udev->se_dev;
-	struct tcmu_cmd *cmd;
-	bool all_expired = true;
-	int i;
-
-	vfree(udev->mb_addr);
-	udev->mb_addr = NULL;
-
-	spin_lock_bh(&timed_out_udevs_lock);
-	if (!list_empty(&udev->timedout_entry))
-		list_del(&udev->timedout_entry);
-	spin_unlock_bh(&timed_out_udevs_lock);
-
-	/* Upper layer should drain all requests before calling this */
-	mutex_lock(&udev->cmdr_lock);
-	idr_for_each_entry(&udev->commands, cmd, i) {
-		if (tcmu_check_and_free_pending_cmd(cmd) != 0)
-			all_expired = false;
-	}
-	/* There can be left over TMR cmds. Remove them. */
-	tcmu_remove_all_queued_tmr(udev);
-	if (!list_empty(&udev->qfull_queue))
-		all_expired = false;
-	idr_destroy(&udev->commands);
-	WARN_ON(!all_expired);
-
-	tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
-	bitmap_free(udev->data_bitmap);
-	mutex_unlock(&udev->cmdr_lock);
-
-	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
-}
-
 static int tcmu_release(struct uio_info *info, struct inode *inode)
 {
 	struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
-- 
2.12.3


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] scsi: target: tcmu: Fix memory leak caused by wrong uio usage
  2021-02-18 17:50 [PATCH 0/2] scsi: target: tcmu: Fix memory leak Bodo Stroesser
  2021-02-18 17:50 ` [PATCH 1/2] scsi: target: tcmu: Move some functions without code change Bodo Stroesser
@ 2021-02-18 17:50 ` Bodo Stroesser
  2021-02-19 19:01 ` [PATCH 0/2] scsi: target: tcmu: Fix memory leak michael.christie
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Bodo Stroesser @ 2021-02-18 17:50 UTC (permalink / raw)
  To: linux-scsi, target-devel, Martin K. Petersen
  Cc: Bodo Stroesser, Mike Christie

When user deletes a tcmu device via configFS, tcmu calls
uio_unregister_device(). During that call uio resets its pointer
to struct uio_info provided by tcmu. That means, after
uio_unregister_device uio will no longer execute any of the
callbacks tcmu had set in uio_info.

Especially, if userspace daemon still holds the corresponding uio
device open or mmap'ed while tcmu calls uio_unregister_device(),
uio will not call tcmu_release() when userspace finally closes
and munmaps the uio device.

Since tcmu does refcounting for the tcmu device in tcmu_open()
and tcmu_release(), in the decribed case refcount does not drop
to 0 and tcmu does not free tcmu device's resources.
In extrem cases this can cause memory leaking of up to 1 GB for
a single tcmu device.

After uio_unregister_device(), uio will reject every open, read,
write, mmap from userspace with -EOI. But userspace daemon can
still access the mmap'ed command ring and data area. Therefore
tcmu should wait until userspace munmaps the uio device before
it frees the resources, as we don't want to cause SIGSEGV or
SIGBUS to user space.

That said, current refcounting during tcmu_open and tcmu_release
does not work correctly, and refcounting better should be done in
the open and close callouts of the vm_operations_struct, which
tcmu assigns to each mmap of the uio device (because it wants its
own page fault handler).

This patch fixes the memory leak by removing refcounting from
tcmu_open and tcmu_close, and instead adding new tcmu_vma_open()
and tcmu_vma_close() handlers that only do refcounting.

Signed-off-by: Bodo Stroesser <bostroesser@gmail.com>
---
 drivers/target/target_core_user.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 6d010cf22879..bf73cd5f4b04 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1643,6 +1643,8 @@ static void tcmu_dev_kref_release(struct kref *kref)
 	bitmap_free(udev->data_bitmap);
 	mutex_unlock(&udev->cmdr_lock);
 
+	pr_debug("dev_kref_release\n");
+
 	call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
 }
 
@@ -1758,6 +1760,25 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
 	return page;
 }
 
+static void tcmu_vma_open(struct vm_area_struct *vma)
+{
+	struct tcmu_dev *udev = vma->vm_private_data;
+
+	pr_debug("vma_open\n");
+
+	kref_get(&udev->kref);
+}
+
+static void tcmu_vma_close(struct vm_area_struct *vma)
+{
+	struct tcmu_dev *udev = vma->vm_private_data;
+
+	pr_debug("vma_close\n");
+
+	/* release ref from tcmu_vma_open */
+	kref_put(&udev->kref, tcmu_dev_kref_release);
+}
+
 static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
 {
 	struct tcmu_dev *udev = vmf->vma->vm_private_data;
@@ -1796,6 +1817,8 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
 }
 
 static const struct vm_operations_struct tcmu_vm_ops = {
+	.open = tcmu_vma_open,
+	.close = tcmu_vma_close,
 	.fault = tcmu_vma_fault,
 };
 
@@ -1812,6 +1835,8 @@ static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
 	if (vma_pages(vma) != (udev->ring_size >> PAGE_SHIFT))
 		return -EINVAL;
 
+	tcmu_vma_open(vma);
+
 	return 0;
 }
 
@@ -1824,7 +1849,6 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
 		return -EBUSY;
 
 	udev->inode = inode;
-	kref_get(&udev->kref);
 
 	pr_debug("open\n");
 
@@ -1838,8 +1862,7 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
 	clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
 
 	pr_debug("close\n");
-	/* release ref from open */
-	kref_put(&udev->kref, tcmu_dev_kref_release);
+
 	return 0;
 }
 
-- 
2.12.3


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 0/2] scsi: target: tcmu: Fix memory leak
  2021-02-18 17:50 [PATCH 0/2] scsi: target: tcmu: Fix memory leak Bodo Stroesser
  2021-02-18 17:50 ` [PATCH 1/2] scsi: target: tcmu: Move some functions without code change Bodo Stroesser
  2021-02-18 17:50 ` [PATCH 2/2] scsi: target: tcmu: Fix memory leak caused by wrong uio usage Bodo Stroesser
@ 2021-02-19 19:01 ` michael.christie
  2021-02-23  3:35 ` Martin K. Petersen
  2021-02-26  2:22 ` Martin K. Petersen
  4 siblings, 0 replies; 6+ messages in thread
From: michael.christie @ 2021-02-19 19:01 UTC (permalink / raw)
  To: Bodo Stroesser, linux-scsi, target-devel, Martin K. Petersen

On 2/18/21 11:50 AM, Bodo Stroesser wrote:
> This small series applies to Martin's for-next.
> 
> This is the third attempt to fix a severe memory leak in tcmu.
> Previous patches:
>   https://urldefense.com/v3/__https://lore.kernel.org/linux-scsi/20201218141534.9918-1-bostroesser@gmail.com/__;!!GqivPVa7Brio!Oz9qazrvG2YRDRkm5ey3KEvdVmuukGz523-nIrg5dvnwt1bKlOAjBmh4g8ADxdmcca5_$ 
>   and
>   https://urldefense.com/v3/__https://lore.kernel.org/linux-scsi/20210210194031.7422-1-bostroesser@gmail.com/__;!!GqivPVa7Brio!Oz9qazrvG2YRDRkm5ey3KEvdVmuukGz523-nIrg5dvnwt1bKlOAjBmh4g8ADxY8eTeL_$ 
> 
> Tcmu's refcounting relies on tcmu_open and tcmu_release being
> called symmetrically by uio. But that is not true if userspace
> daemon holds the uio device open or mmap'ed while tcmu calls
> uio_unregister device. So refcount can stay above 0 for ever,
> which means that tcmu does not free resources of a tcmu device.
> In extreme cases the amount of memory leaked can be > 1 GB for
> a single destroyed tcmu device.
> 
> This new patch series fixes the problem by moving refcounting from
> tcmu_open/tcmu_release to new vm_operations_struct::open/*::close
> handlers, which are called under all conditions.
> 
> Bodo Stroesser (2):
>   scsi: target: tcmu: Move some functions without code change
>   scsi: target: tcmu: Fix memory leak caused by wrong uio usage
> 
>  drivers/target/target_core_user.c | 189 +++++++++++++++++++++-----------------
>  1 file changed, 106 insertions(+), 83 deletions(-)
> 

Reviewed-by: Mike Christie <michael.christie@oracle.com>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 0/2] scsi: target: tcmu: Fix memory leak
  2021-02-18 17:50 [PATCH 0/2] scsi: target: tcmu: Fix memory leak Bodo Stroesser
                   ` (2 preceding siblings ...)
  2021-02-19 19:01 ` [PATCH 0/2] scsi: target: tcmu: Fix memory leak michael.christie
@ 2021-02-23  3:35 ` Martin K. Petersen
  2021-02-26  2:22 ` Martin K. Petersen
  4 siblings, 0 replies; 6+ messages in thread
From: Martin K. Petersen @ 2021-02-23  3:35 UTC (permalink / raw)
  To: Bodo Stroesser
  Cc: linux-scsi, target-devel, Martin K. Petersen, Mike Christie


Bodo,

> This small series applies to Martin's for-next.
>
> This is the third attempt to fix a severe memory leak in tcmu.

Applied to 5.12/scsi-staging, thanks!

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 0/2] scsi: target: tcmu: Fix memory leak
  2021-02-18 17:50 [PATCH 0/2] scsi: target: tcmu: Fix memory leak Bodo Stroesser
                   ` (3 preceding siblings ...)
  2021-02-23  3:35 ` Martin K. Petersen
@ 2021-02-26  2:22 ` Martin K. Petersen
  4 siblings, 0 replies; 6+ messages in thread
From: Martin K. Petersen @ 2021-02-26  2:22 UTC (permalink / raw)
  To: Bodo Stroesser, linux-scsi, target-devel
  Cc: Martin K . Petersen, Mike Christie

On Thu, 18 Feb 2021 18:50:37 +0100, Bodo Stroesser wrote:

> This small series applies to Martin's for-next.
> 
> This is the third attempt to fix a severe memory leak in tcmu.
> Previous patches:
>   https://lore.kernel.org/linux-scsi/20201218141534.9918-1-bostroesser@gmail.com/
>   and
>   https://lore.kernel.org/linux-scsi/20210210194031.7422-1-bostroesser@gmail.com/
> 
> [...]

Applied to 5.12/scsi-queue, thanks!

[1/2] scsi: target: tcmu: Move some functions without code change
      https://git.kernel.org/mkp/scsi/c/43bf922cdd62
[2/2] scsi: target: tcmu: Fix memory leak caused by wrong uio usage
      https://git.kernel.org/mkp/scsi/c/8f33bb2400f4

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-02-26  2:23 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-02-18 17:50 [PATCH 0/2] scsi: target: tcmu: Fix memory leak Bodo Stroesser
2021-02-18 17:50 ` [PATCH 1/2] scsi: target: tcmu: Move some functions without code change Bodo Stroesser
2021-02-18 17:50 ` [PATCH 2/2] scsi: target: tcmu: Fix memory leak caused by wrong uio usage Bodo Stroesser
2021-02-19 19:01 ` [PATCH 0/2] scsi: target: tcmu: Fix memory leak michael.christie
2021-02-23  3:35 ` Martin K. Petersen
2021-02-26  2:22 ` Martin K. Petersen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.