All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] swiotlb: fix the deadlock in swiotlb_do_find_slots
@ 2023-02-22 16:53 GuoRui.Yu
  2023-03-13  5:13 ` Guorui Yu
  2023-03-15 14:47 ` Christoph Hellwig
  0 siblings, 2 replies; 4+ messages in thread
From: GuoRui.Yu @ 2023-02-22 16:53 UTC (permalink / raw)
  To: hch, m.szyprowski; +Cc: robin.murphy, iommu, linux-kernel, GuoRui.Yu, linux-mm

In general, if swiotlb is sufficient, the logic of index =
wrap_area_index(mem, index + 1) is fine: it will quickly take a slot and
release the area->lock. But if swiotlb is insufficient and the device
has min_align_mask requirements, such as NVMe, we may not be able to
satisfy index == wrap and exit the loop properly. In this case, other
kernel threads will not be able to acquire the area->lock and release
the slot, resulting in a deadlock.

The current implementation of wrap_area_index does not involve a modulo
operation, so adjusting the wrap to ensure the loop ends is not trivial.
Introduce the index_nowrap variable to record the number of loops and
exit the loop after completing the traversal.

Backtraces:
Other CPUs are waiting for this core to exit the swiotlb_do_find_slots
loop.
[10199.924391] RIP: 0010:swiotlb_do_find_slots+0x1fe/0x3e0
[10199.924403] Call Trace:
[10199.924404]  <TASK>
[10199.924405]  swiotlb_tbl_map_single+0xec/0x1f0
[10199.924407]  swiotlb_map+0x5c/0x260
[10199.924409]  ? nvme_pci_setup_prps+0x1ed/0x340
[10199.924411]  dma_direct_map_page+0x12e/0x1c0
[10199.924413]  nvme_map_data+0x304/0x370
[10199.924415]  nvme_prep_rq.part.0+0x31/0x120
[10199.924417]  nvme_queue_rq+0x77/0x1f0

...
[ 9639.596311] NMI backtrace for cpu 48
[ 9639.596336] Call Trace:
[ 9639.596337]
[ 9639.596338] _raw_spin_lock_irqsave+0x37/0x40
[ 9639.596341] swiotlb_do_find_slots+0xef/0x3e0
[ 9639.596344] swiotlb_tbl_map_single+0xec/0x1f0
[ 9639.596347] swiotlb_map+0x5c/0x260
[ 9639.596349] dma_direct_map_sg+0x7a/0x280
[ 9639.596352] __dma_map_sg_attrs+0x30/0x70
[ 9639.596355] dma_map_sgtable+0x1d/0x30
[ 9639.596356] nvme_map_data+0xce/0x370

...
[ 9639.595665] NMI backtrace for cpu 50
[ 9639.595682] Call Trace:
[ 9639.595682]
[ 9639.595683] _raw_spin_lock_irqsave+0x37/0x40
[ 9639.595686] swiotlb_release_slots.isra.0+0x86/0x180
[ 9639.595688] dma_direct_unmap_sg+0xcf/0x1a0
[ 9639.595690] nvme_unmap_data.part.0+0x43/0xc0

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Signed-off-by: GuoRui.Yu <GuoRui.Yu@linux.alibaba.com>
Signed-off-by: Xiaokang Hu <xiaokang.hxk@alibaba-inc.com>
---
 kernel/dma/swiotlb.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index a34c38bbe28f..638ba3ea94f4 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -632,7 +632,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 	unsigned int iotlb_align_mask =
 		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
 	unsigned int nslots = nr_slots(alloc_size), stride;
-	unsigned int index, wrap, count = 0, i;
+	unsigned int index, index_nowrap = 0, wrap, count = 0, i;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned long flags;
 	unsigned int slot_base;
@@ -665,6 +665,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 		    (slot_addr(tbl_dma_addr, slot_index) &
 		     iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
 			index = wrap_area_index(mem, index + 1);
+			index_nowrap++;
 			continue;
 		}
 
@@ -680,7 +681,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 				goto found;
 		}
 		index = wrap_area_index(mem, index + stride);
-	} while (index != wrap);
+		index_nowrap += stride;
+	} while (index_nowrap < mem->area_nslabs);
 
 not_found:
 	spin_unlock_irqrestore(&area->lock, flags);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] swiotlb: fix the deadlock in swiotlb_do_find_slots
  2023-02-22 16:53 [PATCH v2] swiotlb: fix the deadlock in swiotlb_do_find_slots GuoRui.Yu
@ 2023-03-13  5:13 ` Guorui Yu
  2023-03-15 14:47 ` Christoph Hellwig
  1 sibling, 0 replies; 4+ messages in thread
From: Guorui Yu @ 2023-03-13  5:13 UTC (permalink / raw)
  To: hch, m.szyprowski; +Cc: robin.murphy, iommu, linux-kernel, linux-mm

Hi Christoph, kindly ping for any comments here?

Thanks,
Guorui

在 2023/2/23 00:53, GuoRui.Yu 写道:
> In general, if swiotlb is sufficient, the logic of index =
> wrap_area_index(mem, index + 1) is fine, it will quickly take a slot and
> release the area->lock; But if swiotlb is insufficient and the device
> has min_align_mask requirements, such as NVME, we may not be able to
> satisfy index == wrap and exit the loop properly. In this case, other
> kernel threads will not be able to acquire the area->lock and release
> the slot, resulting in a deadlock.
> 
> The current implementation of wrap_area_index does not involve a modulo
> operation, so adjusting the wrap to ensure the loop ends is not trivial.
> Introduce the index_nowrap variable to record the number of loops and
> exit the loop after completing the traversal.
> 
> Backtraces:
> Other CPUs are waiting this core to exit the swiotlb_do_find_slots
> loop.
> [10199.924391] RIP: 0010:swiotlb_do_find_slots+0x1fe/0x3e0
> [10199.924403] Call Trace:
> [10199.924404]  <TASK>
> [10199.924405]  swiotlb_tbl_map_single+0xec/0x1f0
> [10199.924407]  swiotlb_map+0x5c/0x260
> [10199.924409]  ? nvme_pci_setup_prps+0x1ed/0x340
> [10199.924411]  dma_direct_map_page+0x12e/0x1c0
> [10199.924413]  nvme_map_data+0x304/0x370
> [10199.924415]  nvme_prep_rq.part.0+0x31/0x120
> [10199.924417]  nvme_queue_rq+0x77/0x1f0
> 
> ...
> [ 9639.596311] NMI backtrace for cpu 48
> [ 9639.596336] Call Trace:
> [ 9639.596337]
> [ 9639.596338] _raw_spin_lock_irqsave+0x37/0x40
> [ 9639.596341] swiotlb_do_find_slots+0xef/0x3e0
> [ 9639.596344] swiotlb_tbl_map_single+0xec/0x1f0
> [ 9639.596347] swiotlb_map+0x5c/0x260
> [ 9639.596349] dma_direct_map_sg+0x7a/0x280
> [ 9639.596352] __dma_map_sg_attrs+0x30/0x70
> [ 9639.596355] dma_map_sgtable+0x1d/0x30
> [ 9639.596356] nvme_map_data+0xce/0x370
> 
> ...
> [ 9639.595665] NMI backtrace for cpu 50
> [ 9639.595682] Call Trace:
> [ 9639.595682]
> [ 9639.595683] _raw_spin_lock_irqsave+0x37/0x40
> [ 9639.595686] swiotlb_release_slots.isra.0+0x86/0x180
> [ 9639.595688] dma_direct_unmap_sg+0xcf/0x1a0
> [ 9639.595690] nvme_unmap_data.part.0+0x43/0xc0
> 
> Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
> Signed-off-by: GuoRui.Yu <GuoRui.Yu@linux.alibaba.com>
> Signed-off-by: Xiaokang Hu <xiaokang.hxk@alibaba-inc.com>
> ---
>   kernel/dma/swiotlb.c | 6 ++++--
>   1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index a34c38bbe28f..638ba3ea94f4 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -632,7 +632,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
>   	unsigned int iotlb_align_mask =
>   		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
>   	unsigned int nslots = nr_slots(alloc_size), stride;
> -	unsigned int index, wrap, count = 0, i;
> +	unsigned int index, index_nowrap = 0, wrap, count = 0, i;
>   	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
>   	unsigned long flags;
>   	unsigned int slot_base;
> @@ -665,6 +665,7 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
>   		    (slot_addr(tbl_dma_addr, slot_index) &
>   		     iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
>   			index = wrap_area_index(mem, index + 1);
> +			index_nowrap++;
>   			continue;
>   		}
>   
> @@ -680,7 +681,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
>   				goto found;
>   		}
>   		index = wrap_area_index(mem, index + stride);
> -	} while (index != wrap);
> +		index_nowrap += stride;
> +	} while (index_nowrap < mem->area_nslabs);
>   
>   not_found:
>   	spin_unlock_irqrestore(&area->lock, flags);

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] swiotlb: fix the deadlock in swiotlb_do_find_slots
  2023-02-22 16:53 [PATCH v2] swiotlb: fix the deadlock in swiotlb_do_find_slots GuoRui.Yu
  2023-03-13  5:13 ` Guorui Yu
@ 2023-03-15 14:47 ` Christoph Hellwig
  2023-03-16  2:01   ` Guorui Yu
  1 sibling, 1 reply; 4+ messages in thread
From: Christoph Hellwig @ 2023-03-15 14:47 UTC (permalink / raw)
  To: GuoRui.Yu; +Cc: hch, m.szyprowski, robin.murphy, iommu, linux-kernel, linux-mm

I think this looks generally fine, but the index_nowrap variable
name seems very confusing.  What about this slightly adjusted
version?

---
From 11559745f0920b53ba5f8b2fc6241891e1dfcf4b Mon Sep 17 00:00:00 2001
From: "GuoRui.Yu" <GuoRui.Yu@linux.alibaba.com>
Subject: swiotlb: fix the deadlock in swiotlb_do_find_slots

In general, if swiotlb is sufficient, the logic of index =
wrap_area_index(mem, index + 1) is fine: it will quickly take a slot and
release the area->lock. But if swiotlb is insufficient and the device
has min_align_mask requirements, such as NVMe, we may not be able to
satisfy index == wrap and exit the loop properly. In this case, other
kernel threads will not be able to acquire the area->lock and release
the slot, resulting in a deadlock.

The current implementation of wrap_area_index does not involve a modulo
operation, so adjusting the wrap to ensure the loop ends is not trivial.
Introduce a new variable to record the number of loops and exit the loop
after completing the traversal.

Backtraces:
Other CPUs are waiting for this core to exit the swiotlb_do_find_slots
loop.
[10199.924391] RIP: 0010:swiotlb_do_find_slots+0x1fe/0x3e0
[10199.924403] Call Trace:
[10199.924404]  <TASK>
[10199.924405]  swiotlb_tbl_map_single+0xec/0x1f0
[10199.924407]  swiotlb_map+0x5c/0x260
[10199.924409]  ? nvme_pci_setup_prps+0x1ed/0x340
[10199.924411]  dma_direct_map_page+0x12e/0x1c0
[10199.924413]  nvme_map_data+0x304/0x370
[10199.924415]  nvme_prep_rq.part.0+0x31/0x120
[10199.924417]  nvme_queue_rq+0x77/0x1f0

...
[ 9639.596311] NMI backtrace for cpu 48
[ 9639.596336] Call Trace:
[ 9639.596337]
[ 9639.596338] _raw_spin_lock_irqsave+0x37/0x40
[ 9639.596341] swiotlb_do_find_slots+0xef/0x3e0
[ 9639.596344] swiotlb_tbl_map_single+0xec/0x1f0
[ 9639.596347] swiotlb_map+0x5c/0x260
[ 9639.596349] dma_direct_map_sg+0x7a/0x280
[ 9639.596352] __dma_map_sg_attrs+0x30/0x70
[ 9639.596355] dma_map_sgtable+0x1d/0x30
[ 9639.596356] nvme_map_data+0xce/0x370

...
[ 9639.595665] NMI backtrace for cpu 50
[ 9639.595682] Call Trace:
[ 9639.595682]
[ 9639.595683] _raw_spin_lock_irqsave+0x37/0x40
[ 9639.595686] swiotlb_release_slots.isra.0+0x86/0x180
[ 9639.595688] dma_direct_unmap_sg+0xcf/0x1a0
[ 9639.595690] nvme_unmap_data.part.0+0x43/0xc0

Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
Signed-off-by: GuoRui.Yu <GuoRui.Yu@linux.alibaba.com>
Signed-off-by: Xiaokang Hu <xiaokang.hxk@alibaba-inc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/swiotlb.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 03e3251cd9d2b6..91454b513db069 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -625,8 +625,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 	unsigned int iotlb_align_mask =
 		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
 	unsigned int nslots = nr_slots(alloc_size), stride;
-	unsigned int index, wrap, count = 0, i;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+	unsigned int index, slots_checked, count = 0, i;
 	unsigned long flags;
 	unsigned int slot_base;
 	unsigned int slot_index;
@@ -649,15 +649,16 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 		goto not_found;
 
 	slot_base = area_index * mem->area_nslabs;
-	index = wrap = wrap_area_index(mem, ALIGN(area->index, stride));
+	index = wrap_area_index(mem, ALIGN(area->index, stride));
 
-	do {
+	for (slots_checked = 0; slots_checked < mem->area_nslabs; ) {
 		slot_index = slot_base + index;
 
 		if (orig_addr &&
 		    (slot_addr(tbl_dma_addr, slot_index) &
 		     iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
 			index = wrap_area_index(mem, index + 1);
+			slots_checked++;
 			continue;
 		}
 
@@ -673,7 +674,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
 				goto found;
 		}
 		index = wrap_area_index(mem, index + stride);
-	} while (index != wrap);
+		slots_checked += stride;
+	}
 
 not_found:
 	spin_unlock_irqrestore(&area->lock, flags);
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] swiotlb: fix the deadlock in swiotlb_do_find_slots
  2023-03-15 14:47 ` Christoph Hellwig
@ 2023-03-16  2:01   ` Guorui Yu
  0 siblings, 0 replies; 4+ messages in thread
From: Guorui Yu @ 2023-03-16  2:01 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: m.szyprowski, robin.murphy, iommu, linux-kernel, linux-mm

Hi Christoph,

Thanks for your reply, and this new version looks good to me.

Yours sincerely,
Guorui

在 2023/3/15 22:47, Christoph Hellwig 写道:
> I think this looks generally fine, but the index_nowrap variable
> name seems very confusing.  What about this slightly adjusted
> version?
> 
> ---
>  From 11559745f0920b53ba5f8b2fc6241891e1dfcf4b Mon Sep 17 00:00:00 2001
> From: "GuoRui.Yu" <GuoRui.Yu@linux.alibaba.com>
> Subject: swiotlb: fix the deadlock in swiotlb_do_find_slots
> 
> In general, if swiotlb is sufficient, the logic of index =
> wrap_area_index(mem, index + 1) is fine, it will quickly take a slot and
> release the area->lock; But if swiotlb is insufficient and the device
> has min_align_mask requirements, such as NVME, we may not be able to
> satisfy index == wrap and exit the loop properly. In this case, other
> kernel threads will not be able to acquire the area->lock and release
> the slot, resulting in a deadlock.
> 
> The current implementation of wrap_area_index does not involve a modulo
> operation, so adjusting the wrap to ensure the loop ends is not trivial.
> Introduce a new variable to record the number of loops and exit the loop
> after completing the traversal.
> 
> Backtraces:
> Other CPUs are waiting this core to exit the swiotlb_do_find_slots
> loop.
> [10199.924391] RIP: 0010:swiotlb_do_find_slots+0x1fe/0x3e0
> [10199.924403] Call Trace:
> [10199.924404]  <TASK>
> [10199.924405]  swiotlb_tbl_map_single+0xec/0x1f0
> [10199.924407]  swiotlb_map+0x5c/0x260
> [10199.924409]  ? nvme_pci_setup_prps+0x1ed/0x340
> [10199.924411]  dma_direct_map_page+0x12e/0x1c0
> [10199.924413]  nvme_map_data+0x304/0x370
> [10199.924415]  nvme_prep_rq.part.0+0x31/0x120
> [10199.924417]  nvme_queue_rq+0x77/0x1f0
> 
> ...
> [ 9639.596311] NMI backtrace for cpu 48
> [ 9639.596336] Call Trace:
> [ 9639.596337]
> [ 9639.596338] _raw_spin_lock_irqsave+0x37/0x40
> [ 9639.596341] swiotlb_do_find_slots+0xef/0x3e0
> [ 9639.596344] swiotlb_tbl_map_single+0xec/0x1f0
> [ 9639.596347] swiotlb_map+0x5c/0x260
> [ 9639.596349] dma_direct_map_sg+0x7a/0x280
> [ 9639.596352] __dma_map_sg_attrs+0x30/0x70
> [ 9639.596355] dma_map_sgtable+0x1d/0x30
> [ 9639.596356] nvme_map_data+0xce/0x370
> 
> ...
> [ 9639.595665] NMI backtrace for cpu 50
> [ 9639.595682] Call Trace:
> [ 9639.595682]
> [ 9639.595683] _raw_spin_lock_irqsave+0x37/0x40
> [ 9639.595686] swiotlb_release_slots.isra.0+0x86/0x180
> [ 9639.595688] dma_direct_unmap_sg+0xcf/0x1a0
> [ 9639.595690] nvme_unmap_data.part.0+0x43/0xc0
> 
> Fixes: 1f221a0d0dbf ("swiotlb: respect min_align_mask")
> Signed-off-by: GuoRui.Yu <GuoRui.Yu@linux.alibaba.com>
> Signed-off-by: Xiaokang Hu <xiaokang.hxk@alibaba-inc.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   kernel/dma/swiotlb.c | 10 ++++++----
>   1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 03e3251cd9d2b6..91454b513db069 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -625,8 +625,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
>   	unsigned int iotlb_align_mask =
>   		dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
>   	unsigned int nslots = nr_slots(alloc_size), stride;
> -	unsigned int index, wrap, count = 0, i;
>   	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
> +	unsigned int index, slots_checked, count = 0, i;
>   	unsigned long flags;
>   	unsigned int slot_base;
>   	unsigned int slot_index;
> @@ -649,15 +649,16 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
>   		goto not_found;
>   
>   	slot_base = area_index * mem->area_nslabs;
> -	index = wrap = wrap_area_index(mem, ALIGN(area->index, stride));
> +	index = wrap_area_index(mem, ALIGN(area->index, stride));
>   
> -	do {
> +	for (slots_checked = 0; slots_checked < mem->area_nslabs; ) {
>   		slot_index = slot_base + index;
>   
>   		if (orig_addr &&
>   		    (slot_addr(tbl_dma_addr, slot_index) &
>   		     iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
>   			index = wrap_area_index(mem, index + 1);
> +			slots_checked++;
>   			continue;
>   		}
>   
> @@ -673,7 +674,8 @@ static int swiotlb_do_find_slots(struct device *dev, int area_index,
>   				goto found;
>   		}
>   		index = wrap_area_index(mem, index + stride);
> -	} while (index != wrap);
> +		slots_checked += stride;
> +	}
>   
>   not_found:
>   	spin_unlock_irqrestore(&area->lock, flags);

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-03-16  2:02 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-22 16:53 [PATCH v2] swiotlb: fix the deadlock in swiotlb_do_find_slots GuoRui.Yu
2023-03-13  5:13 ` Guorui Yu
2023-03-15 14:47 ` Christoph Hellwig
2023-03-16  2:01   ` Guorui Yu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.