* [RFC PATCH V4 1/1] swiotlb: Split up single swiotlb lock
@ 2022-06-17 14:47 ` Tianyu Lan
  0 siblings, 0 replies; 11+ messages in thread
From: Tianyu Lan @ 2022-06-17 14:47 UTC (permalink / raw)
  To: corbet, hch, m.szyprowski, robin.murphy, paulmck, akpm, bp, tglx,
	songmuchun, rdunlap, damien.lemoal, michael.h.kelley, kys
  Cc: Tianyu Lan, iommu, linux-doc, linux-kernel, vkuznets, wei.liu,
	parri.andrea, thomas.lendacky, linux-hyperv, kirill.shutemov,
	andi.kleen, Andi Kleen

From: Tianyu Lan <Tianyu.Lan@microsoft.com>

Traditionally swiotlb was not performance critical because it was only
used for slow devices. But in some setups, like TDX/SEV confidential
guests, all IO has to go through swiotlb. Currently swiotlb only has a
single lock. Under high IO load with multiple CPUs this can lead to
significant lock contention on the swiotlb lock.

This patch splits the swiotlb bounce buffer pool into individual areas,
each with its own lock. Each CPU tries to allocate from its own area
first, and only searches other areas if that fails. On freeing, the
allocation is returned to the area the memory was originally allocated
from.

The number of areas can be set via swiotlb_adjust_nareas() or the
swiotlb= kernel parameter.
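
For example, booting with "swiotlb=262144,32" (illustrative values)
requests 262144 I/O TLB slabs split into 32 areas of 8192 slabs each,
every area protected by its own lock; the second value must be a power
of 2.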

This idea is based on Andi Kleen's patch
(https://github.com/intel/tdx/commit/4529b5784c141782c72ec9bd9a92df2b68cb7d45).

Based-on-idea-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
 .../admin-guide/kernel-parameters.txt         |   4 +-
 include/linux/swiotlb.h                       |  27 +++
 kernel/dma/swiotlb.c                          | 202 ++++++++++++++----
 3 files changed, 194 insertions(+), 39 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8090130b544b..5d46271982d5 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5869,8 +5869,10 @@
 			it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst)
 
 	swiotlb=	[ARM,IA-64,PPC,MIPS,X86]
-			Format: { <int> | force | noforce }
+			Format: { <int> [,<int>] | force | noforce }
 			<int> -- Number of I/O TLB slabs
+			<int> -- Second integer after comma. Number of swiotlb
+				 areas with their own lock. Must be power of 2.
 			force -- force using of bounce buffers even if they
 			         wouldn't be automatically used by the kernel
 			noforce -- Never use bounce buffers (for debugging)
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 7ed35dd3de6e..7157428cf3ac 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -62,6 +62,22 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
 
+/**
+ * struct io_tlb_area - IO TLB memory area descriptor
+ *
+ * This is a single area with a single lock.
+ *
+ * @used:	The number of used IO TLB block.
+ * @index:	The slot index to start searching in this area for next round.
+ * @lock:	The lock to protect the above data structures in the map and
+ *		unmap calls.
+ */
+struct io_tlb_area {
+	unsigned long used;
+	unsigned int index;
+	spinlock_t lock;
+};
+
 /**
  * struct io_tlb_mem - IO TLB Memory Pool Descriptor
  *
@@ -89,6 +105,8 @@ extern enum swiotlb_force swiotlb_force;
  * @late_alloc:	%true if allocated using the page allocator
  * @force_bounce: %true if swiotlb bouncing is forced
  * @for_alloc:  %true if the pool is used for memory allocation
+ * @nareas:  The area number in the pool.
+ * @area_nslabs: The slot number in the area.
  */
 struct io_tlb_mem {
 	phys_addr_t start;
@@ -102,6 +120,9 @@ struct io_tlb_mem {
 	bool late_alloc;
 	bool force_bounce;
 	bool for_alloc;
+	unsigned int nareas;
+	unsigned int area_nslabs;
+	struct io_tlb_area *areas;
 	struct io_tlb_slot {
 		phys_addr_t orig_addr;
 		size_t alloc_size;
@@ -130,6 +151,7 @@ unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
 bool is_swiotlb_active(struct device *dev);
 void __init swiotlb_adjust_size(unsigned long size);
+void __init swiotlb_adjust_nareas(unsigned int nareas);
 #else
 static inline void swiotlb_init(bool addressing_limited, unsigned int flags)
 {
@@ -162,6 +184,11 @@ static inline bool is_swiotlb_active(struct device *dev)
 static inline void swiotlb_adjust_size(unsigned long size)
 {
 }
+
+static inline void swiotlb_adjust_nareas(unsigned int nareas)
+{
+}
+
 #endif /* CONFIG_SWIOTLB */
 
 extern void swiotlb_print_info(void);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index cb50f8d38360..139d08068912 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -62,6 +62,8 @@
 
 #define INVALID_PHYS_ADDR (~(phys_addr_t)0)
 
+#define DEFAULT_NUM_AREAS 1
+
 static bool swiotlb_force_bounce;
 static bool swiotlb_force_disable;
 
@@ -70,6 +72,7 @@ struct io_tlb_mem io_tlb_default_mem;
 phys_addr_t swiotlb_unencrypted_base;
 
 static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
+static unsigned long default_nareas = DEFAULT_NUM_AREAS;
 
 static int __init
 setup_io_tlb_npages(char *str)
@@ -79,6 +82,10 @@ setup_io_tlb_npages(char *str)
 		default_nslabs =
 			ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE);
 	}
+	if (*str == ',')
+		++str;
+	if (isdigit(*str))
+		swiotlb_adjust_nareas(simple_strtoul(str, &str, 0));
 	if (*str == ',')
 		++str;
 	if (!strcmp(str, "force"))
@@ -103,6 +110,27 @@ unsigned long swiotlb_size_or_default(void)
 	return default_nslabs << IO_TLB_SHIFT;
 }
 
+void __init swiotlb_adjust_nareas(unsigned int nareas)
+{
+	if (!is_power_of_2(nareas)) {
+		pr_err("swiotlb: Invalid areas parameter %d.\n", nareas);
+		return;
+	}
+
+	default_nareas = nareas;
+
+	pr_info("area num %d.\n", nareas);
+	/* Round up number of slabs to the next power of 2.
+	 * The last area is going be smaller than the rest if
+	 * default_nslabs is not power of two.
+	 */
+	if (nareas > DEFAULT_NUM_AREAS) {
+		default_nslabs = roundup_pow_of_two(default_nslabs);
+		pr_info("SWIOTLB bounce buffer size roundup to %luMB",
+			(default_nslabs << IO_TLB_SHIFT) >> 20);
+	}
+}
+
 void __init swiotlb_adjust_size(unsigned long size)
 {
 	/*
@@ -112,8 +140,18 @@ void __init swiotlb_adjust_size(unsigned long size)
 	 */
 	if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT)
 		return;
+
+	/* Round up number of slabs to the next power of 2.
+	 * The last area is going be smaller than the rest if default_nslabs is
+	 * not power of two.
+	 */
 	size = ALIGN(size, IO_TLB_SIZE);
 	default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+	if (default_nareas != DEFAULT_NUM_AREAS) {
+		default_nslabs = roundup_pow_of_two(default_nslabs);
+		size = default_nslabs << IO_TLB_SHIFT;
+	}
+
 	pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
 }
 
@@ -192,7 +230,8 @@ void __init swiotlb_update_mem_attributes(void)
 }
 
 static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
-		unsigned long nslabs, unsigned int flags, bool late_alloc)
+				    unsigned long nslabs, unsigned int flags,
+				    bool late_alloc, unsigned int nareas)
 {
 	void *vaddr = phys_to_virt(start);
 	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
@@ -202,10 +241,17 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 	mem->end = mem->start + bytes;
 	mem->index = 0;
 	mem->late_alloc = late_alloc;
+	mem->nareas = nareas;
+	mem->area_nslabs = nslabs / mem->nareas;
 
 	mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
 
 	spin_lock_init(&mem->lock);
+	for (i = 0; i < mem->nareas; i++) {
+		spin_lock_init(&mem->areas[i].lock);
+		mem->areas[i].index = 0;
+	}
+
 	for (i = 0; i < mem->nslabs; i++) {
 		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
@@ -274,7 +320,13 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
-	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false);
+	mem->areas = memblock_alloc(sizeof(struct io_tlb_area) *
+			    default_nareas, SMP_CACHE_BYTES);
+	if (!mem->areas)
+		panic("%s: Failed to allocate mem->areas.\n", __func__);
+
+	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags,
+				false, default_nareas);
 
 	if (flags & SWIOTLB_VERBOSE)
 		swiotlb_print_info();
@@ -296,7 +348,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
 	unsigned char *vstart = NULL;
-	unsigned int order;
+	unsigned int order, area_order;
 	bool retried = false;
 	int rc = 0;
 
@@ -337,19 +389,31 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 			(PAGE_SIZE << order) >> 20);
 	}
 
+	area_order = get_order(array_size(sizeof(*mem->areas),
+		default_nareas));
+	mem->areas = (struct io_tlb_area *)
+		__get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
+	if (!mem->areas)
+		goto error_area;
+
 	mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 		get_order(array_size(sizeof(*mem->slots), nslabs)));
-	if (!mem->slots) {
-		free_pages((unsigned long)vstart, order);
-		return -ENOMEM;
-	}
+	if (!mem->slots)
+		goto error_slots;
 
 	set_memory_decrypted((unsigned long)vstart,
 			     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
-	swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true);
+	swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
+				default_nareas);
 
 	swiotlb_print_info();
 	return 0;
+
+error_slots:
+	free_pages((unsigned long)mem->areas, area_order);
+error_area:
+	free_pages((unsigned long)vstart, order);
+	return -ENOMEM;
 }
 
 void __init swiotlb_exit(void)
@@ -357,6 +421,7 @@ void __init swiotlb_exit(void)
 	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long tbl_vaddr;
 	size_t tbl_size, slots_size;
+	unsigned int area_order;
 
 	if (swiotlb_force_bounce)
 		return;
@@ -371,9 +436,14 @@ void __init swiotlb_exit(void)
 
 	set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
 	if (mem->late_alloc) {
+		area_order = get_order(array_size(sizeof(*mem->areas),
+			mem->nareas));
+		free_pages((unsigned long)mem->areas, area_order);
 		free_pages(tbl_vaddr, get_order(tbl_size));
 		free_pages((unsigned long)mem->slots, get_order(slots_size));
 	} else {
+		memblock_free_late(__pa(mem->areas),
+				   mem->nareas * sizeof(struct io_tlb_area));
 		memblock_free_late(mem->start, tbl_size);
 		memblock_free_late(__pa(mem->slots), slots_size);
 	}
@@ -476,9 +546,9 @@ static inline unsigned long get_max_slots(unsigned long boundary_mask)
 	return nr_slots(boundary_mask + 1);
 }
 
-static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
+static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
 {
-	if (index >= mem->nslabs)
+	if (index >= mem->area_nslabs)
 		return 0;
 	return index;
 }
@@ -487,10 +557,13 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
  * Find a suitable number of IO TLB entries size that will fit this request and
  * allocate a buffer from that IO TLB pool.
  */
-static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
-			      size_t alloc_size, unsigned int alloc_align_mask)
+static int swiotlb_do_find_slots(struct io_tlb_mem *mem,
+				 struct io_tlb_area *area,
+				 int area_index,
+				 struct device *dev, phys_addr_t orig_addr,
+				 size_t alloc_size,
+				 unsigned int alloc_align_mask)
 {
-	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
 	dma_addr_t tbl_dma_addr =
 		phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
@@ -501,8 +574,11 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	unsigned int index, wrap, count = 0, i;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
 	unsigned long flags;
+	unsigned int slot_base;
+	unsigned int slot_index;
 
 	BUG_ON(!nslots);
+	BUG_ON(area_index >= mem->nareas);
 
 	/*
 	 * For mappings with an alignment requirement don't bother looping to
@@ -514,16 +590,20 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 		stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
 	stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
-	spin_lock_irqsave(&mem->lock, flags);
-	if (unlikely(nslots > mem->nslabs - mem->used))
+	spin_lock_irqsave(&area->lock, flags);
+	if (unlikely(nslots > mem->area_nslabs - area->used))
 		goto not_found;
 
-	index = wrap = wrap_index(mem, ALIGN(mem->index, stride));
+	slot_base = area_index * mem->area_nslabs;
+	index = wrap = wrap_area_index(mem, ALIGN(area->index, stride));
+
 	do {
+		slot_index = slot_base + index;
+
 		if (orig_addr &&
-		    (slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
-			    (orig_addr & iotlb_align_mask)) {
-			index = wrap_index(mem, index + 1);
+		    (slot_addr(tbl_dma_addr, slot_index) &
+		     iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
+			index = wrap_area_index(mem, index + 1);
 			continue;
 		}
 
@@ -532,26 +612,26 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 		 * contiguous buffers, we allocate the buffers from that slot
 		 * and mark the entries as '0' indicating unavailable.
 		 */
-		if (!iommu_is_span_boundary(index, nslots,
+		if (!iommu_is_span_boundary(slot_index, nslots,
 					    nr_slots(tbl_dma_addr),
 					    max_slots)) {
-			if (mem->slots[index].list >= nslots)
+			if (mem->slots[slot_index].list >= nslots)
 				goto found;
 		}
-		index = wrap_index(mem, index + stride);
+		index = wrap_area_index(mem, index + stride);
 	} while (index != wrap);
 
 not_found:
-	spin_unlock_irqrestore(&mem->lock, flags);
+	spin_unlock_irqrestore(&area->lock, flags);
 	return -1;
 
 found:
-	for (i = index; i < index + nslots; i++) {
+	for (i = slot_index; i < slot_index + nslots; i++) {
 		mem->slots[i].list = 0;
 		mem->slots[i].alloc_size =
-			alloc_size - (offset + ((i - index) << IO_TLB_SHIFT));
+			alloc_size - (offset + ((i - slot_index) << IO_TLB_SHIFT));
 	}
-	for (i = index - 1;
+	for (i = slot_index - 1;
 	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
 	     mem->slots[i].list; i--)
 		mem->slots[i].list = ++count;
@@ -559,14 +639,43 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	/*
 	 * Update the indices to avoid searching in the next round.
 	 */
-	if (index + nslots < mem->nslabs)
-		mem->index = index + nslots;
+	if (index + nslots < mem->area_nslabs)
+		area->index = index + nslots;
 	else
-		mem->index = 0;
-	mem->used += nslots;
+		area->index = 0;
+	area->used += nslots;
+	spin_unlock_irqrestore(&area->lock, flags);
+	return slot_index;
+}
 
-	spin_unlock_irqrestore(&mem->lock, flags);
-	return index;
+static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+			      size_t alloc_size, unsigned int alloc_align_mask)
+{
+	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+	int start = raw_smp_processor_id() & ((1U << __fls(mem->nareas)) - 1);
+	int i = start, index;
+
+	do {
+		index = swiotlb_do_find_slots(mem, mem->areas + i, i,
+					      dev, orig_addr, alloc_size,
+					      alloc_align_mask);
+		if (index >= 0)
+			return index;
+		if (++i >= mem->nareas)
+			i = 0;
+	} while (i != start);
+
+	return -1;
+}
+
+static unsigned long mem_used(struct io_tlb_mem *mem)
+{
+	int i;
+	unsigned long used = 0;
+
+	for (i = 0; i < mem->nareas; i++)
+		used += mem->areas[i].used;
+	return used;
 }
 
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
@@ -598,7 +707,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		if (!(attrs & DMA_ATTR_NO_WARN))
 			dev_warn_ratelimited(dev,
 	"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-				 alloc_size, mem->nslabs, mem->used);
+				 alloc_size, mem->nslabs, mem_used(mem));
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
@@ -628,6 +737,8 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 	unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
 	int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
 	int nslots = nr_slots(mem->slots[index].alloc_size + offset);
+	int aindex = index / mem->area_nslabs;
+	struct io_tlb_area *area = &mem->areas[aindex];
 	int count, i;
 
 	/*
@@ -636,7 +747,9 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 	 * While returning the entries to the free list, we merge the entries
 	 * with slots below and above the pool being returned.
 	 */
-	spin_lock_irqsave(&mem->lock, flags);
+	BUG_ON(aindex >= mem->nareas);
+
+	spin_lock_irqsave(&area->lock, flags);
 	if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
 		count = mem->slots[index + nslots].list;
 	else
@@ -660,8 +773,8 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 	     io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
 	     i--)
 		mem->slots[i].list = ++count;
-	mem->used -= nslots;
-	spin_unlock_irqrestore(&mem->lock, flags);
+	area->used -= nslots;
+	spin_unlock_irqrestore(&area->lock, flags);
 }
 
 /*
@@ -759,12 +872,14 @@ EXPORT_SYMBOL_GPL(is_swiotlb_active);
 static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
 					 const char *dirname)
 {
+	unsigned long used = mem_used(mem);
+
 	mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
 	if (!mem->nslabs)
 		return;
 
 	debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
-	debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
+	debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &used);
 }
 
 static int __init __maybe_unused swiotlb_create_default_debugfs(void)
@@ -815,6 +930,9 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 	struct io_tlb_mem *mem = rmem->priv;
 	unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
 
+	/* Set Per-device io tlb area to one */
+	unsigned int nareas = 1;
+
 	/*
 	 * Since multiple devices can share the same pool, the private data,
 	 * io_tlb_mem struct, will be initialized by the first device attached
@@ -831,10 +949,18 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 			return -ENOMEM;
 		}
 
+		mem->areas = kcalloc(nareas, sizeof(*mem->areas),
+				     GFP_KERNEL);
+		if (!mem->areas) {
+			kfree(mem);
+			kfree(mem->slots);
+			return -ENOMEM;
+		}
+
 		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
 				     rmem->size >> PAGE_SHIFT);
 		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
-				false);
+					false, nareas);
 		mem->for_alloc = true;
 
 		rmem->priv = mem;
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH V4 1/1] swiotlb: Split up single swiotlb lock
  2022-06-17 14:47 ` Tianyu Lan
@ 2022-06-22 10:54   ` Christoph Hellwig
  -1 siblings, 0 replies; 11+ messages in thread
From: Christoph Hellwig @ 2022-06-22 10:54 UTC (permalink / raw)
  To: Tianyu Lan
  Cc: corbet, hch, m.szyprowski, robin.murphy, paulmck, akpm, bp, tglx,
	songmuchun, rdunlap, damien.lemoal, michael.h.kelley, kys,
	Tianyu Lan, iommu, linux-doc, linux-kernel, vkuznets, wei.liu,
	parri.andrea, thomas.lendacky, linux-hyperv, kirill.shutemov,
	andi.kleen, Andi Kleen

Thanks,

this looks pretty good to me.  A few comments below:

On Fri, Jun 17, 2022 at 10:47:41AM -0400, Tianyu Lan wrote:
> +/**
> + * struct io_tlb_area - IO TLB memory area descriptor
> + *
> + * This is a single area with a single lock.
> + *
> + * @used:	The number of used IO TLB block.
> + * @index:	The slot index to start searching in this area for next round.
> + * @lock:	The lock to protect the above data structures in the map and
> + *		unmap calls.
> + */
> +struct io_tlb_area {
> +	unsigned long used;
> +	unsigned int index;
> +	spinlock_t lock;
> +};

This can go into swiotlb.c.

> +void __init swiotlb_adjust_nareas(unsigned int nareas);

And this should be marked static.

> +#define DEFAULT_NUM_AREAS 1

I'd drop this define; the magic 1 and a > 1 comparison seem to
convey how it is used much better, as the checks aren't about default
or not, but about being larger than one.

I also think that we want some good way to size the default, e.g.
by number of CPUs or memory size.

> +void __init swiotlb_adjust_nareas(unsigned int nareas)
> +{
> +	if (!is_power_of_2(nareas)) {
> +		pr_err("swiotlb: Invalid areas parameter %d.\n", nareas);
> +		return;
> +	}
> +
> +	default_nareas = nareas;
> +
> +	pr_info("area num %d.\n", nareas);
> +	/* Round up number of slabs to the next power of 2.
> +	 * The last area is going be smaller than the rest if
> +	 * default_nslabs is not power of two.
> +	 */

Please follow the normal kernel comment style with a /* on its own line.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH V4 1/1] swiotlb: Split up single swiotlb lock
  2022-06-22 10:54   ` Christoph Hellwig
@ 2022-06-22 12:55     ` Dongli Zhang
  -1 siblings, 0 replies; 11+ messages in thread
From: Dongli Zhang @ 2022-06-22 12:55 UTC (permalink / raw)
  To: Christoph Hellwig, Tianyu Lan
  Cc: linux-hyperv, linux-doc, kys, wei.liu, Andi Kleen, corbet,
	damien.lemoal, michael.h.kelley, andi.kleen, bp, parri.andrea,
	thomas.lendacky, Tianyu Lan, paulmck, kirill.shutemov,
	songmuchun, tglx, akpm, rdunlap, linux-kernel, iommu, vkuznets,
	robin.murphy

I will build the next RFC version of 64-bit swiotlb on top of this patch (or
the next version of this patch), so that it will render a more finalized view
of 32-bit/64-bit plus multiple areas.

Thank you very much!

Dongli Zhang

On 6/22/22 3:54 AM, Christoph Hellwig wrote:
> Thanks,
> 
> this looks pretty good to me.  A few comments below:
> 
> On Fri, Jun 17, 2022 at 10:47:41AM -0400, Tianyu Lan wrote:
>> +/**
>> + * struct io_tlb_area - IO TLB memory area descriptor
>> + *
>> + * This is a single area with a single lock.
>> + *
>> + * @used:	The number of used IO TLB block.
>> + * @index:	The slot index to start searching in this area for next round.
>> + * @lock:	The lock to protect the above data structures in the map and
>> + *		unmap calls.
>> + */
>> +struct io_tlb_area {
>> +	unsigned long used;
>> +	unsigned int index;
>> +	spinlock_t lock;
>> +};
> 
> This can go into swiotlb.c.
> 
>> +void __init swiotlb_adjust_nareas(unsigned int nareas);
> 
> And this should be marked static.
> 
>> +#define DEFAULT_NUM_AREAS 1
> 
> I'd drop this define; the magic 1 and a > 1 comparison seem to
> convey how it is used much better, as the checks aren't about default
> or not, but about being larger than one.
> 
> I also think that we want some good way to size the default, e.g.
> by number of CPUs or memory size.
> 
>> +void __init swiotlb_adjust_nareas(unsigned int nareas)
>> +{
>> +	if (!is_power_of_2(nareas)) {
>> +		pr_err("swiotlb: Invalid areas parameter %d.\n", nareas);
>> +		return;
>> +	}
>> +
>> +	default_nareas = nareas;
>> +
>> +	pr_info("area num %d.\n", nareas);
>> +	/* Round up number of slabs to the next power of 2.
>> +	 * The last area is going be smaller than the rest if
>> +	 * default_nslabs is not power of two.
>> +	 */
> 
> Please follow the normal kernel comment style with a /* on its own line.
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH V4 1/1] swiotlb: Split up single swiotlb lock
  2022-06-22 10:54   ` Christoph Hellwig
@ 2022-06-23 14:48     ` Tianyu Lan
  -1 siblings, 0 replies; 11+ messages in thread
From: Tianyu Lan @ 2022-06-23 14:48 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: corbet, m.szyprowski, robin.murphy, paulmck, akpm, bp, tglx,
	songmuchun, rdunlap, damien.lemoal, michael.h.kelley, kys,
	Tianyu Lan, iommu, linux-doc, linux-kernel, vkuznets, wei.liu,
	parri.andrea, thomas.lendacky, linux-hyperv, kirill.shutemov,
	andi.kleen, Andi Kleen

On 6/22/2022 6:54 PM, Christoph Hellwig wrote:
> Thanks,
> 
> this looks pretty good to me.  A few comments below:
> 

Thanks for your review.

> On Fri, Jun 17, 2022 at 10:47:41AM -0400, Tianyu Lan wrote:
>> +/**
>> + * struct io_tlb_area - IO TLB memory area descriptor
>> + *
>> + * This is a single area with a single lock.
>> + *
>> + * @used:	The number of used IO TLB block.
>> + * @index:	The slot index to start searching in this area for next round.
>> + * @lock:	The lock to protect the above data structures in the map and
>> + *		unmap calls.
>> + */
>> +struct io_tlb_area {
>> +	unsigned long used;
>> +	unsigned int index;
>> +	spinlock_t lock;
>> +};
> 
> This can go into swiotlb.c.

struct io_tlb_area is used in the struct io_tlb_mem.

> 
>> +void __init swiotlb_adjust_nareas(unsigned int nareas);
> 
> And this should be marked static.
> 
>> +#define DEFAULT_NUM_AREAS 1
> 
> I'd drop this define; the magic 1 and a > 1 comparison seem to
> convey how it is used much better, as the checks aren't about default
> or not, but about being larger than one.
> 
> I also think that we want some good way to size the default, e.g.
> by number of CPUs or memory size.

swiotlb_adjust_nareas() is exposed to platforms to set the area number.
When swiotlb_init() is called, smp_init() hasn't run yet, so the standard
API for checking the CPU count (e.g., num_online_cpus()) doesn't work.
Platforms may have other ways to get the CPU count (e.g., x86 may use ACPI
MADT table entries) and set the area number. I will post a follow-up
patch that sets the CPU count via swiotlb_adjust_nareas().
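
For illustration, such a call could look roughly like the sketch below
(a hypothetical call site, not the actual follow-up patch):

	/*
	 * Hypothetical early-boot hook: nr_cpus would come from
	 * platform-specific enumeration (e.g. ACPI MADT parsing), since
	 * num_online_cpus() is not usable this early. The API expects a
	 * power of two, so round up.
	 */
	if (nr_cpus)
		swiotlb_adjust_nareas(roundup_pow_of_two(nr_cpus));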

> 
>> +void __init swiotlb_adjust_nareas(unsigned int nareas)
>> +{
>> +	if (!is_power_of_2(nareas)) {
>> +		pr_err("swiotlb: Invalid areas parameter %d.\n", nareas);
>> +		return;
>> +	}
>> +
>> +	default_nareas = nareas;
>> +
>> +	pr_info("area num %d.\n", nareas);
>> +	/* Round up number of slabs to the next power of 2.
>> +	 * The last area is going be smaller than the rest if
>> +	 * default_nslabs is not power of two.
>> +	 */
> 
> Please follow the normal kernel comment style with a /* on its own line.
> 

OK. Will update.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH V4 1/1] swiotlb: Split up single swiotlb lock
  2022-06-17 14:47 ` Tianyu Lan
@ 2022-06-29 16:13 ` Dan Carpenter
  -1 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2022-06-28 22:18 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 5133 bytes --]

CC: kbuild-all(a)lists.01.org
BCC: lkp(a)intel.com
In-Reply-To: <20220617144741.921308-1-ltykernel@gmail.com>
References: <20220617144741.921308-1-ltykernel@gmail.com>
TO: Tianyu Lan <ltykernel@gmail.com>

Hi Tianyu,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on linus/master v5.19-rc4 next-20220628]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Tianyu-Lan/swiotlb-Split-up-single-swiotlb-lock/20220618-010819
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
:::::: branch date: 11 days ago
:::::: commit date: 11 days ago
config: x86_64-randconfig-m001-20220627 (https://download.01.org/0day-ci/archive/20220629/202206290648.EDU4wqJI-lkp(a)intel.com/config)
compiler: gcc-11 (Debian 11.3.0-3) 11.3.0

If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

smatch warnings:
kernel/dma/swiotlb.c:956 rmem_swiotlb_device_init() error: dereferencing freed memory 'mem'

vim +/mem +956 kernel/dma/swiotlb.c

f4111e39a52aa5d Claire Chang      2021-06-19  926  
0b84e4f8b793eb4 Claire Chang      2021-06-19  927  static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
0b84e4f8b793eb4 Claire Chang      2021-06-19  928  				    struct device *dev)
0b84e4f8b793eb4 Claire Chang      2021-06-19  929  {
0b84e4f8b793eb4 Claire Chang      2021-06-19  930  	struct io_tlb_mem *mem = rmem->priv;
0b84e4f8b793eb4 Claire Chang      2021-06-19  931  	unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
0b84e4f8b793eb4 Claire Chang      2021-06-19  932  
47bb420a6670ad3 Tianyu Lan        2022-06-17  933  	/* Set Per-device io tlb area to one */
47bb420a6670ad3 Tianyu Lan        2022-06-17  934  	unsigned int nareas = 1;
47bb420a6670ad3 Tianyu Lan        2022-06-17  935  
0b84e4f8b793eb4 Claire Chang      2021-06-19  936  	/*
0b84e4f8b793eb4 Claire Chang      2021-06-19  937  	 * Since multiple devices can share the same pool, the private data,
0b84e4f8b793eb4 Claire Chang      2021-06-19  938  	 * io_tlb_mem struct, will be initialized by the first device attached
0b84e4f8b793eb4 Claire Chang      2021-06-19  939  	 * to it.
0b84e4f8b793eb4 Claire Chang      2021-06-19  940  	 */
0b84e4f8b793eb4 Claire Chang      2021-06-19  941  	if (!mem) {
463e862ac63ef27 Will Deacon       2021-07-20  942  		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
0b84e4f8b793eb4 Claire Chang      2021-06-19  943  		if (!mem)
0b84e4f8b793eb4 Claire Chang      2021-06-19  944  			return -ENOMEM;
0b84e4f8b793eb4 Claire Chang      2021-06-19  945  
404f9373c4e5c94 Robin Murphy      2022-01-24  946  		mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
463e862ac63ef27 Will Deacon       2021-07-20  947  		if (!mem->slots) {
463e862ac63ef27 Will Deacon       2021-07-20  948  			kfree(mem);
463e862ac63ef27 Will Deacon       2021-07-20  949  			return -ENOMEM;
463e862ac63ef27 Will Deacon       2021-07-20  950  		}
463e862ac63ef27 Will Deacon       2021-07-20  951  
47bb420a6670ad3 Tianyu Lan        2022-06-17  952  		mem->areas = kcalloc(nareas, sizeof(*mem->areas),
47bb420a6670ad3 Tianyu Lan        2022-06-17  953  				     GFP_KERNEL);
47bb420a6670ad3 Tianyu Lan        2022-06-17  954  		if (!mem->areas) {
47bb420a6670ad3 Tianyu Lan        2022-06-17  955  			kfree(mem);
47bb420a6670ad3 Tianyu Lan        2022-06-17 @956  			kfree(mem->slots);
47bb420a6670ad3 Tianyu Lan        2022-06-17  957  			return -ENOMEM;
47bb420a6670ad3 Tianyu Lan        2022-06-17  958  		}
47bb420a6670ad3 Tianyu Lan        2022-06-17  959  
0b84e4f8b793eb4 Claire Chang      2021-06-19  960  		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
0b84e4f8b793eb4 Claire Chang      2021-06-19  961  				     rmem->size >> PAGE_SHIFT);
e15db62bc5648ab Christoph Hellwig 2022-06-01  962  		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
47bb420a6670ad3 Tianyu Lan        2022-06-17  963  					false, nareas);
0b84e4f8b793eb4 Claire Chang      2021-06-19  964  		mem->for_alloc = true;
0b84e4f8b793eb4 Claire Chang      2021-06-19  965  
0b84e4f8b793eb4 Claire Chang      2021-06-19  966  		rmem->priv = mem;
0b84e4f8b793eb4 Claire Chang      2021-06-19  967  
35265899acef135 Robin Murphy      2022-01-24  968  		swiotlb_create_debugfs_files(mem, rmem->name);
0b84e4f8b793eb4 Claire Chang      2021-06-19  969  	}
0b84e4f8b793eb4 Claire Chang      2021-06-19  970  
0b84e4f8b793eb4 Claire Chang      2021-06-19  971  	dev->dma_io_tlb_mem = mem;
0b84e4f8b793eb4 Claire Chang      2021-06-19  972  
0b84e4f8b793eb4 Claire Chang      2021-06-19  973  	return 0;
0b84e4f8b793eb4 Claire Chang      2021-06-19  974  }
0b84e4f8b793eb4 Claire Chang      2021-06-19  975  

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH V4 1/1] swiotlb: Split up single swiotlb lock
@ 2022-06-29 16:13 ` Dan Carpenter
  0 siblings, 0 replies; 11+ messages in thread
From: Dan Carpenter @ 2022-06-29 16:13 UTC (permalink / raw)
  To: kbuild-all

[-- Attachment #1: Type: text/plain, Size: 4597 bytes --]

Hi Tianyu,

url:    https://github.com/intel-lab-lkp/linux/commits/Tianyu-Lan/swiotlb-Split-up-single-swiotlb-lock/20220618-010819
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
config: x86_64-randconfig-m001-20220627 (https://download.01.org/0day-ci/archive/20220629/202206290648.EDU4wqJI-lkp(a)intel.com/config)
compiler: gcc-11 (Debian 11.3.0-3) 11.3.0

If you fix the issue, kindly add the following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>

smatch warnings:
kernel/dma/swiotlb.c:956 rmem_swiotlb_device_init() error: dereferencing freed memory 'mem'

vim +/mem +956 kernel/dma/swiotlb.c

0b84e4f8b793eb4 Claire Chang      2021-06-19  927  static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
0b84e4f8b793eb4 Claire Chang      2021-06-19  928  				    struct device *dev)
0b84e4f8b793eb4 Claire Chang      2021-06-19  929  {
0b84e4f8b793eb4 Claire Chang      2021-06-19  930  	struct io_tlb_mem *mem = rmem->priv;
0b84e4f8b793eb4 Claire Chang      2021-06-19  931  	unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
0b84e4f8b793eb4 Claire Chang      2021-06-19  932  
47bb420a6670ad3 Tianyu Lan        2022-06-17  933  	/* Set Per-device io tlb area to one */
47bb420a6670ad3 Tianyu Lan        2022-06-17  934  	unsigned int nareas = 1;
47bb420a6670ad3 Tianyu Lan        2022-06-17  935  
0b84e4f8b793eb4 Claire Chang      2021-06-19  936  	/*
0b84e4f8b793eb4 Claire Chang      2021-06-19  937  	 * Since multiple devices can share the same pool, the private data,
0b84e4f8b793eb4 Claire Chang      2021-06-19  938  	 * io_tlb_mem struct, will be initialized by the first device attached
0b84e4f8b793eb4 Claire Chang      2021-06-19  939  	 * to it.
0b84e4f8b793eb4 Claire Chang      2021-06-19  940  	 */
0b84e4f8b793eb4 Claire Chang      2021-06-19  941  	if (!mem) {
463e862ac63ef27 Will Deacon       2021-07-20  942  		mem = kzalloc(sizeof(*mem), GFP_KERNEL);
0b84e4f8b793eb4 Claire Chang      2021-06-19  943  		if (!mem)
0b84e4f8b793eb4 Claire Chang      2021-06-19  944  			return -ENOMEM;
0b84e4f8b793eb4 Claire Chang      2021-06-19  945  
404f9373c4e5c94 Robin Murphy      2022-01-24  946  		mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
463e862ac63ef27 Will Deacon       2021-07-20  947  		if (!mem->slots) {
463e862ac63ef27 Will Deacon       2021-07-20  948  			kfree(mem);
463e862ac63ef27 Will Deacon       2021-07-20  949  			return -ENOMEM;
463e862ac63ef27 Will Deacon       2021-07-20  950  		}
463e862ac63ef27 Will Deacon       2021-07-20  951  
47bb420a6670ad3 Tianyu Lan        2022-06-17  952  		mem->areas = kcalloc(nareas, sizeof(*mem->areas),
47bb420a6670ad3 Tianyu Lan        2022-06-17  953  				     GFP_KERNEL);
47bb420a6670ad3 Tianyu Lan        2022-06-17  954  		if (!mem->areas) {
47bb420a6670ad3 Tianyu Lan        2022-06-17  955  			kfree(mem);
                                                                              ^^^^
47bb420a6670ad3 Tianyu Lan        2022-06-17 @956  			kfree(mem->slots);
                                                                              ^^^^^^^^^^
The kfree(mem->slots) needs to be done before the kfree(mem) on the previous
line; as written, mem->slots is read after mem has already been freed (a
corrected ordering is sketched after this listing).

47bb420a6670ad3 Tianyu Lan        2022-06-17  957  			return -ENOMEM;
47bb420a6670ad3 Tianyu Lan        2022-06-17  958  		}
47bb420a6670ad3 Tianyu Lan        2022-06-17  959  
0b84e4f8b793eb4 Claire Chang      2021-06-19  960  		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
0b84e4f8b793eb4 Claire Chang      2021-06-19  961  				     rmem->size >> PAGE_SHIFT);
e15db62bc5648ab Christoph Hellwig 2022-06-01  962  		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
47bb420a6670ad3 Tianyu Lan        2022-06-17  963  					false, nareas);
0b84e4f8b793eb4 Claire Chang      2021-06-19  964  		mem->for_alloc = true;
0b84e4f8b793eb4 Claire Chang      2021-06-19  965  
0b84e4f8b793eb4 Claire Chang      2021-06-19  966  		rmem->priv = mem;
0b84e4f8b793eb4 Claire Chang      2021-06-19  967  
35265899acef135 Robin Murphy      2022-01-24  968  		swiotlb_create_debugfs_files(mem, rmem->name);
0b84e4f8b793eb4 Claire Chang      2021-06-19  969  	}
0b84e4f8b793eb4 Claire Chang      2021-06-19  970  
0b84e4f8b793eb4 Claire Chang      2021-06-19  971  	dev->dma_io_tlb_mem = mem;
0b84e4f8b793eb4 Claire Chang      2021-06-19  972  
0b84e4f8b793eb4 Claire Chang      2021-06-19  973  	return 0;
0b84e4f8b793eb4 Claire Chang      2021-06-19  974  }
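
The use-after-free flagged here is in the patch's new cleanup path: when the
kcalloc() for mem->areas fails, mem is freed first and mem->slots is then read
from the already-freed structure. A minimal sketch of the corrected error path
(same allocations as above, only the ordering changed; not the actual
follow-up patch) would be:

		mem->areas = kcalloc(nareas, sizeof(*mem->areas),
				     GFP_KERNEL);
		if (!mem->areas) {
			kfree(mem->slots);
			kfree(mem);
			return -ENOMEM;
		}

Equivalently, the function could adopt the usual goto-unwind style so that each
later allocation only adds one new error label instead of repeating earlier
kfree() calls in the right order.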

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH V4 1/1] swiotlb: Split up single swiotlb lock
@ 2022-06-19  5:55 kernel test robot
  0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2022-06-19  5:55 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 19132 bytes --]

:::::: 
:::::: Manual check reason: "low confidence static check warning: kernel/dma/swiotlb.c:655:14: warning: Dereference of null pointer [clang-analyzer-core.NullDereference]"
:::::: 

CC: llvm(a)lists.linux.dev
CC: kbuild-all(a)lists.01.org
BCC: lkp(a)intel.com
In-Reply-To: <20220617144741.921308-1-ltykernel@gmail.com>
References: <20220617144741.921308-1-ltykernel@gmail.com>
TO: Tianyu Lan <ltykernel@gmail.com>

Hi Tianyu,

[FYI, it's a private test report for your RFC patch.]
[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on linus/master v5.19-rc2 next-20220617]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting the patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Tianyu-Lan/swiotlb-Split-up-single-swiotlb-lock/20220618-010819
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
:::::: branch date: 2 days ago
:::::: commit date: 2 days ago
config: s390-randconfig-c005-20220617 (https://download.01.org/0day-ci/archive/20220619/202206191339.ynzj6UiE-lkp(a)intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 91688716ba49942051dccdf7b9c4f81a7ec8feaf)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install s390 cross compiling tool for clang build
        # apt-get install binutils-s390x-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/47bb420a6670ad36f2bec2dd442d79f17453186d
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Tianyu-Lan/swiotlb-Split-up-single-swiotlb-lock/20220618-010819
        git checkout 47bb420a6670ad36f2bec2dd442d79f17453186d
        # save the config file
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=s390 clang-analyzer 

If you fix the issue, kindly add the following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>


clang-analyzer warnings: (new ones prefixed by >>)
           ^
   include/asm-generic/bug.h:71:27: note: expanded from macro 'BUG_ON'
   #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
                             ^
   kernel/dma/swiotlb.c:752:2: note: Calling '__raw_spin_lock_irqsave'
           spin_lock_irqsave(&area->lock, flags);
           ^
   include/linux/spinlock.h:379:2: note: expanded from macro 'spin_lock_irqsave'
           raw_spin_lock_irqsave(spinlock_check(lock), flags);     \
           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/spinlock.h:242:11: note: expanded from macro 'raw_spin_lock_irqsave'
                   flags = _raw_spin_lock_irqsave(lock);   \
                           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/spinlock_api_smp.h:59:38: note: expanded from macro '_raw_spin_lock_irqsave'
   #define _raw_spin_lock_irqsave(lock) __raw_spin_lock_irqsave(lock)
                                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/spinlock_api_smp.h:108:2: note: Loop condition is false.  Exiting loop
           local_irq_save(flags);
           ^
   include/linux/irqflags.h:244:36: note: expanded from macro 'local_irq_save'
   #define local_irq_save(flags)   do { raw_local_irq_save(flags); } while (0)
                                        ^
   include/linux/irqflags.h:176:2: note: expanded from macro 'raw_local_irq_save'
           do {                                            \
           ^
   include/linux/spinlock_api_smp.h:108:2: note: Loop condition is false.  Exiting loop
           local_irq_save(flags);
           ^
   include/linux/irqflags.h:244:31: note: expanded from macro 'local_irq_save'
   #define local_irq_save(flags)   do { raw_local_irq_save(flags); } while (0)
                                   ^
   include/linux/spinlock_api_smp.h:110:2: note: Loop condition is false.  Exiting loop
           spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
           ^
   include/linux/lockdep.h:522:35: note: expanded from macro 'spin_acquire'
   #define spin_acquire(l, s, t, i)                lock_acquire_exclusive(l, s, t, NULL, i)
                                                   ^
   include/linux/lockdep.h:518:48: note: expanded from macro 'lock_acquire_exclusive'
   #define lock_acquire_exclusive(l, s, t, n, i)           lock_acquire(l, s, t, 0, 1, n, i)
                                                           ^
   include/linux/lockdep.h:356:44: note: expanded from macro 'lock_acquire'
   # define lock_acquire(l, s, t, r, c, n, i)      do { } while (0)
                                                   ^
   include/linux/spinlock_api_smp.h:111:44: note: Calling 'do_raw_spin_lock'
           LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
                                                     ^
   include/linux/lockdep.h:477:2: note: expanded from macro 'LOCK_CONTENDED'
           lock(_lock)
           ^~~~~~~~~~~
   include/linux/spinlock.h:185:2: note: Calling 'arch_spin_lock'
           arch_spin_lock(&lock->raw_lock);
           ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/s390/include/asm/spinlock.h:66:7: note: Calling 'arch_spin_trylock_once'
           if (!arch_spin_trylock_once(lp))
                ^~~~~~~~~~~~~~~~~~~~~~~~~~
   arch/s390/include/asm/spinlock.h:61:52: note: Dereference of null pointer
           return likely(__atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
                                                             ^
   arch/s390/include/asm/spinlock.h:19:26: note: expanded from macro 'SPINLOCK_LOCKVAL'
   #define SPINLOCK_LOCKVAL (S390_lowcore.spinlock_lockval)
                            ^
   include/linux/compiler.h:77:40: note: expanded from macro 'likely'
   # define likely(x)      __builtin_expect(!!(x), 1)
                                               ^
   kernel/dma/swiotlb.c:268:2: warning: Call to function 'memset' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memset_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]
           memset(vaddr, 0, bytes);
           ^~~~~~
   kernel/dma/swiotlb.c:268:2: note: Call to function 'memset' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memset_s' in case of C11
           memset(vaddr, 0, bytes);
           ^~~~~~
   kernel/dma/swiotlb.c:451:2: warning: Call to function 'memset' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memset_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]
           memset(mem, 0, sizeof(*mem));
           ^~~~~~
   kernel/dma/swiotlb.c:451:2: note: Call to function 'memset' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memset_s' in case of C11
           memset(mem, 0, sizeof(*mem));
           ^~~~~~
   kernel/dma/swiotlb.c:519:5: warning: Call to function 'memcpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memcpy_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]
                                   memcpy(vaddr, buffer + offset, sz);
                                   ^~~~~~
   kernel/dma/swiotlb.c:519:5: note: Call to function 'memcpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memcpy_s' in case of C11
                                   memcpy(vaddr, buffer + offset, sz);
                                   ^~~~~~
   kernel/dma/swiotlb.c:521:5: warning: Call to function 'memcpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memcpy_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]
                                   memcpy(buffer + offset, vaddr, sz);
                                   ^~~~~~
   kernel/dma/swiotlb.c:521:5: note: Call to function 'memcpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memcpy_s' in case of C11
                                   memcpy(buffer + offset, vaddr, sz);
                                   ^~~~~~
   kernel/dma/swiotlb.c:531:3: warning: Call to function 'memcpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memcpy_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]
                   memcpy(vaddr, phys_to_virt(orig_addr), size);
                   ^~~~~~
   kernel/dma/swiotlb.c:531:3: note: Call to function 'memcpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memcpy_s' in case of C11
                   memcpy(vaddr, phys_to_virt(orig_addr), size);
                   ^~~~~~
   kernel/dma/swiotlb.c:533:3: warning: Call to function 'memcpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memcpy_s' in case of C11 [clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling]
                   memcpy(phys_to_virt(orig_addr), vaddr, size);
                   ^~~~~~
   kernel/dma/swiotlb.c:533:3: note: Call to function 'memcpy' is insecure as it does not provide security checks introduced in the C11 standard. Replace with analogous functions that support length arguments or provides boundary checks such as 'memcpy_s' in case of C11
                   memcpy(phys_to_virt(orig_addr), vaddr, size);
                   ^~~~~~
>> kernel/dma/swiotlb.c:655:14: warning: Dereference of null pointer [clang-analyzer-core.NullDereference]
           int start = raw_smp_processor_id() & ((1U << __fls(mem->nareas)) - 1);
                       ^
   arch/s390/include/asm/smp.h:14:32: note: expanded from macro 'raw_smp_processor_id'
   #define raw_smp_processor_id()  (S390_lowcore.cpu_nr)
                                   ^
   kernel/dma/swiotlb.c:827:17: note: Calling 'swiotlb_tbl_map_single'
           swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
                          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   kernel/dma/swiotlb.c:692:6: note: Assuming 'mem' is non-null
           if (!mem)
               ^~~~
   kernel/dma/swiotlb.c:692:2: note: Taking false branch
           if (!mem)
           ^
   kernel/dma/swiotlb.c:695:2: note: Taking false branch
           if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
           ^
   kernel/dma/swiotlb.c:698:6: note: 'mapping_size' is <= 'alloc_size'
           if (mapping_size > alloc_size) {
               ^~~~~~~~~~~~
   kernel/dma/swiotlb.c:698:2: note: Taking false branch
           if (mapping_size > alloc_size) {
           ^
   kernel/dma/swiotlb.c:704:10: note: Calling 'swiotlb_find_slots'
           index = swiotlb_find_slots(dev, orig_addr,
                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   kernel/dma/swiotlb.c:655:14: note: Dereference of null pointer
           int start = raw_smp_processor_id() & ((1U << __fls(mem->nareas)) - 1);
                       ^
   arch/s390/include/asm/smp.h:14:32: note: expanded from macro 'raw_smp_processor_id'
   #define raw_smp_processor_id()  (S390_lowcore.cpu_nr)
                                   ^~~~~~~~~~~~~~~~~~~~~
   Suppressed 60 warnings (48 in non-user code, 12 with check filters).
   Use -header-filter=.* to display errors from all non-system headers. Use -system-headers to display errors from system headers as well.
   108 warnings generated.
   arch/s390/include/asm/preempt.h:17:9: warning: Dereference of null pointer [clang-analyzer-core.NullDereference]
           return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
                  ^
   include/asm-generic/rwonce.h:50:2: note: expanded from macro 'READ_ONCE'
           __READ_ONCE(x);                                                 \
           ^
   include/asm-generic/rwonce.h:44:24: note: expanded from macro '__READ_ONCE'
   #define __READ_ONCE(x)  (*(const volatile __unqual_scalar_typeof(x) *)&(x))
                           ^
   init/main.c:1418:2: note: Loop condition is true.  Entering loop body
           for (fn = __initcall_start; fn < __initcall0_start; fn++)
           ^
   init/main.c:1419:3: note: Calling 'do_one_initcall'
                   do_one_initcall(initcall_from_entry(fn));
                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   init/main.c:1292:14: note: Calling 'preempt_count'
           int count = preempt_count();
                       ^~~~~~~~~~~~~~~
   arch/s390/include/asm/preempt.h:17:9: note: Left side of '||' is false
           return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
                  ^
   include/asm-generic/rwonce.h:49:2: note: expanded from macro 'READ_ONCE'
           compiletime_assert_rwonce_type(x);                              \
           ^
   include/asm-generic/rwonce.h:36:21: note: expanded from macro 'compiletime_assert_rwonce_type'
           compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long),  \
                              ^
   include/linux/compiler_types.h:319:3: note: expanded from macro '__native_word'
           (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
            ^
   arch/s390/include/asm/preempt.h:17:9: note: Left side of '||' is false
           return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
                  ^
   include/asm-generic/rwonce.h:49:2: note: expanded from macro 'READ_ONCE'
           compiletime_assert_rwonce_type(x);                              \
           ^
   include/asm-generic/rwonce.h:36:21: note: expanded from macro 'compiletime_assert_rwonce_type'
           compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long),  \
                              ^
   include/linux/compiler_types.h:319:3: note: expanded from macro '__native_word'
           (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
            ^
   arch/s390/include/asm/preempt.h:17:9: note: Left side of '||' is true
           return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
                  ^
   include/asm-generic/rwonce.h:49:2: note: expanded from macro 'READ_ONCE'
           compiletime_assert_rwonce_type(x);                              \
           ^
   include/asm-generic/rwonce.h:36:21: note: expanded from macro 'compiletime_assert_rwonce_type'
           compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long),  \
                              ^
   include/linux/compiler_types.h:320:28: note: expanded from macro '__native_word'
            sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
                                     ^
   arch/s390/include/asm/preempt.h:17:9: note: Taking false branch
           return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
                  ^
   include/asm-generic/rwonce.h:49:2: note: expanded from macro 'READ_ONCE'
           compiletime_assert_rwonce_type(x);                              \
           ^
   include/asm-generic/rwonce.h:36:2: note: expanded from macro 'compiletime_assert_rwonce_type'
           compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long),  \
           ^
   include/linux/compiler_types.h:352:2: note: expanded from macro 'compiletime_assert'
           _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)

vim +655 kernel/dma/swiotlb.c

^1da177e4c3f41 arch/ia64/lib/swiotlb.c Linus Torvalds    2005-04-16  650  
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  651  static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  652  			      size_t alloc_size, unsigned int alloc_align_mask)
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  653  {
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  654  	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17 @655  	int start = raw_smp_processor_id() & ((1U << __fls(mem->nareas)) - 1);
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  656  	int i = start, index;
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  657  
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  658  	do {
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  659  		index = swiotlb_do_find_slots(mem, mem->areas + i, i,
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  660  					      dev, orig_addr, alloc_size,
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  661  					      alloc_align_mask);
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  662  		if (index >= 0)
26a7e094783d48 kernel/dma/swiotlb.c    Christoph Hellwig 2021-02-04  663  			return index;
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  664  		if (++i >= mem->nareas)
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  665  			i = 0;
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  666  	} while (i != start);
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  667  
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  668  	return -1;
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  669  }
47bb420a6670ad kernel/dma/swiotlb.c    Tianyu Lan        2022-06-17  670  
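
The dereference the analyzer flags is S390_lowcore inside
raw_smp_processor_id(), which clang-analyzer models as a dereference of
address zero (the s390 lowcore sits at absolute address 0), so the report is
essentially a false positive for this patch. The swiotlb-specific detail worth
noting is the start-index computation above: it assumes mem->nareas is
non-zero, and the swiotlb= parameter documents the area count as a power of
two, in which case (1U << __fls(nareas)) - 1 is simply nareas - 1. A
standalone sketch of that mapping, with a hypothetical helper name and
linux/bitops.h assumed for __fls():

	/* Hypothetical helper: pick a starting area for this CPU. */
	static unsigned int swiotlb_start_area(unsigned int cpu, unsigned int nareas)
	{
		/*
		 * For a power-of-two nareas the mask below equals
		 * nareas - 1, so CPUs are spread evenly across all
		 * areas.  For other values it only covers the largest
		 * power-of-two subset of areas, but the do/while loop
		 * in swiotlb_find_slots() still visits every area.
		 */
		return cpu & ((1U << __fls(nareas)) - 1);
	}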

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-06-29 16:13 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-28 22:18 [RFC PATCH V4 1/1] swiotlb: Split up single swiotlb lock kernel test robot
2022-06-29 16:13 ` Dan Carpenter
  -- strict thread matches above, loose matches on Subject: below --
2022-06-19  5:55 kernel test robot
2022-06-17 14:47 Tianyu Lan
2022-06-17 14:47 ` Tianyu Lan
2022-06-22 10:54 ` Christoph Hellwig
2022-06-22 10:54   ` Christoph Hellwig
2022-06-22 12:55   ` Dongli Zhang
2022-06-22 12:55     ` Dongli Zhang
2022-06-23 14:48   ` Tianyu Lan
2022-06-23 14:48     ` Tianyu Lan
