[PATCH] swiotlb: Validate bounce size in the sync/unmap path

From: Martin Radev <martin.b.radev@gmail.com>
To: konrad.wilk@oracle.com, hch@lst.de, m.szyprowski@samsung.com,
	robin.murphy@arm.com, iommu@lists.linux-foundation.org,
	linux-kernel@vger.kernel.org, joro@8bytes.org,
	kirill.shutemov@linux.intel.com, thomas.lendacky@amd.com
Cc: robert.buhren@sect.tu-berlin.de, file@sect.tu-berlin.de,
	mathias.morbitzer@aisec.fraunhofer.de
Subject: [PATCH] swiotlb: Validate bounce size in the sync/unmap path
Date: Tue, 12 Jan 2021 16:07:29 +0100	[thread overview]
Message-ID: <X/27MSbfDGCY9WZu@martin> (raw)

The size of the buffer being bounced is not checked if it happens
to be larger than the size of the mapped buffer. Because the size
can be controlled by a device, as it's the case with virtio devices,
this can lead to memory corruption.

This patch saves the remaining buffer memory for each slab and uses
that information for validation in the sync/unmap paths before
swiotlb_bounce is called.

Validating this argument is important under the threat models of
AMD SEV-SNP and Intel TDX, where the HV is considered untrusted.

Signed-off-by: Martin Radev <martin.b.radev@gmail.com>
---
 kernel/dma/swiotlb.c | 52 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 50 insertions(+), 2 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7c42df6e6100..98d79103aa1f 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -102,6 +102,11 @@ static unsigned int max_segment;
 #define INVALID_PHYS_ADDR (~(phys_addr_t)0)
 static phys_addr_t *io_tlb_orig_addr;
 
+/*
+ * The mapped buffer's size should be validated during a sync operation.
+ */
+static size_t *io_tlb_orig_size;
+
 /*
  * Protect the above data structures in the map and unmap calls
  */
@@ -240,9 +245,16 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
 		      __func__, alloc_size, PAGE_SIZE);
 
+	alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t));
+	io_tlb_orig_size = memblock_alloc(alloc_size, PAGE_SIZE);
+	if (!io_tlb_orig_size)
+		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+		      __func__, alloc_size, PAGE_SIZE);
+
 	for (i = 0; i < io_tlb_nslabs; i++) {
 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
 		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+		io_tlb_orig_size[i] = 0;
 	}
 	io_tlb_index = 0;
 	no_iotlb_memory = false;
@@ -363,7 +375,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	 * between io_tlb_start and io_tlb_end.
 	 */
 	io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-	                              get_order(io_tlb_nslabs * sizeof(int)));
+				      get_order(io_tlb_nslabs * sizeof(int)));
 	if (!io_tlb_list)
 		goto cleanup3;
 
@@ -374,9 +386,18 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (!io_tlb_orig_addr)
 		goto cleanup4;
 
+	io_tlb_orig_size = (size_t *)
+		__get_free_pages(GFP_KERNEL,
+				 get_order(io_tlb_nslabs *
+					   sizeof(size_t)));
+	if (!io_tlb_orig_size)
+		goto cleanup5;
+
+
 	for (i = 0; i < io_tlb_nslabs; i++) {
 		io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
 		io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+		io_tlb_orig_size[i] = 0;
 	}
 	io_tlb_index = 0;
 	no_iotlb_memory = false;
@@ -389,6 +410,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 
 	return 0;
 
+cleanup5:
+	free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
+							      sizeof(phys_addr_t)));
+
 cleanup4:
 	free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
 	                                                 sizeof(int)));
@@ -404,6 +429,8 @@ void __init swiotlb_exit(void)
 		return;
 
 	if (late_alloc) {
+		free_pages((unsigned long)io_tlb_orig_size,
+			   get_order(io_tlb_nslabs * sizeof(size_t)));
 		free_pages((unsigned long)io_tlb_orig_addr,
 			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
 		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -413,6 +440,8 @@ void __init swiotlb_exit(void)
 	} else {
 		memblock_free_late(__pa(io_tlb_orig_addr),
 				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+		memblock_free_late(__pa(io_tlb_orig_size),
+				   PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t)));
 		memblock_free_late(__pa(io_tlb_list),
 				   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
 		memblock_free_late(io_tlb_start,
@@ -581,8 +610,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	 * This is needed when we sync the memory.  Then we sync the buffer if
 	 * needed.
 	 */
-	for (i = 0; i < nslots; i++)
+	for (i = 0; i < nslots; i++) {
 		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+		io_tlb_orig_size[index+i] = alloc_size - (i << IO_TLB_SHIFT);
+	}
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
 		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
@@ -590,6 +621,17 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
 	return tlb_addr;
 }
 
+static void validate_sync_size_and_truncate(struct device *hwdev, size_t orig_size, size_t *size)
+{
+	if (*size > orig_size) {
+		/* Warn and truncate mapping_size */
+		dev_WARN_ONCE(hwdev, 1,
+			"Attempt for buffer overflow. Original size: %zu. Mapping size: %zu.\n",
+			orig_size, *size);
+		*size = orig_size;
+	}
+}
+
 /*
  * tlb_addr is the physical address of the bounce buffer to unmap.
  */
@@ -602,6 +644,8 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
+	validate_sync_size_and_truncate(hwdev, io_tlb_orig_size[index], &mapping_size);
+
 	/*
 	 * First, sync the memory before unmapping the entry
 	 */
@@ -627,6 +671,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 		for (i = index + nslots - 1; i >= index; i--) {
 			io_tlb_list[i] = ++count;
 			io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+			io_tlb_orig_size[i] = 0;
 		}
 		/*
 		 * Step 2: merge the returned slots with the preceding slots,
@@ -645,12 +690,15 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 			     enum dma_sync_target target)
 {
 	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+	size_t orig_size = io_tlb_orig_size[index];
 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
 	orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
 
+	validate_sync_size_and_truncate(hwdev, orig_size, &size);
+
 	switch (target) {
 	case SYNC_FOR_CPU:
 		if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-- 
2.17.1