All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexandre Chartre <alexandre.chartre@oracle.com>
To: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	hpa@zytor.com, dave.hansen@linux.intel.com, luto@kernel.org,
	peterz@infradead.org, x86@kernel.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Cc: pbonzini@redhat.com, konrad.wilk@oracle.com,
	jan.setjeeilers@oracle.com, liran.alon@oracle.com,
	junaids@google.com, graf@amazon.de, rppt@linux.vnet.ibm.com,
	kuzuno@gmail.com, mgross@linux.intel.com,
	alexandre.chartre@oracle.com
Subject: [RFC v4][PATCH part-2 12/13] mm/dpt: Handle decorated page-table mapped range leaks and overlaps
Date: Mon,  4 May 2020 16:58:09 +0200	[thread overview]
Message-ID: <20200504145810.11882-13-alexandre.chartre@oracle.com> (raw)
In-Reply-To: <20200504145810.11882-1-alexandre.chartre@oracle.com>

When mapping a buffer in a decorated page-table, data around the buffer are
also mapped if the start or the end of the buffer is not aligned with the
page directory size used for the mapping. Such data then leak into the
decorated page-table. In that case, log a debug message (pr_debug) with
the address range that is leaking.

Also, the area effectively mapped can overlap with an already mapped
buffer. This is not an issue when mapping but, when unmapping a buffer,
make sure that data from another buffer do not get unmapped as a side
effect.

Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 arch/x86/include/asm/dpt.h |   1 +
 arch/x86/mm/dpt.c          | 197 +++++++++++++++++++++++++++++++------
 2 files changed, 168 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/dpt.h b/arch/x86/include/asm/dpt.h
index 3234ba968d80..e0adbf69dadf 100644
--- a/arch/x86/include/asm/dpt.h
+++ b/arch/x86/include/asm/dpt.h
@@ -25,6 +25,7 @@ struct dpt_range_mapping {
 	void *ptr;			/* range start address */
 	size_t size;			/* range size */
 	enum page_table_level level;	/* mapping level */
+	int refcnt;			/* reference count (for overlap) */
 };
 
 /*
diff --git a/arch/x86/mm/dpt.c b/arch/x86/mm/dpt.c
index 9517e3081716..d3d3c3de2943 100644
--- a/arch/x86/mm/dpt.c
+++ b/arch/x86/mm/dpt.c
@@ -9,6 +9,22 @@
 
 #include <asm/dpt.h>
 
+
+static unsigned long page_directory_size[] = {
+	[PGT_LEVEL_PTE] = PAGE_SIZE,
+	[PGT_LEVEL_PMD] = PMD_SIZE,
+	[PGT_LEVEL_PUD] = PUD_SIZE,
+	[PGT_LEVEL_P4D] = P4D_SIZE,
+	[PGT_LEVEL_PGD] = PGDIR_SIZE,
+};
+
+#define DPT_RANGE_MAP_ADDR(r)	\
+	round_down((unsigned long)((r)->ptr), page_directory_size[(r)->level])
+
+#define DPT_RANGE_MAP_END(r)	\
+	round_up((unsigned long)((r)->ptr + (r)->size), \
+		 page_directory_size[(r)->level])
+
 /*
  * Get the pointer to the beginning of a page table directory from a page
  * table directory entry.
@@ -572,6 +588,70 @@ static int dpt_copy_pgd_range(struct dpt *dpt,
 	return 0;
 }
 
+/*
+ * Map a VA range, taking into account any overlap with already mapped
+ * VA ranges. On error, return < 0. Otherwise return the number of
+ * ranges the specified range is overlapping with.
+ */
+static int dpt_map_overlap(struct dpt *dpt, void *ptr, size_t size,
+			   enum page_table_level level)
+{
+	unsigned long map_addr, map_end;
+	unsigned long addr, end;
+	struct dpt_range_mapping *range;
+	bool need_mapping;
+	int err, overlap;
+
+	addr = (unsigned long)ptr;
+	end = addr + (unsigned long)size;
+	need_mapping = true;
+	overlap = 0;
+
+	lockdep_assert_held(&dpt->lock);
+	list_for_each_entry(range, &dpt->mapping_list, list) {
+
+		if (range->ptr == ptr && range->size == size) {
+			/* we are mapping the same range again */
+			pr_debug("DPT %p: MAP %px/%lx/%d already mapped\n",
+				 dpt, ptr, size, level);
+			return -EBUSY;
+		}
+
+		/* check overlap with mapped range */
+		map_addr = DPT_RANGE_MAP_ADDR(range);
+		map_end = DPT_RANGE_MAP_END(range);
+		if (end <= map_addr || addr >= map_end) {
+			/* no overlap, continue */
+			continue;
+		}
+
+		pr_debug("DPT %p: MAP %px/%lx/%d overlaps with %px/%lx/%d\n",
+			 dpt, ptr, size, level,
+			 range->ptr, range->size, range->level);
+		range->refcnt++;
+		overlap++;
+
+		/*
+		 * Check if the new range is included in an existing range.
+		 * If so then the new range is already entirely mapped.
+		 */
+		if (addr >= map_addr && end <= map_end) {
+			pr_debug("DPT %p: MAP %px/%lx/%d implicitly mapped\n",
+				 dpt, ptr, size, level);
+			need_mapping = false;
+		}
+	}
+
+	if (need_mapping) {
+		err = dpt_copy_pgd_range(dpt, dpt->pagetable, current->mm->pgd,
+					 addr, end, level);
+		if (err)
+			return err;
+	}
+
+	return overlap;
+}
+
 /*
  * Copy page table entries from the current page table (i.e. from the
  * kernel page table) to the specified decorated page-table. The level
@@ -582,47 +662,48 @@ int dpt_map_range(struct dpt *dpt, void *ptr, size_t size,
 		  enum page_table_level level)
 {
 	struct dpt_range_mapping *range_mapping;
+	unsigned long page_dir_size = page_directory_size[level];
 	unsigned long addr = (unsigned long)ptr;
 	unsigned long end = addr + ((unsigned long)size);
+	unsigned long map_addr, map_end;
 	unsigned long flags;
-	int err;
+	int overlap;
 
-	pr_debug("DPT %p: MAP %px/%lx/%d\n", dpt, ptr, size, level);
+	map_addr = round_down(addr, page_dir_size);
+	map_end = round_up(end, page_dir_size);
 
-	spin_lock_irqsave(&dpt->lock, flags);
-
-	/* check if the range is already mapped */
-	range_mapping = dpt_get_range_mapping(dpt, ptr);
-	if (range_mapping) {
-		pr_debug("DPT %p: MAP %px/%lx/%d already mapped\n",
-			 dpt, ptr, size, level);
-		err = -EBUSY;
-		goto done;
-	}
+	pr_debug("DPT %p: MAP %px/%lx/%d -> %lx-%lx\n", dpt, ptr, size, level,
+		 map_addr, map_end);
+	if (map_addr < addr)
+		pr_debug("DPT %p: MAP LEAK %lx-%lx\n", dpt, map_addr, addr);
+	if (map_end > end)
+		pr_debug("DPT %p: MAP LEAK %lx-%lx\n", dpt, end, map_end);
 
-	/* map new range */
+	/* add new range */
 	range_mapping = kmalloc(sizeof(*range_mapping), GFP_KERNEL);
-	if (!range_mapping) {
-		err = -ENOMEM;
-		goto done;
-	}
+	if (!range_mapping)
+		return -ENOMEM;
 
-	err = dpt_copy_pgd_range(dpt, dpt->pagetable, current->mm->pgd,
-				 addr, end, level);
-	if (err) {
-		kfree(range_mapping);
-		goto done;
+	spin_lock_irqsave(&dpt->lock, flags);
+
+	/*
+	 * Map the new range, taking any overlap with already mapped ranges
+	 * into account.
+	 */
+	overlap = dpt_map_overlap(dpt, ptr, size, level);
+	if (overlap < 0) {
+		spin_unlock_irqrestore(&dpt->lock, flags);
+		return overlap;
 	}
 
 	INIT_LIST_HEAD(&range_mapping->list);
 	range_mapping->ptr = ptr;
 	range_mapping->size = size;
 	range_mapping->level = level;
+	range_mapping->refcnt = overlap + 1;
 	list_add(&range_mapping->list, &dpt->mapping_list);
-done:
 	spin_unlock_irqrestore(&dpt->lock, flags);
-
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL(dpt_map_range);
 
@@ -741,13 +822,72 @@ static void dpt_clear_pgd_range(struct dpt *dpt, pgd_t *pagetable,
 	} while (pgd++, addr = next, addr < end);
 }
 
+
+/*
+ * Unmap a VA range, taking into account any overlap with other mapped
+ * VA ranges.
+ */
+static void dpt_unmap_overlap(struct dpt *dpt, struct dpt_range_mapping *range)
+{
+	unsigned long pgdir_size = page_directory_size[range->level];
+	unsigned long chunk_addr, chunk_end;
+	unsigned long map_addr, map_end;
+	struct dpt_range_mapping *r;
+	unsigned long addr, end;
+	bool overlap;
+
+	addr = DPT_RANGE_MAP_ADDR(range);
+	end = DPT_RANGE_MAP_END(range);
+
+	lockdep_assert_held(&dpt->lock);
+
+	/*
+	 * Unmap the VA range by chunk to handle mapping overlap
+	 * with any other range.
+	 * XXX can be improved with a sorted range list
+	 */
+	chunk_addr = addr;
+	while (chunk_addr < end) {
+		overlap = false;
+		list_for_each_entry(r, &dpt->mapping_list, list) {
+			map_addr = DPT_RANGE_MAP_ADDR(r);
+			map_end = DPT_RANGE_MAP_END(r);
+			/*
+			 * Check if there's an overlap and how far it goes.
+			 */
+			chunk_end = chunk_addr;
+			while (chunk_end >= map_addr && chunk_end < map_end) {
+				overlap = true;
+				chunk_end += pgdir_size;
+				if (chunk_end >= end)
+					break;
+			}
+			if (overlap) {
+				pr_debug("DPT %p: UNMAP %px/%lx/%d overlaps with %px/%lx/%d\n",
+					 dpt, range->ptr, range->size,
+					 range->level,
+					 r->ptr, r->size, r->level);
+				break;
+			}
+		}
+
+		if (!overlap) {
+			pr_debug("DPT %p: UNMAP CHUNK %lx/%lx/%d\n", dpt,
+				 chunk_addr, pgdir_size, range->level);
+			chunk_end = chunk_addr + pgdir_size;
+			dpt_clear_pgd_range(dpt, dpt->pagetable, chunk_addr,
+					    chunk_end, range->level);
+		}
+		chunk_addr = chunk_end;
+	}
+}
+
 /*
  * Clear page table entries in the specified decorated page-table.
  */
 void dpt_unmap(struct dpt *dpt, void *ptr)
 {
 	struct dpt_range_mapping *range_mapping;
-	unsigned long addr, end;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dpt->lock, flags);
@@ -758,13 +898,10 @@ void dpt_unmap(struct dpt *dpt, void *ptr)
 		goto done;
 	}
 
-	addr = (unsigned long)range_mapping->ptr;
-	end = addr + range_mapping->size;
 	pr_debug("DPT %p: UNMAP %px/%lx/%d\n", dpt, ptr,
 		 range_mapping->size, range_mapping->level);
-	dpt_clear_pgd_range(dpt, dpt->pagetable, addr, end,
-			    range_mapping->level);
 	list_del(&range_mapping->list);
+	dpt_unmap_overlap(dpt, range_mapping);
 	kfree(range_mapping);
 done:
 	spin_unlock_irqrestore(&dpt->lock, flags);
-- 
2.18.2


  parent reply	other threads:[~2020-05-04 15:04 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-04 14:57 [RFC v4][PATCH part-2 00/13] ASI - Part II (Decorated Page-Table) Alexandre Chartre
2020-05-04 14:57 ` [RFC v4][PATCH part-2 01/13] mm/x86: Introduce decorated page-table (dpt) Alexandre Chartre
2020-05-04 14:57 ` [RFC v4][PATCH part-2 02/13] mm/dpt: Track buffers allocated for a decorated page-table Alexandre Chartre
2020-05-04 14:58 ` [RFC v4][PATCH part-2 03/13] mm/dpt: Add decorated page-table entry offset functions Alexandre Chartre
2020-05-04 14:58 ` [RFC v4][PATCH part-2 04/13] mm/dpt: Add decorated page-table entry allocation functions Alexandre Chartre
2020-05-04 14:58 ` [RFC v4][PATCH part-2 05/13] mm/dpt: Add decorated page-table entry set functions Alexandre Chartre
2020-05-04 14:58 ` [RFC v4][PATCH part-2 06/13] mm/dpt: Functions to populate a decorated page-table from a VA range Alexandre Chartre
2020-05-04 14:58 ` [RFC v4][PATCH part-2 07/13] mm/dpt: Helper functions to map module into a decorated page-table Alexandre Chartre
2020-05-04 14:58 ` [RFC v4][PATCH part-2 08/13] mm/dpt: Keep track of VA ranges mapped in " Alexandre Chartre
2020-05-04 14:58 ` [RFC v4][PATCH part-2 09/13] mm/dpt: Functions to clear decorated page-table entries for a VA range Alexandre Chartre
2020-05-04 14:58 ` [RFC v4][PATCH part-2 10/13] mm/dpt: Function to copy page-table entries for percpu buffer Alexandre Chartre
2020-05-04 14:58 ` [RFC v4][PATCH part-2 11/13] mm/dpt: Add decorated page-table remap function Alexandre Chartre
2020-05-04 14:58 ` Alexandre Chartre [this message]
2020-05-04 14:58 ` [RFC v4][PATCH part-2 13/13] mm/asi: Function to init decorated page-table with ASI core mappings Alexandre Chartre
2020-05-14  9:29 ` [RFC v4][PATCH part-2 00/13] ASI - Part II (Decorated Page-Table) Mike Rapoport
2020-05-14 11:42   ` Alexandre Chartre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200504145810.11882-13-alexandre.chartre@oracle.com \
    --to=alexandre.chartre@oracle.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=graf@amazon.de \
    --cc=hpa@zytor.com \
    --cc=jan.setjeeilers@oracle.com \
    --cc=junaids@google.com \
    --cc=konrad.wilk@oracle.com \
    --cc=kuzuno@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=liran.alon@oracle.com \
    --cc=luto@kernel.org \
    --cc=mgross@linux.intel.com \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rppt@linux.vnet.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.