From: Reinette Chatre <reinette.chatre@intel.com>
To: tglx@linutronix.de, fenghua.yu@intel.com, tony.luck@intel.com
Cc: gavin.hindman@intel.com, vikas.shivappa@linux.intel.com,
	dave.hansen@intel.com, mingo@redhat.com, hpa@zytor.com,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	Reinette Chatre <reinette.chatre@intel.com>
Subject: [RFC PATCH V2 22/22] x86/intel_rdt: Support contiguous memory of all sizes
Date: Tue, 13 Feb 2018 07:47:06 -0800
Message-ID: <1f495b99c766db9d7c942962bca138e695f4651c.1518443616.git.reinette.chatre@intel.com>
In-Reply-To: <cover.1518443616.git.reinette.chatre@intel.com>

Through "mm/hugetlb: Enable large allocations through gigantic page
API" we are able to allocate contiguous memory regions larger than what
the SLAB allocators can support.

Use the alloc_gigantic_page()/free_gigantic_page() API to allocate
large contiguous memory regions, making it possible to create
pseudo-locked regions larger than 4MB (KMALLOC_MAX_SIZE with the
default configuration).

Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
---
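Note: purely for illustration, below is a minimal user space sketch of
consuming a large pseudo-locked region through the character device
created earlier in this series. The device path and resource group
name ("newlock") are placeholders that depend on how the region was
configured via resctrl:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		/* Placeholder device node named after the resctrl group. */
		int fd = open("/dev/pseudo_lock/newlock", O_RDWR);
		/* 8MB exceeds KMALLOC_MAX_SIZE (4MB with the default
		 * config) and thus exercises the gigantic page path.
		 */
		size_t size = 8 * 1024 * 1024;
		void *mem;

		if (fd < 0) {
			perror("open");
			return 1;
		}

		mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
			   MAP_SHARED, fd, 0);
		if (mem == MAP_FAILED) {
			perror("mmap");
			close(fd);
			return 1;
		}

		/* Data accessed through mem now resides in the
		 * pseudo-locked portion of the cache.
		 */
		munmap(mem, size);
		close(fd);
		return 0;
	}
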
 arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | 91 +++++++++++++++++++++-------
 1 file changed, 69 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index 90f040166fcd..99918943a98a 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -23,6 +23,7 @@
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/debugfs.h>
+#include <linux/hugetlb.h>
 #include <linux/kernfs.h>
 #include <linux/kref.h>
 #include <linux/kthread.h>
@@ -136,7 +137,7 @@ struct pseudo_lock_region {
 	bool			locked;
 	struct kref		refcount;
 	bool			deleted;
-	void			*kmem;
+	struct page		*kmem;
 #ifdef CONFIG_INTEL_RDT_DEBUGFS
 	struct dentry		*debugfs_dir;
 #endif
@@ -202,12 +203,69 @@ static int pseudo_lock_clos_set(struct pseudo_lock_region *plr,
 	return ret;
 }
 
+/**
+ * contig_mem_alloc - Allocate contiguous memory for pseudo-locked region
+ * @plr: pseudo-locked region for which memory is requested
+ *
+ * In an effort to ensure best coverage of the cache with the
+ * allocated memory (fewest conflicting physical addresses), allocate
+ * contiguous memory that will be pseudo-locked. The SLAB allocators
+ * are restricted wrt the maximum memory they can allocate. If more
+ * memory is required than can be requested from the SLAB allocators,
+ * a gigantic page is requested instead.
+ */
+static int contig_mem_alloc(struct pseudo_lock_region *plr)
+{
+	void *kmem;
+
+	/*
+	 * We should not be allocating from the slab cache - we need whole
+	 * pages.
+	 */
+	if (plr->size < KMALLOC_MAX_CACHE_SIZE) {
+		rdt_last_cmd_puts("requested region smaller than page size\n");
+		return -EINVAL;
+	}
+
+	if (plr->size > KMALLOC_MAX_SIZE) {
+		plr->kmem = alloc_gigantic_page(cpu_to_node(plr->cpu),
+						get_order(plr->size),
+						GFP_KERNEL | __GFP_ZERO);
+		if (!plr->kmem) {
+			rdt_last_cmd_puts("unable to allocate gigantic page\n");
+			return -ENOMEM;
+		}
+	} else {
+		kmem = kzalloc(plr->size, GFP_KERNEL);
+		if (!kmem) {
+			rdt_last_cmd_puts("unable to allocate memory\n");
+			return -ENOMEM;
+		}
+
+		if (!PAGE_ALIGNED(kmem)) {
+			rdt_last_cmd_puts("received unaligned memory\n");
+			kfree(kmem);
+			return -ENOMEM;
+		}
+		plr->kmem = virt_to_page(kmem);
+	}
+	return 0;
+}
+
+static void contig_mem_free(struct pseudo_lock_region *plr)
+{
+	if (plr->kmem && plr->size > KMALLOC_MAX_SIZE)
+		free_gigantic_page(plr->kmem, get_order(plr->size));
+	else if (plr->kmem) /* neither free path tolerates NULL */
+		kfree(page_to_virt(plr->kmem));
+}
+
 static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
 {
-	plr->size = 0;
 	plr->line_size = 0;
-	kfree(plr->kmem);
+	contig_mem_free(plr);
 	plr->kmem = NULL;
+	plr->size = 0;
 	plr->r = NULL;
 	plr->d = NULL;
 }
@@ -444,7 +502,7 @@ static int measure_cycles_hist_fn(void *_plr)
 	 * local register variable used for memory pointer.
 	 */
 	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
-	mem_r = plr->kmem;
+	mem_r = page_to_virt(plr->kmem);
 	for (i = 0; i < plr->size; i += 32) {
 		start = rdtsc_ordered();
 		asm volatile("mov (%0,%1,1), %%eax\n\t"
@@ -568,7 +626,7 @@ static int measure_cycles_perf_fn(void *_plr)
 		pseudo_wrmsrl_notrace(MSR_ARCH_PERFMON_EVENTSEL0 + 3,
 				      l3_miss_bits);
 	}
-	mem_r = plr->kmem;
+	mem_r = page_to_virt(plr->kmem);
 	size = plr->size;
 	line_size = plr->line_size;
 	for (i = 0; i < size; i += line_size) {
@@ -912,20 +970,9 @@ static int pseudo_lock_region_init(struct pseudo_lock_region *plr,
 		return -ENOSPC;
 	}
 
-	/*
-	 * We do not yet support contiguous regions larger than
-	 * KMALLOC_MAX_SIZE
-	 */
-	if (plr->size > KMALLOC_MAX_SIZE) {
-		rdt_last_cmd_puts("requested region exceeds maximum size\n");
-		return -E2BIG;
-	}
-
-	plr->kmem = kzalloc(plr->size, GFP_KERNEL);
-	if (!plr->kmem) {
-		rdt_last_cmd_puts("unable to allocate memory\n");
-		return -ENOMEM;
-	}
+	ret = contig_mem_alloc(plr);
+	if (ret < 0)
+		return ret;
 
 	plr->r = r;
 	plr->d = d;
@@ -996,7 +1043,7 @@ static int pseudo_lock_fn(void *_plr)
 	__wrmsr(MSR_MISC_FEATURE_CONTROL, prefetch_disable_bits, 0x0);
 	closid_p = this_cpu_read(pqr_state.cur_closid);
 	rmid_p = this_cpu_read(pqr_state.cur_rmid);
-	mem_r = plr->kmem;
+	mem_r = page_to_virt(plr->kmem);
 	size = plr->size;
 	line_size = plr->line_size;
 	__wrmsr(IA32_PQR_ASSOC, rmid_p, plr->closid);
@@ -1630,7 +1677,7 @@ static int pseudo_lock_dev_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
-	physical = __pa(plr->kmem) >> PAGE_SHIFT;
+	physical = page_to_phys(plr->kmem) >> PAGE_SHIFT;
 	psize = plr->size - off;
 
 	if (off > plr->size) {
@@ -1652,7 +1699,7 @@ static int pseudo_lock_dev_mmap(struct file *file, struct vm_area_struct *vma)
 		return -ENOSPC;
 	}
 
-	memset(plr->kmem + off, 0, vsize);
+	memset(page_to_virt(plr->kmem) + off, 0, vsize);
 
 	if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
 			    vsize, vma->vm_page_prot)) {
-- 
2.13.6
