All of lore.kernel.org
 help / color / mirror / Atom feed
From: Reinette Chatre <reinette.chatre@intel.com>
To: tglx@linutronix.de, fenghua.yu@intel.com, tony.luck@intel.com
Cc: gavin.hindman@intel.com, vikas.shivappa@linux.intel.com,
	dave.hansen@intel.com, mingo@redhat.com, hpa@zytor.com,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	Reinette Chatre <reinette.chatre@intel.com>
Subject: [RFC PATCH V2 17/22] x86/intel_rdt: Create character device exposing pseudo-locked region
Date: Tue, 13 Feb 2018 07:47:01 -0800	[thread overview]
Message-ID: <2b2b20654cd86d0d602784126440c0a63515b665.1518443616.git.reinette.chatre@intel.com> (raw)
In-Reply-To: <cover.1518443616.git.reinette.chatre@intel.com>
In-Reply-To: <cover.1518443616.git.reinette.chatre@intel.com>

Once a pseudo-locked region has been created it needs to be made
available to user space to provide benefit there.

A character device supporting mmap() is created for each pseudo-locked
region. A user space application can now use the mmap() system call to
map the pseudo-locked region into its virtual address space.

Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
---
 arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c | 267 +++++++++++++++++++++++++++-
 1 file changed, 265 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
index c03413021f45..b4923aa4314c 100644
--- a/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
+++ b/arch/x86/kernel/cpu/intel_rdt_pseudo_lock.c
@@ -26,6 +26,7 @@
 #include <linux/kernfs.h>
 #include <linux/kref.h>
 #include <linux/kthread.h>
+#include <linux/mman.h>
 #include <linux/seq_file.h>
 #include <linux/stat.h>
 #include <linux/slab.h>
@@ -52,6 +53,14 @@
  */
 static u64 prefetch_disable_bits;
 
+/*
+ * Major number shared by all devices exposing pseudo-locked regions, and
+ * a bitmask of free minors (bits 0 to MINORBITS - 1; set means available).
+ */
+static unsigned int pseudo_lock_major;
+static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS - 1, 0);
+static struct class *pseudo_lock_class;
+
 struct kernfs_node *pseudo_lock_kn;
 
 /*
@@ -189,6 +198,15 @@ static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
 	plr->d = NULL;
 }
 
+/**
+ * pseudo_lock_minor_release - Return minor number to the available pool
+ * @minor: The minor number being released
+ */
+static void pseudo_lock_minor_release(unsigned int minor)
+{
+	__set_bit(minor, &pseudo_lock_minor_avail);
+}
+
 static void __pseudo_lock_region_release(struct pseudo_lock_region *plr)
 {
 	bool is_new_plr = (plr == new_plr);
@@ -199,6 +217,9 @@ static void __pseudo_lock_region_release(struct pseudo_lock_region *plr)
 
 	if (plr->locked) {
 		plr->d->plr = NULL;
+		device_destroy(pseudo_lock_class,
+			       MKDEV(pseudo_lock_major, plr->minor));
+		pseudo_lock_minor_release(plr->minor);
 		/*
 		 * Resource groups come and go. Simply returning this
 		 * pseudo-locked region's bits to the default CLOS may
@@ -763,11 +784,74 @@ static int pseudo_lock_fn(void *_plr)
 	return 0;
 }
 
+/**
+ * pseudo_lock_minor_get - Allocate a free minor number
+ * @minor: Where the newly allocated minor number is stored on success
+ *
+ * Free minor numbers are tracked as set bits in a bitmask. The lowest
+ * free minor is claimed by clearing its bit.
+ *
+ * RETURNS:
+ * Zero on success, -ENOSPC if no minor number is available.
+ */
+static int pseudo_lock_minor_get(unsigned int *minor)
+{
+	unsigned long avail;
+
+	avail = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);
+
+	/* No bit set within the searched range: all minors are in use */
+	if (avail == MINORBITS)
+		return -ENOSPC;
+
+	*minor = avail;
+	__clear_bit(avail, &pseudo_lock_minor_avail);
+	return 0;
+}
+
+/**
+ * region_find_by_minor - Locate a pseudo-lock region by inode minor number
+ * @minor: The minor number of the device representing pseudo-locked region
+ *
+ * When the character device is accessed we need to determine which
+ * pseudo-locked region it belongs to. This is done by matching the minor
+ * number of the device to the pseudo-locked region it belongs to.
+ *
+ * Minor numbers are assigned at the time a pseudo-locked region is associated
+ * with a cache instance.
+ *
+ * LOCKING:
+ * rdt_pseudo_lock_mutex must be held
+ *
+ * RETURNS:
+ * On success returns pointer to pseudo-locked region, NULL on failure.
+ */
+static struct pseudo_lock_region *region_find_by_minor(unsigned int minor)
+{
+	struct rdt_resource *r;
+	struct rdt_domain *d;
+
+	lockdep_assert_held(&rdt_pseudo_lock_mutex);
+
+	for_each_alloc_enabled_rdt_resource(r) {
+		list_for_each_entry(d, &r->domains, list) {
+			/*
+			 * Return directly; a break only leaves the inner loop.
+			 */
+			if (d->plr && d->plr->minor == minor)
+				return d->plr;
+		}
+	}
+	return NULL;
+}
+
 static int pseudo_lock_doit(struct pseudo_lock_region *plr,
 			    struct rdt_resource *r,
 			    struct rdt_domain *d)
 {
 	struct task_struct *thread;
+	unsigned int new_minor;
+	struct device *dev;
 	int closid;
 	int ret, i;
 
@@ -858,11 +942,45 @@ static int pseudo_lock_doit(struct pseudo_lock_region *plr,
 			pseudo_lock_clos_set(plr, i, d->ctrl_val[0]);
 	}
 
+	ret = pseudo_lock_minor_get(&new_minor);
+	if (ret < 0) {
+		rdt_last_cmd_puts("unable to obtain a new minor number\n");
+		goto out_clos_def;
+	}
+
 	plr->locked = true;
 	d->plr = plr;
 	new_plr = NULL;
 
 	/*
+	 * Unlock access but do not release the reference. The
+	 * pseudo-locked region will still be here when we return.
+	 * If anything else attempts to access the region while we do not
+	 * have the mutex the region would be considered locked.
+	 *
+	 * We need to release the mutex temporarily to avoid a potential
+	 * deadlock with the mm->mmap_sem semaphore which is obtained in
+	 * the device_create() callpath below as well as before our mmap()
+	 * callback is called.
+	 */
+	mutex_unlock(&rdt_pseudo_lock_mutex);
+
+	dev = device_create(pseudo_lock_class, NULL,
+			    MKDEV(pseudo_lock_major, new_minor),
+			    plr, "%s", plr->kn->name);
+
+	mutex_lock(&rdt_pseudo_lock_mutex);
+
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		rdt_last_cmd_printf("failed to created character device: %d\n",
+				    ret);
+		goto out_minor;
+	}
+
+	plr->minor = new_minor;
+
+	/*
 	 * We do not return CBM to CLOS here since that will result in a
 	 * CBM of all zeroes which is an illegal MSR write.
 	 */
@@ -870,6 +988,8 @@ static int pseudo_lock_doit(struct pseudo_lock_region *plr,
 	ret = 0;
 	goto out;
 
+out_minor:
+	pseudo_lock_minor_release(new_minor);
 out_clos_def:
 	pseudo_lock_clos_set(plr, 0, d->ctrl_val[0] | plr->cbm);
 out_closid:
@@ -1184,6 +1304,127 @@ static int pseudo_lock_debugfs_create(void)
 }
 #endif
 
+/* Bind the opened file to the pseudo-locked region matching its minor. */
+static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
+{
+	struct pseudo_lock_region *plr;
+	int ret = -ENODEV;
+
+	mutex_lock(&rdt_pseudo_lock_mutex);
+	plr = region_find_by_minor(iminor(inode));
+	if (!plr)
+		goto out;
+
+	filp->private_data = plr;
+	/* Perform a non-seekable open - llseek is not supported */
+	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
+	ret = 0;
+
+out:
+	mutex_unlock(&rdt_pseudo_lock_mutex);
+	return ret;
+}
+
+static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
+{
+	mutex_lock(&rdt_pseudo_lock_mutex);
+	filp->private_data = NULL;	/* forget the region set at open */
+	mutex_unlock(&rdt_pseudo_lock_mutex);
+	return 0;
+}
+
+static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
+{
+	/* Remapping a pseudo-locked mapping is not supported */
+	return -EINVAL;
+}
+
+static const struct vm_operations_struct pseudo_mmap_ops = {
+	.mremap = pseudo_lock_dev_mremap,	/* always returns -EINVAL */
+};
+
+/*
+ * mmap() handler: map the pseudo-locked region's memory into the caller's
+ * address space. Requires a shared mapping that fits inside the region and
+ * a task whose allowed CPUs are a subset of plr->d->cpu_mask.
+ */
+static int pseudo_lock_dev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	unsigned long vsize = vma->vm_end - vma->vm_start;
+	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+	struct pseudo_lock_region *plr;
+	unsigned long physical;
+	unsigned long psize;
+	int ret;
+
+	mutex_lock(&rdt_pseudo_lock_mutex);
+
+	plr = file->private_data;
+	if (WARN_ON(!plr)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/*
+	 * The task must be confined to the cpus associated with the
+	 * pseudo-locked region. A task that may be scheduled elsewhere
+	 * can invalidate entries in the pseudo-locked region.
+	 */
+	ret = -EINVAL;
+	if (!cpumask_subset(&current->cpus_allowed, &plr->d->cpu_mask))
+		goto out;
+
+	physical = __pa(plr->kmem) >> PAGE_SHIFT;
+	psize = plr->size - off;
+
+	ret = -ENOSPC;
+	if (off > plr->size)
+		goto out;
+
+	/* Changes must reach the mapped memory: no copy-on-write mapping */
+	ret = -EINVAL;
+	if (!(vma->vm_flags & VM_SHARED))
+		goto out;
+
+	ret = -ENOSPC;
+	if (vsize > psize)
+		goto out;
+
+	memset(plr->kmem + off, 0, vsize);
+
+	ret = -EAGAIN;
+	if (remap_pfn_range(vma, vma->vm_start, physical + vma->vm_pgoff,
+			    vsize, vma->vm_page_prot))
+		goto out;
+
+	vma->vm_ops = &pseudo_mmap_ops;
+	ret = 0;
+
+out:
+	mutex_unlock(&rdt_pseudo_lock_mutex);
+	return ret;
+}
+
+static const struct file_operations pseudo_lock_dev_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pseudo_lock_dev_open,
+	.release	= pseudo_lock_dev_release,
+	.mmap		= pseudo_lock_dev_mmap,
+	.llseek		= no_llseek,
+	.read		= NULL,	/* no read()/write() handlers are provided */
+	.write		= NULL,
+};
+
+/* Name the node "pseudo_lock/<kernfs node name>" and request mode 0600. */
+static char *pseudo_lock_devnode(struct device *dev, umode_t *mode)
+{
+	struct pseudo_lock_region *plr = dev_get_drvdata(dev);
+
+	if (mode)
+		*mode = 0600;
+	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", plr->kn->name);
+}
+
 /**
  * rdt_pseudo_lock_fs_init - Create and initialize pseudo-locking files
  * @root: location in kernfs where directory and files should be created
@@ -1245,10 +1486,26 @@ int rdt_pseudo_lock_fs_init(struct kernfs_node *root)
 	if (prefetch_disable_bits == 0)
 		return 0;
 
+	ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
+	if (ret < 0)
+		return ret;
+
+	pseudo_lock_major = ret;
+
+	pseudo_lock_class = class_create(THIS_MODULE, "pseudo_lock");
+	if (IS_ERR(pseudo_lock_class)) {
+		ret = PTR_ERR(pseudo_lock_class);
+		goto out_char;
+	}
+
+	pseudo_lock_class->devnode = pseudo_lock_devnode;
+
 	pseudo_lock_kn = kernfs_create_dir(root, "pseudo_lock",
 					   root->mode, NULL);
-	if (IS_ERR(pseudo_lock_kn))
-		return PTR_ERR(pseudo_lock_kn);
+	if (IS_ERR(pseudo_lock_kn)) {
+		ret = PTR_ERR(pseudo_lock_kn);
+		goto out_class;
+	}
 
 	kn = __kernfs_create_file(pseudo_lock_kn, "avail", 0444,
 				  0, &pseudo_lock_avail_ops,
@@ -1276,6 +1533,10 @@ int rdt_pseudo_lock_fs_init(struct kernfs_node *root)
 error:
 	kernfs_remove(pseudo_lock_kn);
 	pseudo_lock_kn = NULL;
+out_class:
+	class_destroy(pseudo_lock_class);
+out_char:
+	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
 out:
 	return ret;
 }
@@ -1321,5 +1582,7 @@ void rdt_pseudo_lock_fs_remove(void)
 #endif
 	kernfs_remove(pseudo_lock_kn);
 	pseudo_lock_kn = NULL;
+	class_destroy(pseudo_lock_class);
+	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
 	mutex_unlock(&rdt_pseudo_lock_mutex);
 }
-- 
2.13.6

  parent reply	other threads:[~2018-02-13 23:49 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-13 15:46 [RFC PATCH V2 00/22] Intel(R) Resource Director Technology Cache Pseudo-Locking enabling Reinette Chatre
2018-02-13 15:46 ` Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 01/22] x86/intel_rdt: Documentation for Cache Pseudo-Locking Reinette Chatre
2018-02-19 20:35   ` Thomas Gleixner
2018-02-19 22:15     ` Reinette Chatre
2018-02-19 22:19       ` Thomas Gleixner
2018-02-19 22:24         ` Reinette Chatre
2018-02-19 21:27   ` Randy Dunlap
2018-02-19 22:21     ` Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 02/22] x86/intel_rdt: Make useful functions available internally Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 03/22] x86/intel_rdt: Introduce hooks to create pseudo-locking files Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 04/22] x86/intel_rdt: Introduce test to determine if closid is in use Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 05/22] x86/intel_rdt: Print more accurate pseudo-locking availability Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 06/22] x86/intel_rdt: Create pseudo-locked regions Reinette Chatre
2018-02-19 20:57   ` Thomas Gleixner
2018-02-19 23:02     ` Reinette Chatre
2018-02-19 23:16       ` Thomas Gleixner
2018-02-20  3:21         ` Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 07/22] x86/intel_rdt: Connect pseudo-locking directory to operations Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 08/22] x86/intel_rdt: Introduce pseudo-locking resctrl files Reinette Chatre
2018-02-19 21:01   ` Thomas Gleixner
2018-02-13 15:46 ` [RFC PATCH V2 09/22] x86/intel_rdt: Discover supported platforms via prefetch disable bits Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 10/22] x86/intel_rdt: Disable pseudo-locking if CDP enabled Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 11/22] x86/intel_rdt: Associate pseudo-locked regions with its domain Reinette Chatre
2018-02-19 21:19   ` Thomas Gleixner
2018-02-19 23:00     ` Reinette Chatre
2018-02-19 23:19       ` Thomas Gleixner
2018-02-20  3:17         ` Reinette Chatre
2018-02-20 10:00           ` Thomas Gleixner
2018-02-20 16:02             ` Reinette Chatre
2018-02-20 17:18               ` Thomas Gleixner
2018-02-13 15:46 ` [RFC PATCH V2 12/22] x86/intel_rdt: Support CBM checking from value and character buffer Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 13/22] x86/intel_rdt: Support schemata write - pseudo-locking core Reinette Chatre
2018-02-20 17:15   ` Thomas Gleixner
2018-02-20 18:47     ` Reinette Chatre
2018-02-20 23:21       ` Thomas Gleixner
2018-02-21  1:58         ` Mike Kravetz
2018-02-21  6:10           ` Reinette Chatre
2018-02-21  8:34           ` Thomas Gleixner
2018-02-21  5:58         ` Reinette Chatre
2018-02-27  0:34     ` Reinette Chatre
2018-02-27 10:36       ` Thomas Gleixner
2018-02-27 15:38         ` Thomas Gleixner
2018-02-27 19:52         ` Reinette Chatre
2018-02-27 21:33           ` Reinette Chatre
2018-02-28 18:39           ` Thomas Gleixner
2018-02-28 19:17             ` Reinette Chatre
2018-02-28 19:40               ` Thomas Gleixner
2018-02-27 21:01     ` Reinette Chatre
2018-02-28 17:57       ` Thomas Gleixner
2018-02-28 17:59         ` Thomas Gleixner
2018-02-28 18:34           ` Reinette Chatre
2018-02-28 18:42             ` Thomas Gleixner
2018-02-13 15:46 ` [RFC PATCH V2 14/22] x86/intel_rdt: Enable testing for pseudo-locked region Reinette Chatre
2018-02-13 15:46 ` [RFC PATCH V2 15/22] x86/intel_rdt: Prevent new allocations from pseudo-locked regions Reinette Chatre
2018-02-13 15:47 ` [RFC PATCH V2 16/22] x86/intel_rdt: Create debugfs files for pseudo-locking testing Reinette Chatre
2018-02-13 15:47 ` Reinette Chatre [this message]
2018-02-13 15:47 ` [RFC PATCH V2 18/22] x86/intel_rdt: More precise L2 hit/miss measurements Reinette Chatre
2018-02-13 15:47 ` [RFC PATCH V2 19/22] x86/intel_rdt: Support L3 cache performance event of Broadwell Reinette Chatre
2018-02-13 15:47 ` [RFC PATCH V2 20/22] x86/intel_rdt: Limit C-states dynamically when pseudo-locking active Reinette Chatre
2018-02-13 15:47 ` [RFC PATCH V2 21/22] mm/hugetlb: Enable large allocations through gigantic page API Reinette Chatre
2018-02-13 15:47   ` Reinette Chatre
2018-02-13 15:47 ` [RFC PATCH V2 22/22] x86/intel_rdt: Support contiguous memory of all sizes Reinette Chatre
2018-02-14 18:12 ` [RFC PATCH V2 00/22] Intel(R) Resource Director Technology Cache Pseudo-Locking enabling Mike Kravetz
2018-02-14 18:12   ` Mike Kravetz
2018-02-14 18:31   ` Reinette Chatre
2018-02-14 18:31     ` Reinette Chatre
2018-02-15 20:39     ` Reinette Chatre
2018-02-15 20:39       ` Reinette Chatre
2018-02-15 21:10       ` Mike Kravetz
2018-02-15 21:10         ` Mike Kravetz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2b2b20654cd86d0d602784126440c0a63515b665.1518443616.git.reinette.chatre@intel.com \
    --to=reinette.chatre@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=fenghua.yu@intel.com \
    --cc=gavin.hindman@intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=vikas.shivappa@linux.intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.