All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Fenghua Yu" <fenghua.yu@intel.com>
To: "Thomas Gleixner" <tglx@linutronix.de>
Cc: "H. Peter Anvin" <h.peter.anvin@intel.com>,
	"Ingo Molnar" <mingo@elte.hu>, "Tony Luck" <tony.luck@intel.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Stephane Eranian" <eranian@google.com>,
	"Borislav Petkov" <bp@suse.de>,
	"Dave Hansen" <dave.hansen@intel.com>,
	"Nilay Vaish" <nilayvaish@gmail.com>, "Shaohua Li" <shli@fb.com>,
	"David Carrillo-Cisneros" <davidcc@google.com>,
	"Ravi V Shankar" <ravi.v.shankar@intel.com>,
	"Sai Prakhya" <sai.praneeth.prakhya@intel.com>,
	"Vikas Shivappa" <vikas.shivappa@linux.intel.com>,
	"linux-kernel" <linux-kernel@vger.kernel.org>,
	"x86" <x86@kernel.org>, "Fenghua Yu" <fenghua.yu@intel.com>
Subject: [PATCH v4 13/18] x86/intel_rdt: Add mkdir to resctrl file system
Date: Fri, 14 Oct 2016 19:12:23 -0700	[thread overview]
Message-ID: <1476497548-11169-14-git-send-email-fenghua.yu@intel.com> (raw)
In-Reply-To: <1476497548-11169-1-git-send-email-fenghua.yu@intel.com>

From: Fenghua Yu <fenghua.yu@intel.com>

Resource control groups are represented as directories in the resctrl
file system. The root directory describes the default resources available
to tasks that have not been assigned specific resources. Other directories
can be created at the root level to make new resource groups. It is not
permitted to make directories within other directories.

Hardware uses a CLOSID (Class of service ID) to determine which resource
limits are currently in effect. The exact number available is enumerated
by CPUID leaf 0x10, but on current implementations it is a small number.
We implement a simple bitmask allocator for CLOSIDs.

Each resource control group uses one CLOSID, which limits the total number
of directories that can be created.

Resource groups can be removed using rmdir.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/include/asm/intel_rdt.h         |   9 ++
 arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 255 +++++++++++++++++++++++++++++++
 2 files changed, 264 insertions(+)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index ea8c09b3..7eb8078 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -9,13 +9,20 @@
  * @kn:				kernfs node
  * @rdtgroup_list:		linked list for all rdtgroups
  * @closid:			closid for this rdtgroup
+ * @flags:			status bits
+ * @waitcount:			how many cpus expect to find this
  */
 struct rdtgroup {
 	struct kernfs_node	*kn;
 	struct list_head	rdtgroup_list;
 	int			closid;
+	int			flags;
+	atomic_t		waitcount;
 };
 
+/* rdtgroup.flags */
+#define	RDT_DELETED		1
+
 /* List of all resource groups */
 extern struct list_head rdt_all_groups;
 
@@ -142,4 +149,6 @@ union cpuid_0x10_1_edx {
 };
 
 void rdt_cbm_update(void *arg);
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
+void rdtgroup_kn_unlock(struct kernfs_node *kn);
 #endif /* _ASM_X86_INTEL_RDT_H */
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 316fa0c..9e0044d 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -26,16 +26,79 @@
 #include <linux/seq_file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/cpu.h>
 
 #include <uapi/linux/magic.h>
 
 #include <asm/intel_rdt.h>
+#include <asm/intel_rdt_common.h>
 
 DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
 struct kernfs_root *rdt_root;
 struct rdtgroup rdtgroup_default;
 LIST_HEAD(rdt_all_groups);
 
+/*
+ * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
+ * we can keep a bitmap of free CLOSIDs in a single integer.
+ *
+ * Please note: This only supports global CLOSID across multiple
+ * resources and multiple sockets. User can create rdtgroups including root
+ * rdtgroup up to the number of CLOSIDs, which is 16 on Broadwell. When
+ * number of caches is big or number of supported resources sharing CLOSID
+ * is growing, it's getting harder to find usable rdtgroups which is limited
+ * by the small number of CLOSIDs.
+ *
+ * In the future, if it's necessary, we can implement more complex CLOSID
+ * allocation per socket/per resource domain and utilize CLOSIDs as many
+ * as possible. E.g. on 2-socket Broadwell, user can create upto 16x16=256
+ * rdtgroups and each rdtgroup has different combination of two L3 CBMs.
+ */
+static int closid_free_map;
+
+static void closid_init(void)
+{
+	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+	int rdt_max_closid;
+
+	/* Enabling L3 CDP halves the number of CLOSIDs */
+	if (r->cdp_enabled)
+		r->num_closid = r->max_closid / 2;
+	else
+		r->num_closid = r->max_closid;
+
+	/* Compute rdt_max_closid across all resources */
+	rdt_max_closid = 0;
+	for_each_rdt_resource(r)
+		rdt_max_closid = max(rdt_max_closid, r->num_closid);
+	if (rdt_max_closid > 32) {
+		pr_warn_once("Only using 32/%d CLOSIDs\n", rdt_max_closid);
+		rdt_max_closid = 32;
+	}
+
+	closid_free_map = BIT_MASK(rdt_max_closid) - 1;
+
+	/* CLOSID 0 is always reserved for the default group */
+	closid_free_map &= ~1;
+}
+
+int closid_alloc(void)
+{
+	int closid = ffs(closid_free_map);
+
+	if (closid == 0)
+		return -ENOSPC;
+	closid--;
+	closid_free_map &= ~(1 << closid);
+
+	return closid;
+}
+
+static void closid_free(int closid)
+{
+	closid_free_map |= 1 << closid;
+}
+
 /* set uid and gid of rdtgroup dirs and files to that of the creator */
 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
 {
@@ -249,6 +312,54 @@ static int parse_rdtgroupfs_options(char *data)
 	return 0;
 }
 
+/*
+ * We don't allow rdtgroup directories to be created anywhere
+ * except the root directory. Thus when looking for the rdtgroup
+ * structure for a kernfs node we are either looking at a directory,
+ * in which case the rdtgroup structure is pointed at by the "priv"
+ * field, otherwise we have a file, and need only look to the parent
+ * to find the rdtgroup.
+ */
+static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
+{
+	if (kernfs_type(kn) == KERNFS_DIR)
+		return kn->priv;
+	else
+		return kn->parent->priv;
+}
+
+struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
+{
+	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
+
+	atomic_inc(&rdtgrp->waitcount);
+	kernfs_break_active_protection(kn);
+
+	mutex_lock(&rdtgroup_mutex);
+
+	/* Was this group deleted while we waited? */
+	if (rdtgrp->flags & RDT_DELETED)
+		return NULL;
+
+	return rdtgrp;
+}
+
+void rdtgroup_kn_unlock(struct kernfs_node *kn)
+{
+	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
+
+	mutex_unlock(&rdtgroup_mutex);
+
+	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
+	    (rdtgrp->flags & RDT_DELETED)) {
+		kernfs_unbreak_active_protection(kn);
+		kernfs_put(kn);
+		kfree(rdtgrp);
+	} else {
+		kernfs_unbreak_active_protection(kn);
+	}
+}
+
 static struct dentry *rdt_mount(struct file_system_type *fs_type,
 				int flags, const char *unused_dev_name,
 				void *data)
@@ -272,6 +383,8 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
 		goto out;
 	}
 
+	closid_init();
+
 	dentry = kernfs_mount(fs_type, flags, rdt_root,
 			      RDTGROUP_SUPER_MAGIC, &new_sb);
 	if (IS_ERR(dentry))
@@ -319,6 +432,39 @@ static void reset_all_cbms(struct rdt_resource *r)
 	put_cpu();
 }
 
+static void rdt_reset_pqr_assoc_closid(void *v)
+{
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
+
+	state->closid = 0;
+	wrmsr(MSR_IA32_PQR_ASSOC, state->rmid, 0);
+}
+
+/*
+ * Forcibly remove all of subdirectories under root.
+ */
+static void rmdir_all_sub(void)
+{
+	struct rdtgroup *rdtgrp;
+	struct list_head *l, *next;
+
+	get_cpu();
+	/* Reset PQR_ASSOC MSR on this cpu. */
+	rdt_reset_pqr_assoc_closid(NULL);
+	/* Reset PQR_ASSOC MSR on the rest of cpus. */
+	smp_call_function_many(cpu_online_mask, rdt_reset_pqr_assoc_closid,
+			       NULL, 1);
+	put_cpu();
+	list_for_each_safe(l, next, &rdt_all_groups) {
+		rdtgrp = list_entry(l, struct rdtgroup, rdtgroup_list);
+		if (rdtgrp == &rdtgroup_default)
+			continue;
+		kernfs_remove(rdtgrp->kn);
+		list_del(&rdtgrp->rdtgroup_list);
+		kfree(rdtgrp);
+	}
+}
+
 static void rdt_kill_sb(struct super_block *sb)
 {
 	struct rdt_resource *r;
@@ -334,6 +480,7 @@ static void rdt_kill_sb(struct super_block *sb)
 		set_l3_qos_cfg(r);
 	}
 
+	rmdir_all_sub();
 	static_branch_disable(&rdt_enable_key);
 	kernfs_kill_sb(sb);
 	mutex_unlock(&rdtgroup_mutex);
@@ -345,7 +492,115 @@ static struct file_system_type rdt_fs_type = {
 	.kill_sb = rdt_kill_sb,
 };
 
+static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
+			  umode_t mode)
+{
+	struct rdtgroup *parent, *rdtgrp;
+	struct kernfs_node *kn;
+	int ret, closid;
+
+	/* Only allow mkdir in the root directory */
+	if (parent_kn != rdtgroup_default.kn)
+		return -EPERM;
+
+	/* Do not accept '\n' to avoid unparsable situation. */
+	if (strchr(name, '\n'))
+		return -EINVAL;
+
+	parent = rdtgroup_kn_lock_live(parent_kn);
+	if (!parent) {
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	ret = closid_alloc();
+	if (ret < 0)
+		goto out_unlock;
+	closid = ret;
+
+	/* allocate the rdtgroup. */
+	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
+	if (!rdtgrp) {
+		ret = -ENOSPC;
+		goto out_closid_free;
+	}
+	rdtgrp->closid = closid;
+	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
+
+	/* kernfs creates the directory for rdtgrp */
+	kn = kernfs_create_dir(parent->kn, name, mode, rdtgrp);
+	if (IS_ERR(kn)) {
+		ret = PTR_ERR(kn);
+		goto out_cancel_ref;
+	}
+	rdtgrp->kn = kn;
+
+	/*
+	 * This extra ref will be put in kernfs_remove() and guarantees
+	 * that @rdtgrp->kn is always accessible.
+	 */
+	kernfs_get(kn);
+
+	ret = rdtgroup_kn_set_ugid(kn);
+	if (ret)
+		goto out_destroy;
+
+	kernfs_activate(kn);
+
+	ret = 0;
+	goto out_unlock;
+
+out_destroy:
+	kernfs_remove(rdtgrp->kn);
+out_cancel_ref:
+	kfree(rdtgrp);
+out_closid_free:
+	closid_free(closid);
+out_unlock:
+	rdtgroup_kn_unlock(parent_kn);
+	return ret;
+}
+
+static int rdtgroup_rmdir(struct kernfs_node *kn)
+{
+	struct rdtgroup *rdtgrp;
+	int ret = 0;
+
+	rdtgrp = rdtgroup_kn_lock_live(kn);
+	if (!rdtgrp) {
+		rdtgroup_kn_unlock(kn);
+		return -ENOENT;
+	}
+
+	/*
+	 * rmdir is for deleting resource groups. Don't
+	 * allow deletion of "info" or any of its subdirectories
+	 */
+	if (!rdtgrp) {
+		mutex_unlock(&rdtgroup_mutex);
+		kernfs_unbreak_active_protection(kn);
+		return -EPERM;
+	}
+
+	rdtgrp->flags = RDT_DELETED;
+	closid_free(rdtgrp->closid);
+	list_del(&rdtgrp->rdtgroup_list);
+
+	/*
+	 * one extra hold on this, will drop when we kfree(rdtgrp)
+	 * in rdtgroup_kn_unlock()
+	 */
+	kernfs_get(kn);
+	kernfs_remove(rdtgrp->kn);
+
+	rdtgroup_kn_unlock(kn);
+
+	return ret;
+}
+
 static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
+	.mkdir	= rdtgroup_mkdir,
+	.rmdir	= rdtgroup_rmdir,
 };
 
 static int __init rdtgroup_setup_root(void)
-- 
2.5.0

  parent reply	other threads:[~2016-10-14 23:11 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-15  2:12 [PATCH v4 00/18] Intel Cache Allocation Technology Fenghua Yu
2016-10-15  2:12 ` [PATCH v4 01/18] Documentation, ABI: Add a document entry for cache id Fenghua Yu
2016-10-17 10:31   ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 02/18] cacheinfo: Introduce " Fenghua Yu
2016-10-17 10:32   ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 03/18] x86, intel_cacheinfo: Enable cache id in x86 Fenghua Yu
2016-10-17 10:48   ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 04/18] x86/intel_rdt: Feature discovery Fenghua Yu
2016-10-15  2:12 ` [PATCH v4 05/18] Documentation, x86: Documentation for Intel resource allocation user interface Fenghua Yu
2016-10-15  2:12 ` [PATCH v4 06/18] x86/intel_rdt: Add CONFIG, Makefile, and basic initialization Fenghua Yu
2016-10-17 10:57   ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 07/18] x86/intel_rdt: Add Haswell feature discovery Fenghua Yu
2016-10-17 11:03   ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 08/18] x86/intel_rdt: Pick up L3/L2 RDT parameters from CPUID Fenghua Yu
2016-10-17 13:45   ` Thomas Gleixner
2016-10-17 18:06     ` Fenghua Yu
2016-10-17 16:35       ` Luck, Tony
2016-10-17 16:43         ` Yu, Fenghua
2016-10-17 20:20           ` Luck, Tony
2016-10-17 16:54         ` Thomas Gleixner
2016-10-17 16:53       ` Thomas Gleixner
2016-10-17 17:02       ` Thomas Gleixner
2016-10-17 21:22         ` Fenghua Yu
2016-10-15  2:12 ` [PATCH v4 09/18] x86/cqm: Move PQR_ASSOC management code into generic code used by both CQM and CAT Fenghua Yu
2016-10-15  2:12 ` [PATCH v4 10/18] x86/intel_rdt: Build structures for each resource based on cache topology Fenghua Yu
2016-10-17 14:44   ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 11/18] x86/intel_rdt: Add basic resctrl filesystem support Fenghua Yu
2016-10-17 19:35   ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 12/18] x86/intel_rdt: Add "info" files to resctrl file system Fenghua Yu
2016-10-17 19:46   ` Thomas Gleixner
2016-10-15  2:12 ` Fenghua Yu [this message]
2016-10-17 21:14   ` [PATCH v4 13/18] x86/intel_rdt: Add mkdir " Thomas Gleixner
2016-10-17 21:50     ` Luck, Tony
2016-10-17 22:52       ` Thomas Gleixner
2016-10-17 23:00         ` Luck, Tony
2016-10-17 23:03           ` Thomas Gleixner
2016-10-17 23:10             ` Luck, Tony
2016-10-17 23:25               ` Thomas Gleixner
2016-10-18  1:18     ` Fenghua Yu
2016-10-17 23:20       ` Thomas Gleixner
2016-10-17 23:37         ` Luck, Tony
2016-10-18  2:56           ` Fenghua Yu
2016-10-18 10:44           ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 14/18] x86/intel_rdt: Add cpus file Fenghua Yu
2016-10-17 21:27   ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 15/18] x86/intel_rdt: Add tasks files Fenghua Yu
2016-10-17 22:01   ` Thomas Gleixner
2016-10-17 22:17     ` Luck, Tony
2016-10-15  2:12 ` [PATCH v4 16/18] x86/intel_rdt: Add schemata file Fenghua Yu
2016-10-17 22:35   ` Thomas Gleixner
2016-10-15  2:12 ` [PATCH v4 17/18] x86/intel_rdt: Add scheduler hook Fenghua Yu
2016-10-15  2:12 ` [PATCH v4 18/18] MAINTAINERS: Add maintainer for Intel RDT resource allocation Fenghua Yu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1476497548-11169-14-git-send-email-fenghua.yu@intel.com \
    --to=fenghua.yu@intel.com \
    --cc=bp@suse.de \
    --cc=dave.hansen@intel.com \
    --cc=davidcc@google.com \
    --cc=eranian@google.com \
    --cc=h.peter.anvin@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=nilayvaish@gmail.com \
    --cc=peterz@infradead.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=sai.praneeth.prakhya@intel.com \
    --cc=shli@fb.com \
    --cc=tglx@linutronix.de \
    --cc=tony.luck@intel.com \
    --cc=vikas.shivappa@linux.intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.