All of lore.kernel.org
 help / color / mirror / Atom feed
From: Frederic Weisbecker <fweisbec@gmail.com>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
	Christoph Lameter <cl@linux.com>,
	Kevin Hilman <khilman@linaro.org>,
	Mike Galbraith <bitbucket@online.de>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Tejun Heo <tj@kernel.org>, Viresh Kumar <viresh.kumar@linaro.org>
Subject: [PATCH 3/3] workqueue: Add anon workqueue sysfs hierarchy
Date: Fri, 14 Mar 2014 17:38:51 +0100	[thread overview]
Message-ID: <1394815131-17271-4-git-send-email-fweisbec@gmail.com> (raw)
In-Reply-To: <1394815131-17271-1-git-send-email-fweisbec@gmail.com>

We call "anon workqueues" the set of unbound workqueues that don't
carry the WQ_SYSFS flag.

They are a problem nowaday because people who work on CPU isolation
(HPC, Real time, etc...) want to be able to migrate all the unbound
workqueues away to a single CPU. This control is possible through sysfs
but only with WQ_SYSFS workqueues.

Now we need to deal with the other unbound workqueues. There is two
possible solutions:

1) Implement a sysfs directory for each unbound !WQ_SYSFS. This could
be done with a specific Kconfig to make sure that these workqueue
won't be considered as a stable ABI. But we all know that all distros
will enable this Kconfig symbol and that a warning in the Kconfig help
text won't protect against anything.

2) Implement a single sysfs directory containing only the cpumask file
to the control the affinity of all the !WQ_SYSFS workqueues.

This patch implements the second solution.

Two issues I have seen though:

* This triggers the following warning in apply_workqueue_attrs():

	/* creating multiple pwqs breaks ordering guarantee */
	if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
		return -EINVAL;

  I haven't yet checked into the details.

* wq_calc_node_cpumask() tells that if NUMA affinity is not enabled,
  cpumask is always used. Which suggest that if NUMA affinity is enabled
  the cpumask may be ignored?

Cc: Christoph Lameter <cl@linux.com>
Cc: Kevin Hilman <khilman@linaro.org>
Cc: Mike Galbraith <bitbucket@online.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Not-Yet-Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/workqueue.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 112 insertions(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ad8f727..aabee1f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -289,7 +289,8 @@ static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
 
 static DEFINE_MUTEX(wq_pool_mutex);	/* protects pools and workqueues list */
-static DEFINE_MUTEX(wq_unbound_mutex);	/* protects list of unbound workqueues */
+/* protects list of unbound workqueues and wq_anon_cpumask*/
+static DEFINE_MUTEX(wq_unbound_mutex);
 static DEFINE_SPINLOCK(wq_mayday_lock);	/* protects wq->maydays list */
 
 static LIST_HEAD(workqueues);		/* PL: list of all workqueues */
@@ -3311,13 +3312,122 @@ static struct device_attribute wq_sysfs_unbound_attrs[] = {
 	__ATTR_NULL,
 };
 
+/* Protected by wq_unbound_mutex */
+static cpumask_t wq_anon_cpumask;
+static ssize_t wq_anon_cpumask_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	int written;
+
+	mutex_lock(&wq_unbound_mutex);
+	written = cpumask_scnprintf(buf, PAGE_SIZE, &wq_anon_cpumask);
+	mutex_unlock(&wq_unbound_mutex);
+
+	written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
+
+	return written;
+}
+
+/* Must be called with wq_unbound_mutex held */
+static int wq_anon_cpumask_set(cpumask_var_t cpumask)
+{
+	struct workqueue_attrs *attrs;
+	struct workqueue_struct *wq;
+	int ret;
+
+	list_for_each_entry(wq, &workqueues_unbound, unbound_list) {
+		if (wq->flags & WQ_SYSFS)
+			continue;
+		attrs = wq_sysfs_prep_attrs(wq);
+		if (!attrs)
+			return -ENOMEM;
+
+		cpumask_copy(attrs->cpumask, cpumask);
+		ret = apply_workqueue_attrs(wq, attrs);
+		free_workqueue_attrs(attrs);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static ssize_t wq_anon_cpumask_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	cpumask_var_t cpumask;
+	int ret = -EINVAL;
+
+	if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
+		return -ENOMEM;
+
+	ret = cpumask_parse(buf, cpumask);
+	if (ret)
+		goto out;
+
+	get_online_cpus();
+	if (cpumask_intersects(cpumask, cpu_online_mask)) {
+		mutex_lock(&wq_unbound_mutex);
+		ret = wq_anon_cpumask_set(cpumask);
+		if (!ret)
+			cpumask_copy(&wq_anon_cpumask, cpumask);
+		mutex_unlock(&wq_unbound_mutex);
+	}
+	put_online_cpus();
+out:
+	free_cpumask_var(cpumask);
+	return ret ? ret : count;
+}
+
+static void device_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+static struct device_attribute wq_sysfs_anon_attr =
+	__ATTR(cpumask, 0644, wq_anon_cpumask_show, wq_anon_cpumask_store);
+
 static struct bus_type wq_subsys = {
 	.name				= "workqueue",
 };
 
 static int __init wq_sysfs_init(void)
 {
-	return subsys_virtual_register(&wq_subsys, NULL);
+	struct device *anon_dev;
+	int ret;
+
+	ret = subsys_virtual_register(&wq_subsys, NULL);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&wq_unbound_mutex);
+	cpumask_copy(&wq_anon_cpumask, cpu_possible_mask);
+	mutex_unlock(&wq_unbound_mutex);
+
+	anon_dev = kzalloc(sizeof(*anon_dev), GFP_KERNEL);
+	if (!anon_dev)
+		return -ENOMEM;
+
+	anon_dev->bus = &wq_subsys;
+	anon_dev->init_name = "anon_wqs";
+	anon_dev->release = device_release;
+
+	ret = device_register(anon_dev);
+	if (ret) {
+		kfree(anon_dev);
+		return ret;
+	}
+
+	ret = device_create_file(anon_dev, &wq_sysfs_anon_attr);
+	if (ret) {
+		device_unregister(anon_dev);
+		return ret;
+	}
+
+	kobject_uevent(&anon_dev->kobj, KOBJ_ADD);
+
+	return 0;
 }
 core_initcall(wq_sysfs_init);
 
-- 
1.8.3.1


  parent reply	other threads:[~2014-03-14 16:39 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-14 16:38 [RFC PATCH 0/3] workqueue: Control cpu affinity of !WQ_SYSFS unbound workqueues Frederic Weisbecker
2014-03-14 16:38 ` [PATCH 1/3] workqueue: Move workqueue bus attr to device attribute Frederic Weisbecker
2014-03-14 16:38 ` [PATCH 2/3] workqueues: Account unbound workqueue in a seperate list Frederic Weisbecker
2014-03-14 18:17   ` Kevin Hilman
2014-03-15 12:40     ` Frederic Weisbecker
2014-03-14 16:38 ` Frederic Weisbecker [this message]
2014-03-14 19:08   ` [PATCH 3/3] workqueue: Add anon workqueue sysfs hierarchy Kevin Hilman
2014-03-17 14:02     ` Frederic Weisbecker
2014-03-22 17:01     ` Frederic Weisbecker
2014-03-22 18:55       ` Tejun Heo
2014-03-22 22:04         ` Frederic Weisbecker
2014-03-14 18:06 ` [RFC PATCH 0/3] workqueue: Control cpu affinity of !WQ_SYSFS unbound workqueues Kevin Hilman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1394815131-17271-4-git-send-email-fweisbec@gmail.com \
    --to=fweisbec@gmail.com \
    --cc=bitbucket@online.de \
    --cc=cl@linux.com \
    --cc=khilman@linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=tj@kernel.org \
    --cc=viresh.kumar@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.