* [PATCH v2 2/2] cgroup: fix cgroup_path() vs rename() race, take 2
@ 2013-02-18 1:16 ` Li Zefan
0 siblings, 0 replies; 6+ messages in thread
From: Li Zefan @ 2013-02-18 1:16 UTC (permalink / raw)
To: Tejun Heo; +Cc: Al Viro, Sasha Levin, LKML, Cgroups
rename() will change dentry->d_name. The result of this race can
be worse than seeing partially rewritten name, but we might access
a stale pointer because rename() will re-allocate memory to hold
a longer name.
As accessing dentry->name must be protected by dentry->d_lock or
parent inode's i_mutex, while on the other hand cgroup-path() can
be called with some irq-safe spinlocks held, we can't generate
cgroup path using dentry->d_name.
Alternatively we make a copy of dentry->d_name and save it in
cgrp->name when a cgroup is created, and update cgrp->name at
rename().
v2: make cgrp->name RCU safe.
Signed-off-by: Li Zefan <lizefan@huawei.com>
---
block/blk-cgroup.h | 2 -
include/linux/cgroup.h | 1 +
kernel/cgroup.c | 113 ++++++++++++++++++++++++++++++++++++-------------
3 files changed, 85 insertions(+), 31 deletions(-)
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 2459730..e2e3404 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -216,9 +216,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
int ret;
- rcu_read_lock();
ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
- rcu_read_unlock();
if (ret)
strncpy(buf, "<unavailable>", buflen);
return ret;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 900af59..a3d87d9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -171,6 +171,7 @@ struct cgroup {
struct cgroup *parent; /* my parent */
struct dentry *dentry; /* cgroup fs entry, RCU protected */
+ char __rcu *name; /* a copy of dentry->d_name */
/* Private pointers for each registered subsystem */
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5d4c92e..26c071c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -890,6 +890,7 @@ static void cgroup_free_fn(struct work_struct *work)
simple_xattrs_free(&cgrp->xattrs);
ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+ kfree(cgrp->name);
kfree(cgrp);
}
@@ -1565,6 +1566,18 @@ static int cgroup_get_rootdir(struct super_block *sb)
return 0;
}
+static char *cgroup_alloc_name(struct dentry *dentry)
+{
+ char *name;
+
+ name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
+ if (!name)
+ return NULL;
+ memcpy(name, dentry->d_name.name, dentry->d_name.len);
+ name[dentry->d_name.len] = '\0';
+ return name;
+}
+
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
@@ -1613,13 +1626,19 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int i;
struct hlist_node *node;
struct css_set *cg;
+ struct dentry *dentry;
BUG_ON(sb->s_root != NULL);
ret = cgroup_get_rootdir(sb);
if (ret)
goto drop_new_super;
- inode = sb->s_root->d_inode;
+ dentry = sb->s_root;
+ inode = dentry->d_inode;
+
+ root_cgrp->name = cgroup_alloc_name(dentry);
+ if (!root_cgrp->name)
+ goto drop_new_super;
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
@@ -1660,8 +1679,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
list_add(&root->root_list, &roots);
root_count++;
- sb->s_root->d_fsdata = root_cgrp;
- root->top_cgroup.dentry = sb->s_root;
+ dentry->d_fsdata = root_cgrp;
+ root->top_cgroup.dentry = dentry;
/* Link the top cgroup in this hierarchy into all
* the css_set objects */
@@ -1751,6 +1770,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
+ synchronize_rcu();
+ kfree(cgrp->name);
simple_xattrs_free(&cgrp->xattrs);
kill_litter_super(sb);
@@ -1771,49 +1792,47 @@ static struct kobject *cgroup_kobj;
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
- * Called with cgroup_mutex held or else with an RCU-protected cgroup
- * reference. Writes path of cgroup into buf. Returns 0 on success,
- * -errno on error.
+ * Writes path of cgroup into buf. Returns 0 on success, -errno on error.
+ *
+ * We can't generate cgroup path using dentry->d_name, as accessing
+ * dentry->name must be protected by irq-unsafe dentry->d_lock or parent
+ * inode's i_mutex, while on the other hand cgroup_path() can be called
+ * with some irq-safe spinlocks held.
*/
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
- struct dentry *dentry = cgrp->dentry;
+ int ret = -ENAMETOOLONG;
char *start;
- rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
- "cgroup_path() called without proper locking");
-
- if (cgrp == dummytop) {
- /*
- * Inactive subsystems have no dentry for their root
- * cgroup
- */
- strcpy(buf, "/");
- return 0;
- }
-
start = buf + buflen - 1;
-
*start = '\0';
- for (;;) {
- int len = dentry->d_name.len;
+ rcu_read_lock();
+ while (true) {
+ char *p;
+ int len;
+
+ p = rcu_dereference(cgrp->name);
+ len = strlen(p);
if ((start -= len) < buf)
- return -ENAMETOOLONG;
- memcpy(start, dentry->d_name.name, len);
+ goto fail;
+ memcpy(start, p, len);
+
cgrp = cgrp->parent;
if (!cgrp)
break;
- dentry = cgrp->dentry;
if (!cgrp->parent)
continue;
if (--start < buf)
- return -ENAMETOOLONG;
+ goto fail;
*start = '/';
}
+ ret = 0;
memmove(buf, start, buf + buflen - start);
- return 0;
+fail:
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path);
@@ -2539,13 +2558,41 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
+ int ret;
+ char *name, *old_name;
+ struct cgroup *cgrp;
+
+ /*
+ * It's convinient to use parent dir's i_mutex to protected
+ * cgrp->name.
+ */
+ lockdep_assert_held(&old_dir->i_mutex);
+
if (!S_ISDIR(old_dentry->d_inode->i_mode))
return -ENOTDIR;
if (new_dentry->d_inode)
return -EEXIST;
if (old_dir != new_dir)
return -EIO;
- return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+
+ cgrp = __d_cgrp(old_dentry);
+
+ name = cgroup_alloc_name(new_dentry);
+ if (!name)
+ return -ENOMEM;
+
+ ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+ if (ret) {
+ kfree(name);
+ return ret;
+ }
+
+ old_name = cgrp->name;
+ rcu_assign_pointer(cgrp->name, name);
+
+ synchronize_rcu();
+ kfree(old_name);
+ return 0;
}
static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
@@ -4144,9 +4191,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
if (!cgrp)
return -ENOMEM;
+ cgrp->name = cgroup_alloc_name(dentry);
+ if (!cgrp->name)
+ goto err_free_cgrp;
+
cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
if (cgrp->id < 0)
- goto err_free_cgrp;
+ goto err_free_name;
/*
* Only live parents can have children. Note that the liveliness
@@ -4252,6 +4303,8 @@ err_free_all:
deactivate_super(sb);
err_free_id:
ida_simple_remove(&root->cgroup_ida, cgrp->id);
+err_free_name:
+ kfree(cgrp->name);
err_free_cgrp:
kfree(cgrp);
return err;
@@ -4656,6 +4709,8 @@ int __init cgroup_init_early(void)
root_count = 1;
init_task.cgroups = &init_css_set;
+ dummytop->name = "/";
+
init_css_set_link.cg = &init_css_set;
init_css_set_link.cgrp = dummytop;
list_add(&init_css_set_link.cgrp_link_list,
--
1.8.0.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 2/2] cgroup: fix cgroup_path() vs rename() race, take 2
@ 2013-02-18 1:16 ` Li Zefan
0 siblings, 0 replies; 6+ messages in thread
From: Li Zefan @ 2013-02-18 1:16 UTC (permalink / raw)
To: Tejun Heo; +Cc: Al Viro, Sasha Levin, LKML, Cgroups
rename() will change dentry->d_name. The result of this race can
be worse than seeing partially rewritten name, but we might access
a stale pointer because rename() will re-allocate memory to hold
a longer name.
As accessing dentry->name must be protected by dentry->d_lock or
parent inode's i_mutex, while on the other hand cgroup-path() can
be called with some irq-safe spinlocks held, we can't generate
cgroup path using dentry->d_name.
Alternatively we make a copy of dentry->d_name and save it in
cgrp->name when a cgroup is created, and update cgrp->name at
rename().
v2: make cgrp->name RCU safe.
Signed-off-by: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
block/blk-cgroup.h | 2 -
include/linux/cgroup.h | 1 +
kernel/cgroup.c | 113 ++++++++++++++++++++++++++++++++++++-------------
3 files changed, 85 insertions(+), 31 deletions(-)
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 2459730..e2e3404 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -216,9 +216,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
int ret;
- rcu_read_lock();
ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
- rcu_read_unlock();
if (ret)
strncpy(buf, "<unavailable>", buflen);
return ret;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 900af59..a3d87d9 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -171,6 +171,7 @@ struct cgroup {
struct cgroup *parent; /* my parent */
struct dentry *dentry; /* cgroup fs entry, RCU protected */
+ char __rcu *name; /* a copy of dentry->d_name */
/* Private pointers for each registered subsystem */
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5d4c92e..26c071c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -890,6 +890,7 @@ static void cgroup_free_fn(struct work_struct *work)
simple_xattrs_free(&cgrp->xattrs);
ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+ kfree(cgrp->name);
kfree(cgrp);
}
@@ -1565,6 +1566,18 @@ static int cgroup_get_rootdir(struct super_block *sb)
return 0;
}
+static char *cgroup_alloc_name(struct dentry *dentry)
+{
+ char *name;
+
+ name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
+ if (!name)
+ return NULL;
+ memcpy(name, dentry->d_name.name, dentry->d_name.len);
+ name[dentry->d_name.len] = '\0';
+ return name;
+}
+
static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data)
@@ -1613,13 +1626,19 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int i;
struct hlist_node *node;
struct css_set *cg;
+ struct dentry *dentry;
BUG_ON(sb->s_root != NULL);
ret = cgroup_get_rootdir(sb);
if (ret)
goto drop_new_super;
- inode = sb->s_root->d_inode;
+ dentry = sb->s_root;
+ inode = dentry->d_inode;
+
+ root_cgrp->name = cgroup_alloc_name(dentry);
+ if (!root_cgrp->name)
+ goto drop_new_super;
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
@@ -1660,8 +1679,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
list_add(&root->root_list, &roots);
root_count++;
- sb->s_root->d_fsdata = root_cgrp;
- root->top_cgroup.dentry = sb->s_root;
+ dentry->d_fsdata = root_cgrp;
+ root->top_cgroup.dentry = dentry;
/* Link the top cgroup in this hierarchy into all
* the css_set objects */
@@ -1751,6 +1770,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
mutex_unlock(&cgroup_root_mutex);
mutex_unlock(&cgroup_mutex);
+ synchronize_rcu();
+ kfree(cgrp->name);
simple_xattrs_free(&cgrp->xattrs);
kill_litter_super(sb);
@@ -1771,49 +1792,47 @@ static struct kobject *cgroup_kobj;
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
- * Called with cgroup_mutex held or else with an RCU-protected cgroup
- * reference. Writes path of cgroup into buf. Returns 0 on success,
- * -errno on error.
+ * Writes path of cgroup into buf. Returns 0 on success, -errno on error.
+ *
+ * We can't generate cgroup path using dentry->d_name, as accessing
+ * dentry->name must be protected by irq-unsafe dentry->d_lock or parent
+ * inode's i_mutex, while on the other hand cgroup_path() can be called
+ * with some irq-safe spinlocks held.
*/
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
- struct dentry *dentry = cgrp->dentry;
+ int ret = -ENAMETOOLONG;
char *start;
- rcu_lockdep_assert(rcu_read_lock_held() || cgroup_lock_is_held(),
- "cgroup_path() called without proper locking");
-
- if (cgrp == dummytop) {
- /*
- * Inactive subsystems have no dentry for their root
- * cgroup
- */
- strcpy(buf, "/");
- return 0;
- }
-
start = buf + buflen - 1;
-
*start = '\0';
- for (;;) {
- int len = dentry->d_name.len;
+ rcu_read_lock();
+ while (true) {
+ char *p;
+ int len;
+
+ p = rcu_dereference(cgrp->name);
+ len = strlen(p);
if ((start -= len) < buf)
- return -ENAMETOOLONG;
- memcpy(start, dentry->d_name.name, len);
+ goto fail;
+ memcpy(start, p, len);
+
cgrp = cgrp->parent;
if (!cgrp)
break;
- dentry = cgrp->dentry;
if (!cgrp->parent)
continue;
if (--start < buf)
- return -ENAMETOOLONG;
+ goto fail;
*start = '/';
}
+ ret = 0;
memmove(buf, start, buf + buflen - start);
- return 0;
+fail:
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path);
@@ -2539,13 +2558,41 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
+ int ret;
+ char *name, *old_name;
+ struct cgroup *cgrp;
+
+ /*
+ * It's convinient to use parent dir's i_mutex to protected
+ * cgrp->name.
+ */
+ lockdep_assert_held(&old_dir->i_mutex);
+
if (!S_ISDIR(old_dentry->d_inode->i_mode))
return -ENOTDIR;
if (new_dentry->d_inode)
return -EEXIST;
if (old_dir != new_dir)
return -EIO;
- return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+
+ cgrp = __d_cgrp(old_dentry);
+
+ name = cgroup_alloc_name(new_dentry);
+ if (!name)
+ return -ENOMEM;
+
+ ret = simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+ if (ret) {
+ kfree(name);
+ return ret;
+ }
+
+ old_name = cgrp->name;
+ rcu_assign_pointer(cgrp->name, name);
+
+ synchronize_rcu();
+ kfree(old_name);
+ return 0;
}
static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
@@ -4144,9 +4191,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
if (!cgrp)
return -ENOMEM;
+ cgrp->name = cgroup_alloc_name(dentry);
+ if (!cgrp->name)
+ goto err_free_cgrp;
+
cgrp->id = ida_simple_get(&root->cgroup_ida, 1, 0, GFP_KERNEL);
if (cgrp->id < 0)
- goto err_free_cgrp;
+ goto err_free_name;
/*
* Only live parents can have children. Note that the liveliness
@@ -4252,6 +4303,8 @@ err_free_all:
deactivate_super(sb);
err_free_id:
ida_simple_remove(&root->cgroup_ida, cgrp->id);
+err_free_name:
+ kfree(cgrp->name);
err_free_cgrp:
kfree(cgrp);
return err;
@@ -4656,6 +4709,8 @@ int __init cgroup_init_early(void)
root_count = 1;
init_task.cgroups = &init_css_set;
+ dummytop->name = "/";
+
init_css_set_link.cg = &init_css_set;
init_css_set_link.cgrp = dummytop;
list_add(&init_css_set_link.cgrp_link_list,
--
1.8.0.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v2 2/2] cgroup: fix cgroup_path() vs rename() race, take 2
@ 2013-02-18 17:30 ` Tejun Heo
0 siblings, 0 replies; 6+ messages in thread
From: Tejun Heo @ 2013-02-18 17:30 UTC (permalink / raw)
To: Li Zefan; +Cc: Al Viro, Sasha Levin, LKML, Cgroups
Hello, Li.
On Mon, Feb 18, 2013 at 09:16:48AM +0800, Li Zefan wrote:
> @@ -171,6 +171,7 @@ struct cgroup {
>
> struct cgroup *parent; /* my parent */
> struct dentry *dentry; /* cgroup fs entry, RCU protected */
> + char __rcu *name; /* a copy of dentry->d_name */
A brief explanation of why this is necessary and how rcu is used would
be nice.
> +static char *cgroup_alloc_name(struct dentry *dentry)
> +{
> + char *name;
> +
> + name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
> + if (!name)
> + return NULL;
> + memcpy(name, dentry->d_name.name, dentry->d_name.len);
> + name[dentry->d_name.len] = '\0';
> + return name;
> +}
While d_name has length field, it's always properly NULL terminated,
so kstrdup() should suffice here. Right, Al?
> @@ -1613,13 +1626,19 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
...
> - inode = sb->s_root->d_inode;
> + dentry = sb->s_root;
> + inode = dentry->d_inode;
> +
> + root_cgrp->name = cgroup_alloc_name(dentry);
> + if (!root_cgrp->name)
> + goto drop_new_super;
Don't we need an RCU assignment? Is it safe because it isn't online
yet? But wouldn't this still trigger sparse warning?
> @@ -1751,6 +1770,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
> mutex_unlock(&cgroup_root_mutex);
> mutex_unlock(&cgroup_mutex);
>
> + synchronize_rcu();
An explanation on what we're synchronizing would be nice. Barriers
without explanation sucks because there's nothing directly linking the
barriers to the things which are being protected.
> @@ -2539,13 +2558,41 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
> static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
> struct inode *new_dir, struct dentry *new_dentry)
> {
...
> + old_name = cgrp->name;
> + rcu_assign_pointer(cgrp->name, name);
> +
> + synchronize_rcu();
Please don't call synchronize_rcu() from interface which is directly
visible to userland. It leads to sporadic difficult-to-reproduce
latencies which hurt enough in corner cases and this is kmalloc
memory. It's not like kfree_rcu() is difficult to use or anything.
> + kfree(old_name);
> + return 0;
> }
>
> static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
> @@ -4144,9 +4191,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
> if (!cgrp)
> return -ENOMEM;
>
> + cgrp->name = cgroup_alloc_name(dentry);
> + if (!cgrp->name)
> + goto err_free_cgrp;
Ditto with assignment.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2 2/2] cgroup: fix cgroup_path() vs rename() race, take 2
@ 2013-02-18 17:30 ` Tejun Heo
0 siblings, 0 replies; 6+ messages in thread
From: Tejun Heo @ 2013-02-18 17:30 UTC (permalink / raw)
To: Li Zefan; +Cc: Al Viro, Sasha Levin, LKML, Cgroups
Hello, Li.
On Mon, Feb 18, 2013 at 09:16:48AM +0800, Li Zefan wrote:
> @@ -171,6 +171,7 @@ struct cgroup {
>
> struct cgroup *parent; /* my parent */
> struct dentry *dentry; /* cgroup fs entry, RCU protected */
> + char __rcu *name; /* a copy of dentry->d_name */
A brief explanation of why this is necessary and how rcu is used would
be nice.
> +static char *cgroup_alloc_name(struct dentry *dentry)
> +{
> + char *name;
> +
> + name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
> + if (!name)
> + return NULL;
> + memcpy(name, dentry->d_name.name, dentry->d_name.len);
> + name[dentry->d_name.len] = '\0';
> + return name;
> +}
While d_name has length field, it's always properly NULL terminated,
so kstrdup() should suffice here. Right, Al?
> @@ -1613,13 +1626,19 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
...
> - inode = sb->s_root->d_inode;
> + dentry = sb->s_root;
> + inode = dentry->d_inode;
> +
> + root_cgrp->name = cgroup_alloc_name(dentry);
> + if (!root_cgrp->name)
> + goto drop_new_super;
Don't we need an RCU assignment? Is it safe because it isn't online
yet? But wouldn't this still trigger sparse warning?
> @@ -1751,6 +1770,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
> mutex_unlock(&cgroup_root_mutex);
> mutex_unlock(&cgroup_mutex);
>
> + synchronize_rcu();
An explanation on what we're synchronizing would be nice. Barriers
without explanation sucks because there's nothing directly linking the
barriers to the things which are being protected.
> @@ -2539,13 +2558,41 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
> static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
> struct inode *new_dir, struct dentry *new_dentry)
> {
...
> + old_name = cgrp->name;
> + rcu_assign_pointer(cgrp->name, name);
> +
> + synchronize_rcu();
Please don't call synchronize_rcu() from interface which is directly
visible to userland. It leads to sporadic difficult-to-reproduce
latencies which hurt enough in corner cases and this is kmalloc
memory. It's not like kfree_rcu() is difficult to use or anything.
> + kfree(old_name);
> + return 0;
> }
>
> static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
> @@ -4144,9 +4191,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
> if (!cgrp)
> return -ENOMEM;
>
> + cgrp->name = cgroup_alloc_name(dentry);
> + if (!cgrp->name)
> + goto err_free_cgrp;
Ditto with assignment.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2 2/2] cgroup: fix cgroup_path() vs rename() race, take 2
@ 2013-02-19 1:44 ` Li Zefan
0 siblings, 0 replies; 6+ messages in thread
From: Li Zefan @ 2013-02-19 1:44 UTC (permalink / raw)
To: Tejun Heo; +Cc: Al Viro, Sasha Levin, LKML, Cgroups
On 2013/2/19 1:30, Tejun Heo wrote:
> Hello, Li.
>
> On Mon, Feb 18, 2013 at 09:16:48AM +0800, Li Zefan wrote:
>> @@ -171,6 +171,7 @@ struct cgroup {
>>
>> struct cgroup *parent; /* my parent */
>> struct dentry *dentry; /* cgroup fs entry, RCU protected */
>> + char __rcu *name; /* a copy of dentry->d_name */
>
> A brief explanation of why this is necessary and how rcu is used would
> be nice.
>
The comments in cgroup_path() explains why we can't use dentry->d_name,
which suggests why cgrp->name is needed. I'll revise the comments there,
and add comments on the rcu thing.
>> +static char *cgroup_alloc_name(struct dentry *dentry)
>> +{
>> + char *name;
>> +
>> + name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
>> + if (!name)
>> + return NULL;
>> + memcpy(name, dentry->d_name.name, dentry->d_name.len);
>> + name[dentry->d_name.len] = '\0';
>> + return name;
>> +}
>
> While d_name has length field, it's always properly NULL terminated,
> so kstrdup() should suffice here. Right, Al?
>
Oh you're right. We pass dentry->d_name.name to printk %s in other places.
>> @@ -1613,13 +1626,19 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
> ...
>> - inode = sb->s_root->d_inode;
>> + dentry = sb->s_root;
>> + inode = dentry->d_inode;
>> +
>> + root_cgrp->name = cgroup_alloc_name(dentry);
>> + if (!root_cgrp->name)
>> + goto drop_new_super;
>
> Don't we need an RCU assignment? Is it safe because it isn't online
> yet? But wouldn't this still trigger sparse warning?
>
Yeah, it's safe.
To be frank, I haven't used sparse for years. Will check.
>> @@ -1751,6 +1770,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
>> mutex_unlock(&cgroup_root_mutex);
>> mutex_unlock(&cgroup_mutex);
>>
>> + synchronize_rcu();
>
> An explanation on what we're synchronizing would be nice. Barriers
> without explanation sucks because there's nothing directly linking the
> barriers to the things which are being protected.
>
>> @@ -2539,13 +2558,41 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
>> static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
>> struct inode *new_dir, struct dentry *new_dentry)
>> {
> ...
>> + old_name = cgrp->name;
>> + rcu_assign_pointer(cgrp->name, name);
>> +
>> + synchronize_rcu();
>
> Please don't call synchronize_rcu() from interface which is directly
> visible to userland. It leads to sporadic difficult-to-reproduce
> latencies which hurt enough in corner cases and this is kmalloc
> memory. It's not like kfree_rcu() is difficult to use or anything.
>
ok
>> + kfree(old_name);
>> + return 0;
>> }
>>
>> static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
>> @@ -4144,9 +4191,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
>> if (!cgrp)
>> return -ENOMEM;
>>
>> + cgrp->name = cgroup_alloc_name(dentry);
>> + if (!cgrp->name)
>> + goto err_free_cgrp;
>
> Ditto with assignment.
>
> Thanks.
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2 2/2] cgroup: fix cgroup_path() vs rename() race, take 2
@ 2013-02-19 1:44 ` Li Zefan
0 siblings, 0 replies; 6+ messages in thread
From: Li Zefan @ 2013-02-19 1:44 UTC (permalink / raw)
To: Tejun Heo; +Cc: Al Viro, Sasha Levin, LKML, Cgroups
On 2013/2/19 1:30, Tejun Heo wrote:
> Hello, Li.
>
> On Mon, Feb 18, 2013 at 09:16:48AM +0800, Li Zefan wrote:
>> @@ -171,6 +171,7 @@ struct cgroup {
>>
>> struct cgroup *parent; /* my parent */
>> struct dentry *dentry; /* cgroup fs entry, RCU protected */
>> + char __rcu *name; /* a copy of dentry->d_name */
>
> A brief explanation of why this is necessary and how rcu is used would
> be nice.
>
The comments in cgroup_path() explains why we can't use dentry->d_name,
which suggests why cgrp->name is needed. I'll revise the comments there,
and add comments on the rcu thing.
>> +static char *cgroup_alloc_name(struct dentry *dentry)
>> +{
>> + char *name;
>> +
>> + name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
>> + if (!name)
>> + return NULL;
>> + memcpy(name, dentry->d_name.name, dentry->d_name.len);
>> + name[dentry->d_name.len] = '\0';
>> + return name;
>> +}
>
> While d_name has length field, it's always properly NULL terminated,
> so kstrdup() should suffice here. Right, Al?
>
Oh you're right. We pass dentry->d_name.name to printk %s in other places.
>> @@ -1613,13 +1626,19 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
> ...
>> - inode = sb->s_root->d_inode;
>> + dentry = sb->s_root;
>> + inode = dentry->d_inode;
>> +
>> + root_cgrp->name = cgroup_alloc_name(dentry);
>> + if (!root_cgrp->name)
>> + goto drop_new_super;
>
> Don't we need an RCU assignment? Is it safe because it isn't online
> yet? But wouldn't this still trigger sparse warning?
>
Yeah, it's safe.
To be frank, I haven't used sparse for years. Will check.
>> @@ -1751,6 +1770,8 @@ static void cgroup_kill_sb(struct super_block *sb) {
>> mutex_unlock(&cgroup_root_mutex);
>> mutex_unlock(&cgroup_mutex);
>>
>> + synchronize_rcu();
>
> An explanation on what we're synchronizing would be nice. Barriers
> without explanation sucks because there's nothing directly linking the
> barriers to the things which are being protected.
>
>> @@ -2539,13 +2558,41 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
>> static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
>> struct inode *new_dir, struct dentry *new_dentry)
>> {
> ...
>> + old_name = cgrp->name;
>> + rcu_assign_pointer(cgrp->name, name);
>> +
>> + synchronize_rcu();
>
> Please don't call synchronize_rcu() from interface which is directly
> visible to userland. It leads to sporadic difficult-to-reproduce
> latencies which hurt enough in corner cases and this is kmalloc
> memory. It's not like kfree_rcu() is difficult to use or anything.
>
ok
>> + kfree(old_name);
>> + return 0;
>> }
>>
>> static struct simple_xattrs *__d_xattrs(struct dentry *dentry)
>> @@ -4144,9 +4191,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
>> if (!cgrp)
>> return -ENOMEM;
>>
>> + cgrp->name = cgroup_alloc_name(dentry);
>> + if (!cgrp->name)
>> + goto err_free_cgrp;
>
> Ditto with assignment.
>
> Thanks.
>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2013-02-19 1:45 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-18 1:16 [PATCH v2 2/2] cgroup: fix cgroup_path() vs rename() race, take 2 Li Zefan
2013-02-18 1:16 ` Li Zefan
2013-02-18 17:30 ` Tejun Heo
2013-02-18 17:30 ` Tejun Heo
2013-02-19 1:44 ` Li Zefan
2013-02-19 1:44 ` Li Zefan
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.