* [PATCH 01/11] sysfs: Support for preventing unmounts.
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
@ 2008-05-06 17:30 ` Benjamin Thery
2008-05-06 17:30 ` [PATCH 02/11] sysfs: sysfs_get_dentry add a sb parameter Benjamin Thery
` (10 subsequent siblings)
11 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:30 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
sysfs: Support for preventing unmounts.
To support mounting multiple instances of sysfs, I occasionally
need to walk through all of the currently present sysfs super blocks.
To allow this iteration, this patch adds sysfs_grab_supers
and sysfs_release_supers. While a piece of code is in
a section surrounded by these, no more sysfs super blocks
will be either created or destroyed.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/mount.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++------
fs/sysfs/sysfs.h | 10 ++++++
2 files changed, 81 insertions(+), 8 deletions(-)
Index: linux-vanilla/fs/sysfs/mount.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/mount.c
+++ linux-vanilla/fs/sysfs/mount.c
@@ -41,47 +41,110 @@ struct sysfs_dirent sysfs_root = {
static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
{
- struct inode *inode;
- struct dentry *root;
+ struct sysfs_super_info *info = NULL;
+ struct inode *inode = NULL;
+ struct dentry *root = NULL;
+ int error;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
sb->s_magic = SYSFS_MAGIC;
sb->s_op = &sysfs_ops;
sb->s_time_gran = 1;
- sysfs_sb = sb;
+ if (!sysfs_sb)
+ sysfs_sb = sb;
+
+ error = -ENOMEM;
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ goto out_err;
/* get root inode, initialize and unlock it */
+ error = -ENOMEM;
inode = sysfs_get_inode(&sysfs_root);
if (!inode) {
pr_debug("sysfs: could not get root inode\n");
- return -ENOMEM;
+ goto out_err;
}
/* instantiate and link root dentry */
+ error = -ENOMEM;
root = d_alloc_root(inode);
if (!root) {
pr_debug("%s: could not get root dentry!\n",__func__);
- iput(inode);
- return -ENOMEM;
+ goto out_err;
}
root->d_fsdata = &sysfs_root;
sb->s_root = root;
+ sb->s_fs_info = info;
return 0;
+
+out_err:
+ dput(root);
+ iput(inode);
+ kfree(info);
+ if (sysfs_sb == sb)
+ sysfs_sb = NULL;
+ return error;
}
static int sysfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
- return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
+ int rc;
+ mutex_lock(&sysfs_rename_mutex);
+ rc = get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
+ mutex_unlock(&sysfs_rename_mutex);
+ return rc;
}
-static struct file_system_type sysfs_fs_type = {
+struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.get_sb = sysfs_get_sb,
.kill_sb = kill_anon_super,
};
+void sysfs_grab_supers(void)
+{
+ /* must hold sysfs_rename_mutex (sysfs_get_sb() also takes it around get_sb_single()) */
+ struct super_block *sb;
+ /* Loop until every sysfs superblock has an extra s_active reference and is marked grabbed */
+restart:
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ if (sysfs_info(sb)->grabbed)
+ continue; /* already pinned on an earlier pass */
+ /* Wait for unmount activity to complete. */
+ if (sb->s_count < S_BIAS) {
+ sb->s_count += 1; /* presumably keeps sb from being freed while sb_lock is dropped */
+ spin_unlock(&sb_lock);
+ down_read(&sb->s_umount); /* sleeps here; NOTE(review): drop_super() is expected to release it - confirm */
+ drop_super(sb);
+ goto restart; /* sb_lock was dropped, so rescan the list from the start */
+ }
+ atomic_inc(&sb->s_active);
+ sysfs_info(sb)->grabbed = 1;
+ }
+ spin_unlock(&sb_lock);
+}
+
+void sysfs_release_supers(void)
+{
+ /* Undoes sysfs_grab_supers(); must hold sysfs_rename_mutex */
+ struct super_block *sb;
+restart:
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ if (!sysfs_info(sb)->grabbed)
+ continue;
+ sysfs_info(sb)->grabbed = 0; /* clear first so the rescan below skips this sb */
+ spin_unlock(&sb_lock);
+ deactivate_super(sb); /* called without sb_lock; presumably drops the s_active taken in sysfs_grab_supers() */
+ goto restart; /* sb_lock was dropped, so walk the list again from the start */
+ }
+ spin_unlock(&sb_lock);
+}
+
int __init sysfs_init(void)
{
int err = -ENOMEM;
Index: linux-vanilla/fs/sysfs/sysfs.h
===================================================================
--- linux-vanilla.orig/fs/sysfs/sysfs.h
+++ linux-vanilla/fs/sysfs/sysfs.h
@@ -85,12 +85,22 @@ struct sysfs_addrm_cxt {
int cnt;
};
+struct sysfs_super_info {
+ int grabbed; /* set to 1 by sysfs_grab_supers(), back to 0 by sysfs_release_supers() */
+};
+
+#define sysfs_info(SB) ((struct sysfs_super_info *)(SB)->s_fs_info)
+
/*
* mount.c
*/
extern struct sysfs_dirent sysfs_root;
extern struct super_block *sysfs_sb;
extern struct kmem_cache *sysfs_dir_cachep;
+extern struct file_system_type sysfs_fs_type;
+
+void sysfs_grab_supers(void);
+void sysfs_release_supers(void);
/*
* dir.c
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 02/11] sysfs: sysfs_get_dentry add a sb parameter
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
2008-05-06 17:30 ` [PATCH 01/11] sysfs: Support for preventing unmounts Benjamin Thery
@ 2008-05-06 17:30 ` Benjamin Thery
2008-05-06 17:31 ` [PATCH 03/11] sysfs: Implement __sysfs_get_dentry Benjamin Thery
` (9 subsequent siblings)
11 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:30 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
sysfs: sysfs_get_dentry add a sb parameter
In preparation for multiple mounts of sysfs add a superblock parameter to
sysfs_get_dentry.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/dir.c | 12 +++++++-----
fs/sysfs/file.c | 2 +-
fs/sysfs/sysfs.h | 3 ++-
3 files changed, 10 insertions(+), 7 deletions(-)
Index: linux-vanilla/fs/sysfs/dir.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/dir.c
+++ linux-vanilla/fs/sysfs/dir.c
@@ -85,6 +85,7 @@ static void sysfs_unlink_sibling(struct
/**
* sysfs_get_dentry - get dentry for the given sysfs_dirent
+ * @sb: superblock of the dentry to return
* @sd: sysfs_dirent of interest
*
* Get dentry for @sd. Dentry is looked up if currently not
@@ -97,9 +98,10 @@ static void sysfs_unlink_sibling(struct
* RETURNS:
* Pointer to found dentry on success, ERR_PTR() value on error.
*/
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+struct dentry *sysfs_get_dentry(struct super_block *sb,
+ struct sysfs_dirent *sd)
{
- struct dentry *dentry = dget(sysfs_sb->s_root);
+ struct dentry *dentry = dget(sb->s_root);
while (dentry->d_fsdata != sd) {
struct sysfs_dirent *cur;
@@ -781,7 +783,7 @@ int sysfs_rename_dir(struct kobject * ko
goto out; /* nothing to rename */
/* get the original dentry */
- old_dentry = sysfs_get_dentry(sd);
+ old_dentry = sysfs_get_dentry(sysfs_sb, sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
old_dentry = NULL;
@@ -849,7 +851,7 @@ int sysfs_move_dir(struct kobject *kobj,
goto out; /* nothing to move */
/* get dentries */
- old_dentry = sysfs_get_dentry(sd);
+ old_dentry = sysfs_get_dentry(sysfs_sb, sd);
if (IS_ERR(old_dentry)) {
error = PTR_ERR(old_dentry);
old_dentry = NULL;
@@ -857,7 +859,7 @@ int sysfs_move_dir(struct kobject *kobj,
}
old_parent = old_dentry->d_parent;
- new_parent = sysfs_get_dentry(new_parent_sd);
+ new_parent = sysfs_get_dentry(sysfs_sb, new_parent_sd);
if (IS_ERR(new_parent)) {
error = PTR_ERR(new_parent);
new_parent = NULL;
Index: linux-vanilla/fs/sysfs/file.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/file.c
+++ linux-vanilla/fs/sysfs/file.c
@@ -571,7 +571,7 @@ int sysfs_chmod_file(struct kobject *kob
goto out;
mutex_lock(&sysfs_rename_mutex);
- victim = sysfs_get_dentry(victim_sd);
+ victim = sysfs_get_dentry(sysfs_sb, victim_sd);
mutex_unlock(&sysfs_rename_mutex);
if (IS_ERR(victim)) {
rc = PTR_ERR(victim);
Index: linux-vanilla/fs/sysfs/sysfs.h
===================================================================
--- linux-vanilla.orig/fs/sysfs/sysfs.h
+++ linux-vanilla/fs/sysfs/sysfs.h
@@ -112,7 +112,8 @@ extern spinlock_t sysfs_assoc_lock;
extern const struct file_operations sysfs_dir_operations;
extern const struct inode_operations sysfs_dir_inode_operations;
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
+struct dentry *sysfs_get_dentry(struct super_block *sb,
+ struct sysfs_dirent *sd);
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
void sysfs_put_active_two(struct sysfs_dirent *sd);
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 03/11] sysfs: Implement __sysfs_get_dentry
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
2008-05-06 17:30 ` [PATCH 01/11] sysfs: Support for preventing unmounts Benjamin Thery
2008-05-06 17:30 ` [PATCH 02/11] sysfs: sysfs_get_dentry add a sb parameter Benjamin Thery
@ 2008-05-06 17:31 ` Benjamin Thery
2008-05-06 17:31 ` [PATCH 04/11] sysfs: Rename Support multiple superblocks Benjamin Thery
` (8 subsequent siblings)
11 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:31 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
sysfs: Implement __sysfs_get_dentry
This function is similar to sysfs_get_dentry but much simpler: it
returns a sysfs dentry only if one currently exists.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/dir.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
Index: linux-vanilla/fs/sysfs/dir.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/dir.c
+++ linux-vanilla/fs/sysfs/dir.c
@@ -768,6 +768,45 @@ void sysfs_remove_dir(struct kobject * k
__sysfs_remove_dir(sd);
}
+/**
+ * __sysfs_get_dentry - get an already existing dentry for a sysfs_dirent
+ * @sb: superblock of the dentry to return
+ * @sd: sysfs_dirent of interest
+ *
+ * Look up the inode of @sd (always searched for in sysfs_sb) and walk its
+ * aliases for a hashed (or root) dentry that belongs to @sb. Creates nothing.
+ *
+ * LOCKING:
+ * Kernel thread context (may sleep). Takes dcache_lock internally.
+ *
+ * RETURNS:
+ * Referenced dentry (release with dput()) if one exists, NULL otherwise.
+ */
+static struct dentry *__sysfs_get_dentry(struct super_block *sb,
+ struct sysfs_dirent *sd)
+{
+ struct inode *inode;
+ struct dentry *dentry = NULL;
+
+ inode = ilookup5_nowait(sysfs_sb, sd->s_ino, sysfs_ilookup_test, sd);
+ if (inode && !(inode->i_state & I_NEW)) {
+ struct dentry *alias;
+ spin_lock(&dcache_lock);
+ list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ if (!IS_ROOT(alias) && d_unhashed(alias))
+ continue; /* skip unhashed non-root aliases */
+ if (alias->d_sb != sb)
+ continue; /* alias belongs to a different superblock */
+ dentry = alias;
+ dget_locked(dentry);
+ break;
+ }
+ spin_unlock(&dcache_lock);
+ }
+ iput(inode);
+ return dentry;
+}
+
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
{
struct sysfs_dirent *sd = kobj->sd;
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 04/11] sysfs: Rename Support multiple superblocks
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
` (2 preceding siblings ...)
2008-05-06 17:31 ` [PATCH 03/11] sysfs: Implement __sysfs_get_dentry Benjamin Thery
@ 2008-05-06 17:31 ` Benjamin Thery
2008-05-06 17:31 ` [PATCH 05/11] sysfs: sysfs_chmod_file handle " Benjamin Thery
` (7 subsequent siblings)
11 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:31 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
sysfs: Rename Support multiple superblocks
This patch modifies the sysfs_rename_dir and sysfs_move_dir routines
to support multiple sysfs dentry trees rooted in different
sysfs superblocks.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/dir.c | 194 +++++++++++++++++++++++++++++++++++++++------------------
1 file changed, 136 insertions(+), 58 deletions(-)
Index: linux-vanilla/fs/sysfs/dir.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/dir.c
+++ linux-vanilla/fs/sysfs/dir.c
@@ -807,43 +807,112 @@ static struct dentry *__sysfs_get_dentry
return dentry;
}
+struct sysfs_rename_struct {
+ struct list_head list; /* link in the todo list built by prep_rename() */
+ struct dentry *old_dentry; /* existing dentry of the entry being renamed or moved */
+ struct dentry *new_dentry; /* dentry allocated under new_parent for the new name */
+ struct dentry *old_parent; /* referenced d_parent of old_dentry */
+ struct dentry *new_parent; /* dentry of the destination directory in the same superblock */
+};
+
+static void post_rename(struct list_head *head)
+{
+	while (!list_empty(head)) {	/* drain the todo list, dropping every reference it holds */
+		struct sysfs_rename_struct *item =
+			list_entry(head->next, struct sysfs_rename_struct, list);
+		list_del(&item->list);	/* unlink first; the dputs below never touch the list */
+		dput(item->old_dentry);
+		dput(item->new_dentry);
+		dput(item->old_parent);
+		dput(item->new_parent);
+		kfree(item);
+	}
+}
+
+/*
+ * prep_rename - queue on @head one rename record per sysfs superblock that
+ * has a dentry for @sd.  Returns 0, or -errno with @head emptied again.
+ */
+static int prep_rename(struct list_head *head, struct sysfs_dirent *sd,
+		struct sysfs_dirent *new_parent_sd, const char *name)
+{
+	struct sysfs_rename_struct *srs;
+	struct super_block *sb;
+	struct dentry *dentry;
+	int error;
+
+	list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+		dentry = sysfs_get_dentry(sb, sd);
+		if (dentry == ERR_PTR(-EXDEV))
+			continue;
+		if (IS_ERR(dentry)) {
+			error = PTR_ERR(dentry);
+			goto err_out;
+		}
+
+		srs = kzalloc(sizeof(*srs), GFP_KERNEL);
+		if (!srs) {
+			error = -ENOMEM;	/* was left unset on this path */
+			dput(dentry);
+			goto err_out;
+		}
+		list_add(&srs->list, head);	/* list_add(new, head): srs joins the todo list */
+		srs->old_dentry = dentry;
+		srs->old_parent = dget(dentry->d_parent);
+		dentry = sysfs_get_dentry(sb, new_parent_sd);
+		if (IS_ERR(dentry)) {
+			error = PTR_ERR(dentry);
+			goto err_out;
+		}
+		srs->new_parent = dentry;
+		error = -ENOMEM;
+		dentry = d_alloc_name(srs->new_parent, name);
+		if (!dentry)
+			goto err_out;
+		srs->new_dentry = dentry;
+	}
+	return 0;
+
+err_out:
+	post_rename(head);	/* releases everything queued so far, including a partial srs */
+	return error;
+}
+
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
{
struct sysfs_dirent *sd = kobj->sd;
- struct dentry *parent = NULL;
- struct dentry *old_dentry = NULL, *new_dentry = NULL;
+ struct list_head todo;
+ struct sysfs_rename_struct *srs;
+ struct inode *parent_inode = NULL;
const char *dup_name = NULL;
int error;
+ INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
error = 0;
if (strcmp(sd->s_name, new_name) == 0)
goto out; /* nothing to rename */
- /* get the original dentry */
- old_dentry = sysfs_get_dentry(sysfs_sb, sd);
- if (IS_ERR(old_dentry)) {
- error = PTR_ERR(old_dentry);
- old_dentry = NULL;
- goto out;
- }
+ sysfs_grab_supers();
+ error = prep_rename(&todo, sd, sd->s_parent, new_name);
+ if (error)
+ goto out_release;
- parent = old_dentry->d_parent;
+ error = -ENOMEM;
+ mutex_lock(&sysfs_mutex);
+ parent_inode = sysfs_get_inode(sd->s_parent);
+ mutex_unlock(&sysfs_mutex);
+ if (!parent_inode)
+ goto out_release;
- /* lock parent and get dentry for new name */
- mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&parent_inode->i_mutex);
mutex_lock(&sysfs_mutex);
error = -EEXIST;
if (sysfs_find_dirent(sd->s_parent, new_name))
goto out_unlock;
- error = -ENOMEM;
- new_dentry = d_alloc_name(parent, new_name);
- if (!new_dentry)
- goto out_unlock;
-
/* rename kobject and sysfs_dirent */
error = -ENOMEM;
new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
@@ -858,17 +927,21 @@ int sysfs_rename_dir(struct kobject * ko
sd->s_name = new_name;
/* rename */
- d_add(new_dentry, NULL);
- d_move(old_dentry, new_dentry);
+ list_for_each_entry(srs, &todo, list) {
+ d_add(srs->new_dentry, NULL);
+ d_move(srs->old_dentry, srs->new_dentry);
+ }
error = 0;
- out_unlock:
+out_unlock:
mutex_unlock(&sysfs_mutex);
- mutex_unlock(&parent->d_inode->i_mutex);
+ mutex_unlock(&parent_inode->i_mutex);
kfree(dup_name);
- dput(old_dentry);
- dput(new_dentry);
- out:
+out_release:
+ iput(parent_inode);
+ post_rename(&todo);
+ sysfs_release_supers();
+out:
mutex_unlock(&sysfs_rename_mutex);
return error;
}
@@ -877,10 +950,12 @@ int sysfs_move_dir(struct kobject *kobj,
{
struct sysfs_dirent *sd = kobj->sd;
struct sysfs_dirent *new_parent_sd;
- struct dentry *old_parent, *new_parent = NULL;
- struct dentry *old_dentry = NULL, *new_dentry = NULL;
+ struct list_head todo;
+ struct sysfs_rename_struct *srs;
+ struct inode *old_parent_inode = NULL, *new_parent_inode = NULL;
int error;
+ INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
BUG_ON(!sd->s_parent);
new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
@@ -889,26 +964,29 @@ int sysfs_move_dir(struct kobject *kobj,
if (sd->s_parent == new_parent_sd)
goto out; /* nothing to move */
- /* get dentries */
- old_dentry = sysfs_get_dentry(sysfs_sb, sd);
- if (IS_ERR(old_dentry)) {
- error = PTR_ERR(old_dentry);
- old_dentry = NULL;
- goto out;
- }
- old_parent = old_dentry->d_parent;
-
- new_parent = sysfs_get_dentry(sysfs_sb, new_parent_sd);
- if (IS_ERR(new_parent)) {
- error = PTR_ERR(new_parent);
- new_parent = NULL;
- goto out;
- }
+ sysfs_grab_supers();
+ error = prep_rename(&todo, sd, new_parent_sd, sd->s_name);
+ if (error)
+ goto out_release;
+
+ error = -ENOMEM;
+ mutex_lock(&sysfs_mutex);
+ old_parent_inode = sysfs_get_inode(sd->s_parent);
+ mutex_unlock(&sysfs_mutex);
+ if (!old_parent_inode)
+ goto out_release;
+
+ error = -ENOMEM;
+ mutex_lock(&sysfs_mutex);
+ new_parent_inode = sysfs_get_inode(new_parent_sd);
+ mutex_unlock(&sysfs_mutex);
+ if (!new_parent_inode)
+ goto out_release;
again:
- mutex_lock(&old_parent->d_inode->i_mutex);
- if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
- mutex_unlock(&old_parent->d_inode->i_mutex);
+ mutex_lock(&old_parent_inode->i_mutex);
+ if (!mutex_trylock(&new_parent_inode->i_mutex)) {
+ mutex_unlock(&old_parent_inode->i_mutex);
goto again;
}
mutex_lock(&sysfs_mutex);
@@ -917,14 +995,11 @@ again:
if (sysfs_find_dirent(new_parent_sd, sd->s_name))
goto out_unlock;
- error = -ENOMEM;
- new_dentry = d_alloc_name(new_parent, sd->s_name);
- if (!new_dentry)
- goto out_unlock;
-
error = 0;
- d_add(new_dentry, NULL);
- d_move(old_dentry, new_dentry);
+ list_for_each_entry(srs, &todo, list) {
+ d_add(srs->new_dentry, NULL);
+ d_move(srs->old_dentry, srs->new_dentry);
+ }
/* Remove from old parent's list and insert into new parent's list. */
sysfs_unlink_sibling(sd);
@@ -933,14 +1008,17 @@ again:
sd->s_parent = new_parent_sd;
sysfs_link_sibling(sd);
- out_unlock:
+out_unlock:
mutex_unlock(&sysfs_mutex);
- mutex_unlock(&new_parent->d_inode->i_mutex);
- mutex_unlock(&old_parent->d_inode->i_mutex);
- out:
- dput(new_parent);
- dput(old_dentry);
- dput(new_dentry);
+ mutex_unlock(&new_parent_inode->i_mutex);
+ mutex_unlock(&old_parent_inode->i_mutex);
+
+out_release:
+ iput(new_parent_inode);
+ iput(old_parent_inode);
+ post_rename(&todo);
+ sysfs_release_supers();
+out:
mutex_unlock(&sysfs_rename_mutex);
return error;
}
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 05/11] sysfs: sysfs_chmod_file handle multiple superblocks
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
` (3 preceding siblings ...)
2008-05-06 17:31 ` [PATCH 04/11] sysfs: Rename Support multiple superblocks Benjamin Thery
@ 2008-05-06 17:31 ` Benjamin Thery
2008-05-06 17:31 ` [PATCH 06/11] sysfs: Implement sysfs tagged directory support Benjamin Thery
` (6 subsequent siblings)
11 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:31 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
sysfs: sysfs_chmod_file handle multiple superblocks
Teach sysfs_chmod_file how to handle multiple sysfs
superblocks.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/file.c | 54 ++++++++++++++++++++++++++++++------------------------
1 file changed, 30 insertions(+), 24 deletions(-)
Index: linux-vanilla/fs/sysfs/file.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/file.c
+++ linux-vanilla/fs/sysfs/file.c
@@ -560,7 +560,8 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_grou
int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
{
struct sysfs_dirent *victim_sd = NULL;
- struct dentry *victim = NULL;
+ struct super_block *sb;
+ struct dentry *victim;
struct inode * inode;
struct iattr newattrs;
int rc;
@@ -571,31 +572,36 @@ int sysfs_chmod_file(struct kobject *kob
goto out;
mutex_lock(&sysfs_rename_mutex);
- victim = sysfs_get_dentry(sysfs_sb, victim_sd);
- mutex_unlock(&sysfs_rename_mutex);
- if (IS_ERR(victim)) {
- rc = PTR_ERR(victim);
- victim = NULL;
- goto out;
- }
-
- inode = victim->d_inode;
+ sysfs_grab_supers();
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ victim = sysfs_get_dentry(sb, victim_sd);
+ if (victim == ERR_PTR(-EXDEV))
+ continue;
+ if (IS_ERR(victim)) {
+ rc = PTR_ERR(victim);
+ victim = NULL;
+ goto out_unlock;
+ }
+
+ inode = victim->d_inode;
+ mutex_lock(&inode->i_mutex);
+ newattrs.ia_mode = (mode & S_IALLUGO) |
+ (inode->i_mode & ~S_IALLUGO);
+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+ rc = notify_change(victim, &newattrs);
+ if (rc == 0) {
+ mutex_lock(&sysfs_mutex);
+ victim_sd->s_mode = newattrs.ia_mode;
+ mutex_unlock(&sysfs_mutex);
+ }
+ mutex_unlock(&inode->i_mutex);
- mutex_lock(&inode->i_mutex);
-
- newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
- newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- rc = notify_change(victim, &newattrs);
-
- if (rc == 0) {
- mutex_lock(&sysfs_mutex);
- victim_sd->s_mode = newattrs.ia_mode;
- mutex_unlock(&sysfs_mutex);
+ dput(victim);
}
-
- mutex_unlock(&inode->i_mutex);
- out:
- dput(victim);
+out_unlock:
+ sysfs_release_supers();
+ mutex_unlock(&sysfs_rename_mutex);
+out:
sysfs_put(victim_sd);
return rc;
}
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
` (4 preceding siblings ...)
2008-05-06 17:31 ` [PATCH 05/11] sysfs: sysfs_chmod_file handle " Benjamin Thery
@ 2008-05-06 17:31 ` Benjamin Thery
2008-05-06 17:31 ` [PATCH 07/11] sysfs: Implement sysfs_delete_link and sysfs_rename_link Benjamin Thery
` (5 subsequent siblings)
11 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:31 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
sysfs: Implement sysfs tagged directory support.
The problem. When implementing a network namespace I need to be able
to have multiple network devices with the same name. Currently this
is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and
potentially a few other directories of the form /sys/ ... /net/*.
What this patch does is to add an additional tag field to the
sysfs dirent structure. For directories that should show different
contents depending on the context such as /sys/class/net/, and
/sys/devices/virtual/net/ this tag field is used to specify the
context in which those directories should be visible. Effectively
this is the same as creating multiple distinct directories with
the same name but internally to sysfs the result is nicer.
I am calling the concept of a single directory that looks like multiple
directories all at the same path in the filesystem tagged directories.
For the networking namespace the set of directories whose contents I need
to filter with tags can depend on the presence or absence of hotplug
hardware or which modules are currently loaded. Which means I need
a simple race free way to setup those directories as tagged.
To achieve a race free design all tagged directories are created
and managed by sysfs itself. The upper level code that knows what
tagged directories we need provides just two methods that enable
this:
sb_tag() - that returns a "void *" tag that identifies the context of
the process that mounted sysfs.
kobject_tag(kobj) - that returns a "void *" tag that identifies the context
a kobject should be in.
Everything else is left up to sysfs.
For the network namespace sb_tag and kobject_tag are essentially
one line functions, and look to remain that.
The work needed in sysfs is more extensive. At each directory
or symlink creation I need to check if the directory it is being
created in is a tagged directory and if so generate the appropriate
tag to place on the sysfs_dirent. Likewise at each symlink or
directory removal I need to check if the sysfs directory it is
being removed from is a tagged directory and if so figure out
which tag goes along with the name I am deleting.
Currently only directories which hold kobjects, and
symlinks are supported. There is not enough information
in the current file attribute interfaces to give us anything
to discriminate on which makes it useless, and there are
no potential users which makes it an uninteresting problem
to solve.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/bin.c | 2
fs/sysfs/dir.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++----
fs/sysfs/file.c | 8 +-
fs/sysfs/group.c | 4 -
fs/sysfs/inode.c | 7 +
fs/sysfs/mount.c | 44 ++++++++++-
fs/sysfs/symlink.c | 2
fs/sysfs/sysfs.h | 17 ++++
include/linux/sysfs.h | 17 ++++
9 files changed, 257 insertions(+), 29 deletions(-)
Index: linux-vanilla/fs/sysfs/bin.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/bin.c
+++ linux-vanilla/fs/sysfs/bin.c
@@ -252,7 +252,7 @@ int sysfs_create_bin_file(struct kobject
void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
{
- sysfs_hash_and_remove(kobj->sd, attr->attr.name);
+ sysfs_hash_and_remove(kobj, kobj->sd, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
Index: linux-vanilla/fs/sysfs/dir.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/dir.c
+++ linux-vanilla/fs/sysfs/dir.c
@@ -101,8 +101,17 @@ static void sysfs_unlink_sibling(struct
struct dentry *sysfs_get_dentry(struct super_block *sb,
struct sysfs_dirent *sd)
{
- struct dentry *dentry = dget(sb->s_root);
+ struct dentry *dentry;
+
+ /* Bail if this sd won't show up in this superblock */
+ if (sd->s_parent && sd->s_parent->s_flags & SYSFS_FLAG_TAGGED) {
+ const void *tag;
+ tag = sysfs_lookup_tag(sd->s_parent, sb);
+ if (sd->s_tag.tag != tag)
+ return ERR_PTR(-EXDEV);
+ }
+ dentry = dget(sb->s_root);
while (dentry->d_fsdata != sd) {
struct sysfs_dirent *cur;
struct dentry *parent;
@@ -421,7 +430,11 @@ void sysfs_addrm_start(struct sysfs_addr
*/
int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
- if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) {
+ const void *tag = NULL;
+
+ tag = sysfs_creation_tag(acxt->parent_sd, sd);
+
+ if (sysfs_find_dirent(acxt->parent_sd, tag, sd->s_name)) {
printk(KERN_WARNING "sysfs: duplicate filename '%s' "
"can not be created\n", sd->s_name);
WARN_ON(1);
@@ -430,6 +443,9 @@ int sysfs_add_one(struct sysfs_addrm_cxt
sd->s_parent = sysfs_get(acxt->parent_sd);
+ if (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)
+ sd->s_tag.tag = tag;
+
if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
inc_nlink(acxt->parent_inode);
@@ -576,13 +592,18 @@ void sysfs_addrm_finish(struct sysfs_add
* Pointer to sysfs_dirent if found, NULL if not.
*/
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+ const void *tag,
const unsigned char *name)
{
struct sysfs_dirent *sd;
- for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
+ for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
+ if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ (sd->s_tag.tag != tag))
+ continue;
if (!strcmp(sd->s_name, name))
return sd;
+ }
return NULL;
}
@@ -606,7 +627,7 @@ struct sysfs_dirent *sysfs_get_dirent(st
struct sysfs_dirent *sd;
mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, name);
+ sd = sysfs_find_dirent(parent_sd, NULL, name);
sysfs_get(sd);
mutex_unlock(&sysfs_mutex);
@@ -672,13 +693,16 @@ static struct dentry * sysfs_lookup(stru
struct nameidata *nd)
{
struct dentry *ret = NULL;
- struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
+ struct dentry *parent = dentry->d_parent;
+ struct sysfs_dirent *parent_sd = parent->d_fsdata;
struct sysfs_dirent *sd;
struct inode *inode;
+ const void *tag;
mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
+ tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
+ sd = sysfs_find_dirent(parent_sd, tag, dentry->d_name.name);
/* no such entry */
if (!sd) {
@@ -885,19 +909,24 @@ int sysfs_rename_dir(struct kobject * ko
struct sysfs_rename_struct *srs;
struct inode *parent_inode = NULL;
const char *dup_name = NULL;
+ const void *old_tag, *tag;
int error;
INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
+ old_tag = sysfs_dirent_tag(sd);
+ tag = sysfs_creation_tag(sd->s_parent, sd);
error = 0;
- if (strcmp(sd->s_name, new_name) == 0)
+ if ((old_tag == tag) && (strcmp(sd->s_name, new_name) == 0))
goto out; /* nothing to rename */
sysfs_grab_supers();
- error = prep_rename(&todo, sd, sd->s_parent, new_name);
- if (error)
- goto out_release;
+ if (old_tag == tag) {
+ error = prep_rename(&todo, sd, sd->s_parent, new_name);
+ if (error)
+ goto out_release;
+ }
error = -ENOMEM;
mutex_lock(&sysfs_mutex);
@@ -910,7 +939,7 @@ int sysfs_rename_dir(struct kobject * ko
mutex_lock(&sysfs_mutex);
error = -EEXIST;
- if (sysfs_find_dirent(sd->s_parent, new_name))
+ if (sysfs_find_dirent(sd->s_parent, tag, new_name))
goto out_unlock;
/* rename kobject and sysfs_dirent */
@@ -925,6 +954,8 @@ int sysfs_rename_dir(struct kobject * ko
dup_name = sd->s_name;
sd->s_name = new_name;
+ if (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)
+ sd->s_tag.tag = tag;
/* rename */
list_for_each_entry(srs, &todo, list) {
@@ -932,6 +963,20 @@ int sysfs_rename_dir(struct kobject * ko
d_move(srs->old_dentry, srs->new_dentry);
}
+ /* If we are moving across superblocks drop the dcache entries */
+ if (old_tag != tag) {
+ struct super_block *sb;
+ struct dentry *dentry;
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ dentry = __sysfs_get_dentry(sb, sd);
+ if (!dentry)
+ continue;
+ shrink_dcache_parent(dentry);
+ d_drop(dentry);
+ dput(dentry);
+ }
+ }
+
error = 0;
out_unlock:
mutex_unlock(&sysfs_mutex);
@@ -954,11 +999,13 @@ int sysfs_move_dir(struct kobject *kobj,
struct sysfs_rename_struct *srs;
struct inode *old_parent_inode = NULL, *new_parent_inode = NULL;
int error;
+ const void *tag;
INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
BUG_ON(!sd->s_parent);
new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
+ tag = sysfs_dirent_tag(sd);
error = 0;
if (sd->s_parent == new_parent_sd)
@@ -992,7 +1039,7 @@ again:
mutex_lock(&sysfs_mutex);
error = -EEXIST;
- if (sysfs_find_dirent(new_parent_sd, sd->s_name))
+ if (sysfs_find_dirent(new_parent_sd, tag, sd->s_name))
goto out_unlock;
error = 0;
@@ -1031,10 +1078,11 @@ static inline unsigned char dt_type(stru
static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct sysfs_dirent * parent_sd = dentry->d_fsdata;
+ struct dentry *parent = filp->f_path.dentry;
+ struct sysfs_dirent * parent_sd = parent->d_fsdata;
struct sysfs_dirent *pos;
ino_t ino;
+ const void *tag;
if (filp->f_pos == 0) {
ino = parent_sd->s_ino;
@@ -1052,6 +1100,8 @@ static int sysfs_readdir(struct file * f
if ((filp->f_pos > 1) && (filp->f_pos < INT_MAX)) {
mutex_lock(&sysfs_mutex);
+ tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
+
/* Skip the dentries we have already reported */
pos = parent_sd->s_dir.children;
while (pos && (filp->f_pos > pos->s_ino))
@@ -1061,6 +1111,10 @@ static int sysfs_readdir(struct file * f
const char * name;
int len;
+ if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ (pos->s_tag.tag != tag))
+ continue;
+
name = pos->s_name;
len = strlen(name);
filp->f_pos = ino = pos->s_ino;
@@ -1081,3 +1135,106 @@ const struct file_operations sysfs_dir_o
.read = generic_read_dir,
.readdir = sysfs_readdir,
};
+
+const void *sysfs_creation_tag(struct sysfs_dirent *parent_sd,
+ struct sysfs_dirent *sd)
+{
+ const void *tag = NULL;
+
+ if (parent_sd->s_flags & SYSFS_FLAG_TAGGED) {
+ struct kobject *kobj;
+ switch (sysfs_type(sd)) {
+ case SYSFS_DIR:
+ kobj = sd->s_dir.kobj;
+ break;
+ case SYSFS_KOBJ_LINK:
+ kobj = sd->s_symlink.target_sd->s_dir.kobj;
+ break;
+ default:
+ BUG();
+ }
+ tag = parent_sd->s_tag.ops->kobject_tag(kobj);
+ }
+ return tag;
+}
+
+const void *sysfs_removal_tag(struct kobject *kobj, struct sysfs_dirent *dir_sd)
+{
+ const void *tag = NULL;
+
+ if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
+ tag = kobj->sd->s_tag.tag;
+
+ return tag;
+}
+
+const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd,
+ struct super_block *sb)
+{
+ const void *tag = NULL;
+
+ if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
+ tag = dir_sd->s_tag.ops->sb_tag(&sysfs_info(sb)->tag);
+
+ return tag;
+}
+
+const void *sysfs_dirent_tag(struct sysfs_dirent *sd)
+{
+ const void *tag = NULL;
+
+ if (sd->s_parent && (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED))
+ tag = sd->s_tag.tag;
+
+ return tag;
+}
+
+/**
+ * sysfs_enable_tagging - Automatically tag all of the children in a
+ * directory.
+ * @kobj: object whose children should be filtered by tags
+ *
+ * Once tagging has been enabled on a directory the contents
+ * of the directory become dependent upon context captured when
+ * sysfs was mounted.
+ *
+ * tag_ops->sb_tag() returns the context for a given superblock.
+ *
+ * tag_ops->kobject_tag() returns the context that a given kobj
+ * resides in.
+ *
+ * Using those methods the sysfs code on tagged directories
+ * carefully stores the files so that when we lookup files
+ * we get the proper answer for our context.
+ *
+ * If the context of a kobject is changed it is expected that
+ * the kobject will be renamed so the appropriate sysfs data structures
+ * can be updated.
+ */
+int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops)
+{
+ struct sysfs_dirent *sd;
+ int err;
+
+ err = -ENOENT;
+ sd = kobj->sd;
+
+ mutex_lock(&sysfs_mutex);
+ err = -EINVAL;
+ /* We can only enable tagging on empty directories
+ * where tagging is not already enabled, and
+ * who are not subdirectories of directories where tagging is
+ * enabled.
+ */
+ if (!sd->s_dir.children && (sysfs_type(sd) == SYSFS_DIR) &&
+ !(sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ sd->s_parent &&
+ !(sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)) {
+ err = 0;
+ sd->s_flags |= SYSFS_FLAG_TAGGED;
+ sd->s_tag.ops = tag_ops;
+ }
+ mutex_unlock(&sysfs_mutex);
+ return err;
+}
Index: linux-vanilla/fs/sysfs/file.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/file.c
+++ linux-vanilla/fs/sysfs/file.c
@@ -447,9 +447,9 @@ void sysfs_notify(struct kobject *k, cha
mutex_lock(&sysfs_mutex);
if (sd && dir)
- sd = sysfs_find_dirent(sd, dir);
+ sd = sysfs_find_dirent(sd, NULL, dir);
if (sd && attr)
- sd = sysfs_find_dirent(sd, attr);
+ sd = sysfs_find_dirent(sd, NULL, attr);
if (sd) {
struct sysfs_open_dirent *od;
@@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
{
- sysfs_hash_and_remove(kobj->sd, attr->name);
+ sysfs_hash_and_remove(kobj, kobj->sd, attr->name);
}
@@ -638,7 +638,7 @@ void sysfs_remove_file_from_group(struct
else
dir_sd = sysfs_get(kobj->sd);
if (dir_sd) {
- sysfs_hash_and_remove(dir_sd, attr->name);
+ sysfs_hash_and_remove(kobj, dir_sd, attr->name);
sysfs_put(dir_sd);
}
}
Index: linux-vanilla/fs/sysfs/group.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/group.c
+++ linux-vanilla/fs/sysfs/group.c
@@ -23,7 +23,7 @@ static void remove_files(struct sysfs_di
int i;
for (i = 0, attr = grp->attrs; *attr; i++, attr++)
- sysfs_hash_and_remove(dir_sd, (*attr)->name);
+ sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name);
}
static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
@@ -39,7 +39,7 @@ static int create_files(struct sysfs_dir
* visibility. Do this by first removing then
* re-adding (if required) the file */
if (update)
- sysfs_hash_and_remove(dir_sd, (*attr)->name);
+ sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name);
if (grp->is_visible) {
mode = grp->is_visible(kobj, *attr, i);
if (!mode)
Index: linux-vanilla/fs/sysfs/inode.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/inode.c
+++ linux-vanilla/fs/sysfs/inode.c
@@ -217,17 +217,20 @@ struct inode * sysfs_get_inode(struct sy
return inode;
}
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
+int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd,
+ const char *name)
{
struct sysfs_addrm_cxt acxt;
struct sysfs_dirent *sd;
+ const void *tag;
if (!dir_sd)
return -ENOENT;
sysfs_addrm_start(&acxt, dir_sd);
+ tag = sysfs_removal_tag(kobj, dir_sd);
- sd = sysfs_find_dirent(dir_sd, name);
+ sd = sysfs_find_dirent(dir_sd, tag, name);
if (sd)
sysfs_remove_one(&acxt, sd);
Index: linux-vanilla/fs/sysfs/mount.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/mount.c
+++ linux-vanilla/fs/sysfs/mount.c
@@ -75,6 +75,7 @@ static int sysfs_fill_super(struct super
goto out_err;
}
root->d_fsdata = &sysfs_root;
+ root->d_sb = sb;
sb->s_root = root;
sb->s_fs_info = info;
return 0;
@@ -88,20 +89,55 @@ out_err:
return error;
}
+static int sysfs_test_super(struct super_block *sb, void *ptr)
+{
+ struct task_struct *task = ptr;
+ struct sysfs_super_info *info = sysfs_info(sb);
+ int found = 1;
+
+ return found;
+}
+
static int sysfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
- int rc;
+ struct super_block *sb;
+ int error;
mutex_lock(&sysfs_rename_mutex);
- rc = get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
+ sb = sget(fs_type, sysfs_test_super, set_anon_super, current);
+ if (IS_ERR(sb)) {
+ error = PTR_ERR(sb);
+ goto out;
+ }
+ if (!sb->s_root) {
+ sb->s_flags = flags;
+ error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
+ if (error) {
+ up_write(&sb->s_umount);
+ deactivate_super(sb);
+ goto out;
+ }
+ sb->s_flags |= MS_ACTIVE;
+ }
+ do_remount_sb(sb, flags, data, 0);
+ error = simple_set_mnt(mnt, sb);
+out:
mutex_unlock(&sysfs_rename_mutex);
- return rc;
+ return error;
+}
+
+static void sysfs_kill_sb(struct super_block *sb)
+{
+ struct sysfs_super_info *info = sysfs_info(sb);
+
+ kill_anon_super(sb);
+ kfree(info);
}
struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.get_sb = sysfs_get_sb,
- .kill_sb = kill_anon_super,
+ .kill_sb = sysfs_kill_sb,
};
void sysfs_grab_supers(void)
Index: linux-vanilla/fs/sysfs/symlink.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/symlink.c
+++ linux-vanilla/fs/sysfs/symlink.c
@@ -94,7 +94,7 @@ void sysfs_remove_link(struct kobject *
else
parent_sd = kobj->sd;
- sysfs_hash_and_remove(parent_sd, name);
+ sysfs_hash_and_remove(kobj, parent_sd, name);
}
static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
Index: linux-vanilla/fs/sysfs/sysfs.h
===================================================================
--- linux-vanilla.orig/fs/sysfs/sysfs.h
+++ linux-vanilla/fs/sysfs/sysfs.h
@@ -46,6 +46,10 @@ struct sysfs_dirent {
const char *s_name;
union {
+ const struct sysfs_tagged_dir_operations *ops;
+ const void *tag;
+ } s_tag;
+ union {
struct sysfs_elem_dir s_dir;
struct sysfs_elem_symlink s_symlink;
struct sysfs_elem_attr s_attr;
@@ -69,6 +73,7 @@ struct sysfs_dirent {
#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK
#define SYSFS_FLAG_REMOVED 0x0200
+#define SYSFS_FLAG_TAGGED 0x0400
static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
{
@@ -87,6 +92,7 @@ struct sysfs_addrm_cxt {
struct sysfs_super_info {
int grabbed;
+ struct sysfs_tag_info tag;
};
#define sysfs_info(SB) ((struct sysfs_super_info *)(SB)->s_fs_info)
@@ -112,6 +118,13 @@ extern spinlock_t sysfs_assoc_lock;
extern const struct file_operations sysfs_dir_operations;
extern const struct inode_operations sysfs_dir_inode_operations;
+extern const void *sysfs_creation_tag(struct sysfs_dirent *parent_sd,
+ struct sysfs_dirent *sd);
+extern const void *sysfs_removal_tag(struct kobject *kobj,
+ struct sysfs_dirent *dir_sd);
+extern const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd,
+ struct super_block *sb);
+extern const void *sysfs_dirent_tag(struct sysfs_dirent *sd);
struct dentry *sysfs_get_dentry(struct super_block *sb,
struct sysfs_dirent *sd);
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
@@ -123,6 +136,7 @@ void sysfs_remove_one(struct sysfs_addrm
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+ const void *tag,
const unsigned char *name);
struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
const unsigned char *name);
@@ -154,7 +168,8 @@ static inline void sysfs_put(struct sysf
*/
struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
+int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd,
+ const char *name);
int sysfs_inode_init(void);
/*
Index: linux-vanilla/include/linux/sysfs.h
===================================================================
--- linux-vanilla.orig/include/linux/sysfs.h
+++ linux-vanilla/include/linux/sysfs.h
@@ -78,6 +78,14 @@ struct sysfs_ops {
ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t);
};
+struct sysfs_tag_info {
+};
+
+struct sysfs_tagged_dir_operations {
+ const void *(*sb_tag)(struct sysfs_tag_info *info);
+ const void *(*kobject_tag)(struct kobject *kobj);
+};
+
#ifdef CONFIG_SYSFS
int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
@@ -116,6 +124,9 @@ void sysfs_remove_file_from_group(struct
void sysfs_notify(struct kobject *kobj, char *dir, char *attr);
+int sysfs_enable_tagging(struct kobject *,
+ const struct sysfs_tagged_dir_operations *);
+
extern int __must_check sysfs_init(void);
#else /* CONFIG_SYSFS */
@@ -216,6 +227,12 @@ static inline void sysfs_notify(struct k
{
}
+static inline int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops)
+{
+ return 0;
+}
+
static inline int __must_check sysfs_init(void)
{
return 0;
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 07/11] sysfs: Implement sysfs_delete_link and sysfs_rename_link
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
` (5 preceding siblings ...)
2008-05-06 17:31 ` [PATCH 06/11] sysfs: Implement sysfs tagged directory support Benjamin Thery
@ 2008-05-06 17:31 ` Benjamin Thery
2008-05-06 17:31 ` [PATCH 08/11] driver core: Implement tagged directory support for device classes Benjamin Thery
` (4 subsequent siblings)
11 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:31 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
sysfs: Implement sysfs_delete_link and sysfs_rename_link
When removing a symlink sysfs_remove_link does not provide
enough information to figure out which tagged directory the symlink
falls in. So I need sysfs_delete_link which is passed the target
of the symlink to delete.
Further half the time when we are removing a symlink the code is
actually renaming the symlink but not doing so explicitly because
we don't have a symlink rename method. So I have added sysfs_rename_link
as well.
Both of these functions now have enough information to find a symlink
in a tagged directory. The only restriction is that they must be called
before the target kobject is renamed or deleted. If they are called
later I lose track of which tag the target kobject was marked with
and can no longer find the old symlink to remove it.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/symlink.c | 31 +++++++++++++++++++++++++++++++
include/linux/sysfs.h | 17 +++++++++++++++++
2 files changed, 48 insertions(+)
Index: linux-vanilla/fs/sysfs/symlink.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/symlink.c
+++ linux-vanilla/fs/sysfs/symlink.c
@@ -80,6 +80,21 @@ int sysfs_create_link(struct kobject * k
}
/**
+ * sysfs_delete_link - remove symlink in object's directory.
+ * @kobj: object we're acting for.
+ * @targ: object we're pointing to.
+ * @name: name of the symlink to remove.
+ *
+ * Unlike sysfs_remove_link sysfs_delete_link has enough information
+ * to successfully delete symlinks in tagged directories.
+ */
+void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
+ const char *name)
+{
+ sysfs_hash_and_remove(targ, kobj->sd, name);
+}
+
+/**
* sysfs_remove_link - remove symlink in object's directory.
* @kobj: object we're acting for.
* @name: name of the symlink to remove.
@@ -97,6 +112,22 @@ void sysfs_remove_link(struct kobject *
sysfs_hash_and_remove(kobj, parent_sd, name);
}
+/**
+ * sysfs_rename_link - rename symlink in object's directory.
+ * @kobj: object we're acting for.
+ * @targ: object we're pointing to.
+ * @old: previous name of the symlink.
+ * @new: new name of the symlink.
+ *
+ * A helper function for the common rename symlink idiom.
+ */
+int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
+ const char *old, const char *new)
+{
+ sysfs_delete_link(kobj, targ, old);
+ return sysfs_create_link(kobj, targ, new);
+}
+
static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
struct sysfs_dirent *target_sd, char *path)
{
Index: linux-vanilla/include/linux/sysfs.h
===================================================================
--- linux-vanilla.orig/include/linux/sysfs.h
+++ linux-vanilla/include/linux/sysfs.h
@@ -111,6 +111,12 @@ int __must_check sysfs_create_link(struc
const char *name);
void sysfs_remove_link(struct kobject *kobj, const char *name);
+int sysfs_rename_link(struct kobject *kobj, struct kobject *target,
+ const char *old_name, const char *new_name);
+
+void sysfs_delete_link(struct kobject *dir, struct kobject *targ,
+ const char *name);
+
int __must_check sysfs_create_group(struct kobject *kobj,
const struct attribute_group *grp);
int sysfs_update_group(struct kobject *kobj,
@@ -195,6 +201,17 @@ static inline void sysfs_remove_link(str
{
}
+static inline int sysfs_rename_link(struct kobject *k, struct kobject *t,
+ const char *old_name, const char *new_name)
+{
+ return 0;
+}
+
+static inline void sysfs_delete_link(struct kobject *k, struct kobject *t,
+ const char *name)
+{
+}
+
static inline int sysfs_create_group(struct kobject *kobj,
const struct attribute_group *grp)
{
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 08/11] driver core: Implement tagged directory support for device classes.
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
` (6 preceding siblings ...)
2008-05-06 17:31 ` [PATCH 07/11] sysfs: Implement sysfs_delete_link and sysfs_rename_link Benjamin Thery
@ 2008-05-06 17:31 ` Benjamin Thery
2008-05-06 17:32 ` [PATCH 09/11] netns: Enable tagging for net_class directories in sysfs Benjamin Thery
` (3 subsequent siblings)
11 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:31 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
driver core: Implement tagged directory support for device classes.
This patch enables tagging on every class directory if struct class
has tag_ops.
In addition device_del and device_rename were modified to use
sysfs_delete_link and sysfs_rename_link respectively to ensure
when these operations happen on devices whose classes have
tag_ops that they work properly.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
drivers/base/class.c | 30 ++++++++++++++++++++++++++----
drivers/base/core.c | 41 ++++++++++++++++++++++-------------------
include/linux/device.h | 2 ++
3 files changed, 50 insertions(+), 23 deletions(-)
Index: linux-vanilla/drivers/base/class.c
===================================================================
--- linux-vanilla.orig/drivers/base/class.c
+++ linux-vanilla/drivers/base/class.c
@@ -134,6 +134,17 @@ static void remove_class_attrs(struct cl
}
}
+static int class_setup_tagging(struct class *cls)
+{
+ const struct sysfs_tagged_dir_operations *tag_ops;
+
+ tag_ops = cls->tag_ops;
+ if (!tag_ops)
+ return 0;
+
+ return sysfs_enable_tagging(&cls->subsys.kobj, tag_ops);
+}
+
int class_register(struct class *cls)
{
int error;
@@ -159,11 +170,22 @@ int class_register(struct class *cls)
cls->subsys.kobj.ktype = &class_ktype;
error = kset_register(&cls->subsys);
- if (!error) {
- error = add_class_attrs(class_get(cls));
- class_put(cls);
- }
+ if (error)
+ goto out;
+
+ error = class_setup_tagging(cls);
+ if (error)
+ goto out_unregister;
+
+ error = add_class_attrs(cls);
+ if (error)
+ goto out_unregister;
+
+out:
return error;
+out_unregister:
+ kset_unregister(&cls->subsys);
+ goto out;
}
void class_unregister(struct class *cls)
Index: linux-vanilla/drivers/base/core.c
===================================================================
--- linux-vanilla.orig/drivers/base/core.c
+++ linux-vanilla/drivers/base/core.c
@@ -614,6 +614,10 @@ static struct kobject *get_device_parent
kobject_put(k);
return NULL;
}
+ /* If we created a new class-directory setup tagging */
+ if (dev->class->tag_ops)
+ sysfs_enable_tagging(k, dev->class->tag_ops);
+
/* do not emit an uevent for this simple "glue" directory */
return k;
}
@@ -748,12 +752,13 @@ static void device_remove_class_symlinks
if (dev->kobj.parent != &dev->class->subsys.kobj &&
device_is_not_partition(dev))
- sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
+ sysfs_delete_link(&dev->class->subsys.kobj, &dev->kobj,
+ dev->bus_id);
#else
if (dev->parent && device_is_not_partition(dev))
sysfs_remove_link(&dev->kobj, "device");
- sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
+ sysfs_delete_link(&dev->class->subsys.kobj, &dev->kobj, dev->bus_id);
#endif
sysfs_remove_link(&dev->kobj, "subsystem");
@@ -1199,6 +1204,15 @@ int device_rename(struct device *dev, ch
strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE);
strlcpy(dev->bus_id, new_name, BUS_ID_SIZE);
+#ifndef CONFIG_SYSFS_DEPRECATED
+ if (dev->class && (dev->kobj.parent != &dev->class->subsys.kobj)) {
+ error = sysfs_rename_link(&dev->class->subsys.kobj,
+ &dev->kobj, old_device_name, new_name);
+ if (error)
+ goto out;
+ }
+#endif
+
error = kobject_rename(&dev->kobj, new_name);
if (error) {
strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE);
@@ -1207,24 +1221,13 @@ int device_rename(struct device *dev, ch
#ifdef CONFIG_SYSFS_DEPRECATED
if (old_class_name) {
+ error = -ENOMEM;
new_class_name = make_class_name(dev->class->name, &dev->kobj);
- if (new_class_name) {
- error = sysfs_create_link(&dev->parent->kobj,
- &dev->kobj, new_class_name);
- if (error)
- goto out;
- sysfs_remove_link(&dev->parent->kobj, old_class_name);
- }
- }
-#else
- if (dev->class) {
- sysfs_remove_link(&dev->class->subsys.kobj, old_device_name);
- error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
- dev->bus_id);
- if (error) {
- dev_err(dev, "%s: sysfs_create_symlink failed (%d)\n",
- __func__, error);
- }
+ if (new_class_name)
+ error = sysfs_rename_link(&dev->parent->kobj,
+ &dev->kobj,
+ old_class_name,
+ new_class_name);
}
#endif
Index: linux-vanilla/include/linux/device.h
===================================================================
--- linux-vanilla.orig/include/linux/device.h
+++ linux-vanilla/include/linux/device.h
@@ -198,6 +198,8 @@ struct class {
int (*suspend)(struct device *dev, pm_message_t state);
int (*resume)(struct device *dev);
+
+ const struct sysfs_tagged_dir_operations *tag_ops;
};
extern int __must_check class_register(struct class *class);
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 09/11] netns: Enable tagging for net_class directories in sysfs
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
` (7 preceding siblings ...)
2008-05-06 17:31 ` [PATCH 08/11] driver core: Implement tagged directory support for device classes Benjamin Thery
@ 2008-05-06 17:32 ` Benjamin Thery
2008-05-06 17:32 ` [PATCH 10/11] avoid kobject name conflict with different namespaces Benjamin Thery
` (2 subsequent siblings)
11 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
net: Enable tagging for net_class directories in sysfs
The problem. Network devices show up in sysfs and with the network
namespace active multiple devices with the same name can show up in
the same directory, ouch!
To avoid that problem and allow existing applications in network namespaces
to see the same interface that is currently presented in sysfs, this
patch enables the tagging directory support in sysfs.
By using the network namespace pointers as tags to separate out the
sysfs directory entries we ensure that we don't have conflicts
in the directories and applications only see a limited set of
the network devices.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/mount.c | 36 ++++++++++++++++++++++++++++++++++++
include/linux/sysfs.h | 2 ++
net/Kconfig | 2 +-
net/core/net-sysfs.c | 20 ++++++++++++++++++++
4 files changed, 59 insertions(+), 1 deletion(-)
Index: linux-vanilla/fs/sysfs/mount.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/mount.c
+++ linux-vanilla/fs/sysfs/mount.c
@@ -16,6 +16,8 @@
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
#include "sysfs.h"
@@ -78,6 +80,7 @@ static int sysfs_fill_super(struct super
root->d_sb = sb;
sb->s_root = root;
sb->s_fs_info = info;
+ info->tag.net_ns = hold_net(current->nsproxy->net_ns);
return 0;
out_err:
@@ -95,6 +98,9 @@ static int sysfs_test_super(struct super
struct sysfs_super_info *info = sysfs_info(sb);
int found = 1;
+ if (task->nsproxy->net_ns != info->tag.net_ns)
+ found = 0;
+
return found;
}
@@ -131,6 +137,8 @@ static void sysfs_kill_sb(struct super_b
struct sysfs_super_info *info = sysfs_info(sb);
kill_anon_super(sb);
+ if (info->tag.net_ns)
+ release_net(info->tag.net_ns);
kfree(info);
}
@@ -181,6 +189,31 @@ restart:
spin_unlock(&sb_lock);
}
+#ifdef CONFIG_NET
+static void sysfs_net_exit(struct net *net)
+{
+ /* Allow the net namespace to go away while sysfs is still mounted. */
+ struct super_block *sb;
+ mutex_lock(&sysfs_rename_mutex);
+ sysfs_grab_supers();
+ mutex_lock(&sysfs_mutex);
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ struct sysfs_super_info *info = sysfs_info(sb);
+ if (info->tag.net_ns != net)
+ continue;
+ release_net(info->tag.net_ns);
+ info->tag.net_ns = NULL;
+ }
+ mutex_unlock(&sysfs_mutex);
+ sysfs_release_supers();
+ mutex_unlock(&sysfs_rename_mutex);
+}
+
+static struct pernet_operations sysfs_net_ops = {
+ .exit = sysfs_net_exit,
+};
+#endif
+
int __init sysfs_init(void)
{
int err = -ENOMEM;
@@ -205,6 +238,9 @@ int __init sysfs_init(void)
unregister_filesystem(&sysfs_fs_type);
goto out_err;
}
+#ifdef CONFIG_NET
+ register_pernet_subsys(&sysfs_net_ops);
+#endif
} else
goto out_err;
out:
Index: linux-vanilla/include/linux/sysfs.h
===================================================================
--- linux-vanilla.orig/include/linux/sysfs.h
+++ linux-vanilla/include/linux/sysfs.h
@@ -19,6 +19,7 @@
struct kobject;
struct module;
+struct net;
/* FIXME
* The *owner field is no longer used, but leave around
@@ -79,6 +80,7 @@ struct sysfs_ops {
};
struct sysfs_tag_info {
+ struct net *net_ns;
};
struct sysfs_tagged_dir_operations {
Index: linux-vanilla/net/Kconfig
===================================================================
--- linux-vanilla.orig/net/Kconfig
+++ linux-vanilla/net/Kconfig
@@ -30,7 +30,7 @@ menu "Networking options"
config NET_NS
bool "Network namespace support"
default n
- depends on EXPERIMENTAL && !SYSFS && NAMESPACES
+ depends on EXPERIMENTAL && NAMESPACES
help
Allow user space to create what appear to be multiple instances
of the network stack.
Index: linux-vanilla/net/core/net-sysfs.c
===================================================================
--- linux-vanilla.orig/net/core/net-sysfs.c
+++ linux-vanilla/net/core/net-sysfs.c
@@ -13,7 +13,9 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
+#include <linux/nsproxy.h>
#include <net/sock.h>
+#include <net/net_namespace.h>
#include <linux/rtnetlink.h>
#include <linux/wireless.h>
#include <net/iw_handler.h>
@@ -421,6 +423,23 @@ static void netdev_release(struct device
kfree((char *)dev - dev->padded);
}
+static const void *net_sb_tag(struct sysfs_tag_info *info)
+{
+ return info->net_ns;
+}
+
+static const void *net_kobject_tag(struct kobject *kobj)
+{
+ struct net_device *dev;
+ dev = container_of(kobj, struct net_device, dev.kobj);
+ return dev_net(dev);
+}
+
+static const struct sysfs_tagged_dir_operations net_tagged_dir_operations = {
+ .sb_tag = net_sb_tag,
+ .kobject_tag = net_kobject_tag,
+};
+
static struct class net_class = {
.name = "net",
.dev_release = netdev_release,
@@ -430,6 +449,7 @@ static struct class net_class = {
#ifdef CONFIG_HOTPLUG
.dev_uevent = netdev_uevent,
#endif
+ .tag_ops = &net_tagged_dir_operations,
};
/* Delete sysfs entries but hold kobject reference until after all
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 10/11] avoid kobject name conflict with different namespaces
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
` (8 preceding siblings ...)
2008-05-06 17:32 ` [PATCH 09/11] netns: Enable tagging for net_class directories in sysfs Benjamin Thery
@ 2008-05-06 17:32 ` Benjamin Thery
2008-05-07 18:49 ` Eric W. Biederman
2008-05-06 17:32 ` [PATCH 11/11] sysfs: user namespaces: add ns to user_struct Benjamin Thery
2008-05-06 17:53 ` [RESEND][PATCH 00/11] sysfs tagged directories Greg KH
11 siblings, 1 reply; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
The renaming of a kobject will fail if there is another kobject
with the same name belonging to another namespace.
This patch makes the kobject lookup in kobject_rename to check if
the object exists _and_ belongs to the same namespace.
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/dir.c | 10 ++++++++++
include/linux/sysfs.h | 7 +++++++
lib/kobject.c | 2 +-
3 files changed, 18 insertions(+), 1 deletion(-)
Index: linux-vanilla/fs/sysfs/dir.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/dir.c
+++ linux-vanilla/fs/sysfs/dir.c
@@ -902,6 +902,16 @@ err_out:
return error;
}
+int sysfs_tag_cmp(struct kobject *kobj1, struct kobject *kobj2)
+{
+ struct sysfs_dirent *sd1 = kobj1->sd;
+ struct sysfs_dirent *sd2 = kobj2->sd;
+ const void *tag1 = sysfs_dirent_tag(sd1);
+ const void *tag2 = sysfs_dirent_tag(sd2);
+
+ return tag1 != tag2;
+}
+
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
{
struct sysfs_dirent *sd = kobj->sd;
Index: linux-vanilla/include/linux/sysfs.h
===================================================================
--- linux-vanilla.orig/include/linux/sysfs.h
+++ linux-vanilla/include/linux/sysfs.h
@@ -95,6 +95,8 @@ int sysfs_schedule_callback(struct kobje
int __must_check sysfs_create_dir(struct kobject *kobj);
void sysfs_remove_dir(struct kobject *kobj);
+int sysfs_tag_cmp(struct kobject *kobj1, struct kobject *kobj2);
+
int __must_check sysfs_rename_dir(struct kobject *kobj, const char *new_name);
int __must_check sysfs_move_dir(struct kobject *kobj,
struct kobject *new_parent_kobj);
@@ -154,6 +156,11 @@ static inline void sysfs_remove_dir(stru
{
}
+static inline int sysfs_tag_cmp(struct kobject *kobj1, struct kobject *kobj2)
+{
+ return 0;
+}
+
static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
{
return 0;
Index: linux-vanilla/lib/kobject.c
===================================================================
--- linux-vanilla.orig/lib/kobject.c
+++ linux-vanilla/lib/kobject.c
@@ -401,7 +401,7 @@ int kobject_rename(struct kobject *kobj,
if (kobj->kset) {
struct kobject *temp_kobj;
temp_kobj = kset_find_obj(kobj->kset, new_name);
- if (temp_kobj) {
+ if (temp_kobj && !sysfs_tag_cmp(temp_kobj, kobj)) {
printk(KERN_WARNING "kobject '%s' cannot be renamed "
"to '%s' as '%s' is already in existence.\n",
kobject_name(kobj), new_name, new_name);
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 10/11] avoid kobject name conflict with different namespaces
2008-05-06 17:32 ` [PATCH 10/11] avoid kobject name conflict with different namespaces Benjamin Thery
@ 2008-05-07 18:49 ` Eric W. Biederman
2008-05-07 19:08 ` Greg KH
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-07 18:49 UTC (permalink / raw)
To: Benjamin Thery
Cc: linux-kernel, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev
Benjamin Thery <benjamin.thery@bull.net> writes:
> The renaming of a kobject will fail if there is another kobject
> with the same name belonging to another namespace.
>
> This patch makes the kobject lookup in kobject_rename to check if
> the object exists _and_ belongs to the same namespace.
Ok so we are dealing with fallout from:
commit 34358c26a2c96b2a068dc44e0ac602106a466bce
Author: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed Oct 24 16:52:31 2007 -0700
kobject: check for duplicate names in kobject_rename
This should catch any duplicate names before we try to tell sysfs to
rename the object. This happens a lot with older versions of udev and
the network rename scripts.
Cc: David Miller <davem@davemloft.net>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Tejun Heo <htejun@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Which added the check in kobject_rename to prevent problems, and
seems to be causing a few.
I believe this earlier? patch addresses the problem:
commit c8d90dca3211966ba5189e0f3d4bccd558d9ae08
Author: Stephen Hemminger <shemminger@linux-foundation.org>
Date: Fri Oct 26 03:53:42 2007 -0700
[NET] dev_change_name: ignore changes to same name
Prevent error/backtrace from dev_rename() when changing
name of network device to the same name. This is a common
situation with udev and other scripts that bind addr to device.
Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
And the challenge is that we are getting false positives in the check
to see if renames will fail.
/* see if this name is already in use */
if (kobj->kset) {
struct kobject *temp_kobj;
temp_kobj = kset_find_obj(kobj->kset, new_name);
if (temp_kobj) {
printk(KERN_WARNING "kobject '%s' can not be renamed
"to '%s' as '%s' is already in existance.\n",
kobject_name(kobj), new_name, new_name);
kobject_put(temp_kobj);
return -EINVAL;
}
}
If the kobject layer wants to perform the test above, how do we support
it by giving it enough information to perform the test without false
positives?
Certainly we can go to the sysfs layer but that has the problem
of being a layering violation and not working when sysfs is not
compiled in. Ouch!
I believe what the sanity check should look like is:
/* see if this name is already in use */
if (kobj->kset) {
struct kobject *temp_kobj;
* void *tag;
* tag = kobject_tag(kobj);
* temp_kobj = kset_find_tagged_obj(kobj->kset, tag, new_name);
* if (temp_kobj && (temp_kobj != kobj)) {
printk(KERN_WARNING "kobject '%s' can not be renamed
"to '%s' as '%s' is already in existance.\n",
kobject_name(kobj), new_name, new_name);
kobject_put(temp_kobj);
return -EINVAL;
}
}
The tricky part is how do we get to kobject_tag (from the
sysfs_tagged_dir_operations).
Unless there is another path I think placing an additional pointer in
kobj_type so we can find it through ktype is the simplest solution.
Although using the kset is also sane.
The easiest and most trivially correct thing to do would be to simply
remove the unnecessary check from kobject_rename. We perform the
check at the upper levels in the network anyway. And kobject_rename
is only used by the network stack.
....
As for the actual patch itself I have two nits to pick.
> Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
> Acked-by: Benjamin Thery <benjamin.thery@bull.net>
> ---
> fs/sysfs/dir.c | 10 ++++++++++
> include/linux/sysfs.h | 7 +++++++
> lib/kobject.c | 2 +-
> 3 files changed, 18 insertions(+), 1 deletion(-)
>
> Index: linux-vanilla/fs/sysfs/dir.c
> ===================================================================
> --- linux-vanilla.orig/fs/sysfs/dir.c
> +++ linux-vanilla/fs/sysfs/dir.c
> @@ -902,6 +902,16 @@ err_out:
> return error;
> }
>
> +int sysfs_tag_cmp(struct kobject *kobj1, struct kobject *kobj2)
> +{
> + struct sysfs_dirent *sd1 = kobj1->sd;
> + struct sysfs_dirent *sd2 = kobj2->sd;
> + const void *tag1 = sysfs_dirent_tag(sd1);
> + const void *tag2 = sysfs_dirent_tag(sd2);
The new name should be compared with sysfs_creation_tag in
case we are dealing with the case of renaming across network
namespaces. We could use sysfs_creation_tag for both as
the only time the dirent_tag and creation_tag should differ
is during a rename operation.
> +
> + return tag1 != tag2;
> +}
> +
> int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
> {
> struct sysfs_dirent *sd = kobj->sd;
> Index: linux-vanilla/include/linux/sysfs.h
> ===================================================================
> --- linux-vanilla.orig/include/linux/sysfs.h
> +++ linux-vanilla/include/linux/sysfs.h
> @@ -95,6 +95,8 @@ int sysfs_schedule_callback(struct kobje
>
> int __must_check sysfs_create_dir(struct kobject *kobj);
> void sysfs_remove_dir(struct kobject *kobj);
> +int sysfs_tag_cmp(struct kobject *kobj1, struct kobject *kobj2);
> +
> int __must_check sysfs_rename_dir(struct kobject *kobj, const char *new_name);
> int __must_check sysfs_move_dir(struct kobject *kobj,
> struct kobject *new_parent_kobj);
> @@ -154,6 +156,11 @@ static inline void sysfs_remove_dir(stru
> {
> }
>
> +static inline int sysfs_tag_cmp(struct kobject *kobj1, struct kobject *kobj2)
> +{
> + return 0;
> +}
> +
Either I am blind or this implementation breaks when we are using
kobjects and sysfs support is not compiled in. It might be that
we don't do this work but still in principle this is a small bug.
> static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
> {
> return 0;
> Index: linux-vanilla/lib/kobject.c
> ===================================================================
> --- linux-vanilla.orig/lib/kobject.c
> +++ linux-vanilla/lib/kobject.c
> @@ -401,7 +401,7 @@ int kobject_rename(struct kobject *kobj,
> if (kobj->kset) {
> struct kobject *temp_kobj;
> temp_kobj = kset_find_obj(kobj->kset, new_name);
> - if (temp_kobj) {
> + if (temp_kobj && !sysfs_tag_cmp(temp_kobj, kobj)) {
> printk(KERN_WARNING "kobject '%s' cannot be renamed "
> "to '%s' as '%s' is already in existence.\n",
> kobject_name(kobj), new_name, new_name);
>
> --
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 10/11] avoid kobject name conflict with different namespaces
2008-05-07 18:49 ` Eric W. Biederman
@ 2008-05-07 19:08 ` Greg KH
2008-05-07 20:54 ` Eric W. Biederman
` (3 more replies)
0 siblings, 4 replies; 75+ messages in thread
From: Greg KH @ 2008-05-07 19:08 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Benjamin Thery, linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
On Wed, May 07, 2008 at 11:49:19AM -0700, Eric W. Biederman wrote:
> Unless there is another path I think placing an additional pointer in
> kobj_type so we can find it through ktype is the simplest solution.
> Although using the kset is also sane.
Ick, ick, ick :)
> The easiest and most trivially correct thing to do would be to simply
> remove the unnecessary check from kobject_rename. We perform the
> check at the upper levels in the network anyway. And kobject_rename
> is only used by the network stack.
Wireless uses it also for some things, and it requires that it fail if a
duplicate is found. I thought that s390 also used it, but I don't see
that usage in the tree anymore, perhaps they switched to something else.
good luck,
greg k-h
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 10/11] avoid kobject name conflict with different namespaces
2008-05-07 19:08 ` Greg KH
@ 2008-05-07 20:54 ` Eric W. Biederman
2008-05-08 8:28 ` Cornelia Huck
2008-05-08 19:25 ` Eric W. Biederman
` (2 subsequent siblings)
3 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-07 20:54 UTC (permalink / raw)
To: Greg KH
Cc: Benjamin Thery, linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
Greg KH <gregkh@suse.de> writes:
> On Wed, May 07, 2008 at 11:49:19AM -0700, Eric W. Biederman wrote:
>> Unless there is another path I think placing an additional pointer in
>> kobj_type so we can find it through ktype is the simplest solution.
>> Although using the kset is also sane.
>
> Ick, ick, ick :)
Well yes we are dealing with a pile of seemingly unnecessary layers,
that are attempting to add uniformity where no uniformity existed.
That is an easy recipe for ick.
>> The easiest and most trivially correct thing to do would be to simply
>> remove the unnecessary check from kobject_rename. We perform the
>> check at the upper levels in the network anyway. And kobject_rename
>> is only used by the network stack.
>
> Wireless uses it also for some things, and it requires that it fail if a
> duplicate is found.
Looks like it, that use is brand new, and at first glance I thought
it was another instance of the device_rename case that kobject uses.
Apparently not.
It looks like getting the wireless devices into the network namespace
is going to be interesting. Since this phy name is user controllable
and shows up in rtnetlink messages it definitely appears to belong in
the network namespace.
Virtual/logical devices are such a pain.
....
Given that kobject_rename is growing users we definitely need to
fix it so a noop rename does not return -EINVAL.
i.e. if (temp_kobj && (temp_kobj != kobj)) return -EINVAL.
instead of just if (temp_kobj) return -EINVAL;
> I thought that s390 also used it, but I don't see
> that usage in the tree anymore, perhaps they switched to something else.
That is the device_move -> kobject_move case. Very similar (and nice
if we can figure out how to combine them).
> good luck,
Thanks. It looks like we are just about there.
Hopefully we can get this merged soon so there won't be many more sets
of shifting requirements to chase.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 10/11] avoid kobject name conflict with different namespaces
2008-05-07 20:54 ` Eric W. Biederman
@ 2008-05-08 8:28 ` Cornelia Huck
2008-05-08 19:28 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Cornelia Huck @ 2008-05-08 8:28 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Greg KH, Benjamin Thery, linux-kernel, Tejun Heo, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev
On Wed, 07 May 2008 13:54:27 -0700,
ebiederm@xmission.com (Eric W. Biederman) wrote:
> > I thought that s390 also used it, but I don't see
> > that usage in the tree anymore, perhaps they switched to something else.
>
> That is the device_move -> kobject_move case.
Yes. bluetooth also uses it, IIRC.
> Very similar (and nice
> if we can figure out how to combine them).
Sounds like a good idea. I can test the _move() stuff (after I've
managed to find some time to try this patchset...)
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 10/11] avoid kobject name conflict with different namespaces
2008-05-08 8:28 ` Cornelia Huck
@ 2008-05-08 19:28 ` Eric W. Biederman
2008-05-09 5:35 ` Cornelia Huck
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-08 19:28 UTC (permalink / raw)
To: Cornelia Huck
Cc: Greg KH, Benjamin Thery, linux-kernel, Tejun Heo, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev
Cornelia Huck <cornelia.huck@de.ibm.com> writes:
> On Wed, 07 May 2008 13:54:27 -0700,
> ebiederm@xmission.com (Eric W. Biederman) wrote:
>
>> > I thought that s390 also used it, but I don't see
>> > that usage in the tree anymore, perhaps they switched to something else.
>>
>> That is the device_move -> kobject_move case.
>
> Yes. bluetooth also uses it, IIRC.
Yes.
>> Very similar (and nice
>> if we can figure out how to combine them).
>
> Sounds like a good idea. I can test the _move() stuff (after I've
> managed to find some time to try this patchset...)
Do you remember if we have ever sorted out the race between _move
and module unload/directory teardown at the sysfs level?
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 10/11] avoid kobject name conflict with different namespaces
2008-05-08 19:28 ` Eric W. Biederman
@ 2008-05-09 5:35 ` Cornelia Huck
2008-05-09 18:16 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Cornelia Huck @ 2008-05-09 5:35 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Greg KH, Benjamin Thery, linux-kernel, Tejun Heo, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev
On Thu, 08 May 2008 12:28:19 -0700,
ebiederm@xmission.com (Eric W. Biederman) wrote:
> Do you remember if we have ever sorted out the race between _move
> and module unload/directory teardown at the sysfs level?
Unfortunately I can't remember anything about this, do you have a
pointer to the original discussion?
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 10/11] avoid kobject name conflict with different namespaces
2008-05-09 5:35 ` Cornelia Huck
@ 2008-05-09 18:16 ` Eric W. Biederman
0 siblings, 0 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-09 18:16 UTC (permalink / raw)
To: Cornelia Huck
Cc: Eric W. Biederman, Greg KH, Benjamin Thery, linux-kernel,
Tejun Heo, Al Viro, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev
Cornelia Huck <cornelia.huck@de.ibm.com> writes:
> On Thu, 08 May 2008 12:28:19 -0700,
> ebiederm@xmission.com (Eric W. Biederman) wrote:
>
>> Do you remember if we have ever sorted out the race between _move
>> and module unload/directory teardown at the sysfs level?
>
> Unfortunately I can't remember anything about this, do you have a
> pointer to the original discussion?
Ok the conversation was:
"[Bluez-devel] Oops involving RFCOMM and sysfs"
http://lkml.org/lkml/2007/12/28/87
Following the thread through it appears the bug was fixed
and it wasn't the race Al Viro was concerned about.
http://lkml.org/lkml/2008/1/9/59
And it looks like all of the symptoms have been corrected.
Yeah!
Which implies to me that kobject_move has the same locking
restrictions that kobject_rename has. I.e. the upper level
better do what it takes to make the operation safe.
Which says that at least for now not worrying about the locking
between sysfs_move_dir/sysfs_rename_dir and sysfs_remove_dir is ok.
As we require the upper levels not to do that.
Which says that we may be able to figure out how to merge
sysfs_move_dir and sysfs_rename_dir. It is still a nasty business
though.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 10/11] avoid kobject name conflict with different namespaces
2008-05-07 19:08 ` Greg KH
2008-05-07 20:54 ` Eric W. Biederman
@ 2008-05-08 19:25 ` Eric W. Biederman
2008-05-08 21:30 ` [PATCH] wireless: Add missing locking to cfg80211_dev_rename Eric W. Biederman
2008-05-08 21:41 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS Eric W. Biederman
3 siblings, 0 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-08 19:25 UTC (permalink / raw)
To: Greg KH
Cc: Benjamin Thery, linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
Greg KH <gregkh@suse.de> writes:
> On Wed, May 07, 2008 at 11:49:19AM -0700, Eric W. Biederman wrote:
>> Unless there is another path I think placing an additional pointer in
>> kobj_type so we can find it through ktype is the simplest solution.
>> Although using the kset is also sane.
>
> Ick, ick, ick :)
>
>> The easiest and most trivially correct thing to do would be to simply
>> remove the unnecessary check from kobject_rename. We perform the
>> check at the upper levels in the network anyway. And kobject_rename
>> is only used by the network stack.
>
> Wireless uses it also for some things, and it requires that it fail if a
> duplicate is found. I thought that s390 also used it, but I don't see
> that usage in the tree anymore, perhaps they switched to something else.
Looking at this a little further kobject_rename is a complete noop
when sysfs support is not compiled in. That is the kobject does not
get renamed, even if the higher level objects do.
This makes the wireless dependency on an error code from kobject_rename
completely bogus as the kobject layer is not prepared to give any kind of
reliable result, and it makes kobject_rename completely bogus if sysfs
support is not compiled in.
Further there is no locking to guarantee renames are atomic
or mutually exclusive at the kobject level.
With no locking and code that does nothing in the absence of sysfs
attempting to check renames for validity at the kobject level (when
renames don't happen at the kobject level) is totally bogus.
Since renames don't happen at the kobject level checking them for
sanity at the kobject level makes no sense.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH] wireless: Add missing locking to cfg80211_dev_rename
2008-05-07 19:08 ` Greg KH
2008-05-07 20:54 ` Eric W. Biederman
2008-05-08 19:25 ` Eric W. Biederman
@ 2008-05-08 21:30 ` Eric W. Biederman
2008-05-08 22:12 ` Serge E. Hallyn
2008-05-08 22:18 ` Johannes Berg
2008-05-08 21:41 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS Eric W. Biederman
3 siblings, 2 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-08 21:30 UTC (permalink / raw)
To: Johannes Berg, John W. Linville, David S. Miller
Cc: Benjamin Thery, linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev, Greg KH
device_rename only performs useful and race free validity
checking at the optional sysfs level so depending on it
for all of the validity checking in cfg80211_dev_rename
is racy.
Instead implement all of the needed validity checking
and locking in cfg80211_dev_rename.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
net/wireless/core.c | 33 ++++++++++++++++++++++++++++-----
1 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 80afacd..f1da0b9 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -143,8 +143,11 @@ void cfg80211_put_dev(struct cfg80211_registered_device *drv)
int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
char *newname)
{
+ struct cfg80211_registered_device *drv;
int idx, taken = -1, result, digits;
+ mutex_lock(&cfg80211_drv_mutex);
+
/* prohibit calling the thing phy%d when %d is not its number */
sscanf(newname, PHY_NAME "%d%n", &idx, &taken);
if (taken == strlen(newname) && idx != rdev->idx) {
@@ -156,14 +159,30 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
* deny the name if it is phy<idx> where <idx> is printed
* without leading zeroes. taken == strlen(newname) here
*/
+ result = -EINVAL;
if (taken == strlen(PHY_NAME) + digits)
- return -EINVAL;
+ goto out_unlock;
+ }
+
+
+ /* Ignore nop renames */
+ result = 0;
+ if (strcmp(newname, dev_name(&rdev->wiphy.dev)) == 0)
+ goto out_unlock;
+
+ /* Ensure another device does not already have this name. */
+ list_for_each_entry(drv, &cfg80211_drv_list, list) {
+ result = -EINVAL;
+ if (strcmp(newname, dev_name(&drv->wiphy.dev)) == 0)
+ goto out_unlock;
}
- /* this will check for collisions */
+ /* this will only check for collisions in sysfs
+ * which is not even always compiled in.
+ */
result = device_rename(&rdev->wiphy.dev, newname);
if (result)
- return result;
+ goto out_unlock;
if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
rdev->wiphy.debugfsdir,
@@ -172,9 +191,13 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n",
newname);
- nl80211_notify_dev_rename(rdev);
+ result = 0;
+out_unlock:
+ mutex_unlock(&cfg80211_drv_mutex);
+ if (result == 0)
+ nl80211_notify_dev_rename(rdev);
- return 0;
+ return result;
}
/* exported functions */
--
1.5.3.rc6.17.g1911
^ permalink raw reply related [flat|nested] 75+ messages in thread
* Re: [PATCH] wireless: Add missing locking to cfg80211_dev_rename
2008-05-08 21:30 ` [PATCH] wireless: Add missing locking to cfg80211_dev_rename Eric W. Biederman
@ 2008-05-08 22:12 ` Serge E. Hallyn
2008-05-08 22:18 ` Johannes Berg
1 sibling, 0 replies; 75+ messages in thread
From: Serge E. Hallyn @ 2008-05-08 22:12 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Johannes Berg, John W. Linville, David S. Miller, Benjamin Thery,
linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev, Greg KH
Quoting Eric W. Biederman (ebiederm@xmission.com):
>
> device_rename only performs useful and race free validity
> checking at the optional sysfs level so depending on it
> for all of the validity checking in cfg80211_dev_rename
> is racy.
>
> Instead implement all of the needed validity checking
> and locking in cfg80211_dev_rename.
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
I'm about as far as you can get from either a sysfs/kobject or network
expert, but both of these patches as well as your general concept that
renames should be handled at sysfs seem to me to make perfect sense.
thanks,
-serge
> ---
> net/wireless/core.c | 33 ++++++++++++++++++++++++++++-----
> 1 files changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/net/wireless/core.c b/net/wireless/core.c
> index 80afacd..f1da0b9 100644
> --- a/net/wireless/core.c
> +++ b/net/wireless/core.c
> @@ -143,8 +143,11 @@ void cfg80211_put_dev(struct cfg80211_registered_device *drv)
> int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
> char *newname)
> {
> + struct cfg80211_registered_device *drv;
> int idx, taken = -1, result, digits;
>
> + mutex_lock(&cfg80211_drv_mutex);
> +
> /* prohibit calling the thing phy%d when %d is not its number */
> sscanf(newname, PHY_NAME "%d%n", &idx, &taken);
> if (taken == strlen(newname) && idx != rdev->idx) {
> @@ -156,14 +159,30 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
> * deny the name if it is phy<idx> where <idx> is printed
> * without leading zeroes. taken == strlen(newname) here
> */
> + result = -EINVAL;
> if (taken == strlen(PHY_NAME) + digits)
> - return -EINVAL;
> + goto out_unlock;
> + }
> +
> +
> + /* Ignore nop renames */
> + result = 0;
> + if (strcmp(newname, dev_name(&rdev->wiphy.dev)) == 0)
> + goto out_unlock;
> +
> + /* Ensure another device does not already have this name. */
> + list_for_each_entry(drv, &cfg80211_drv_list, list) {
> + result = -EINVAL;
> + if (strcmp(newname, dev_name(&drv->wiphy.dev)) == 0)
> + goto out_unlock;
> }
>
> - /* this will check for collisions */
> + /* this will only check for collisions in sysfs
> + * which is not even always compiled in.
> + */
> result = device_rename(&rdev->wiphy.dev, newname);
> if (result)
> - return result;
> + goto out_unlock;
>
> if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
> rdev->wiphy.debugfsdir,
> @@ -172,9 +191,13 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
> printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n",
> newname);
>
> - nl80211_notify_dev_rename(rdev);
> + result = 0;
> +out_unlock:
> + mutex_unlock(&cfg80211_drv_mutex);
> + if (result == 0)
> + nl80211_notify_dev_rename(rdev);
>
> - return 0;
> + return result;
> }
>
> /* exported functions */
> --
> 1.5.3.rc6.17.g1911
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH] wireless: Add missing locking to cfg80211_dev_rename
2008-05-08 21:30 ` [PATCH] wireless: Add missing locking to cfg80211_dev_rename Eric W. Biederman
2008-05-08 22:12 ` Serge E. Hallyn
@ 2008-05-08 22:18 ` Johannes Berg
1 sibling, 0 replies; 75+ messages in thread
From: Johannes Berg @ 2008-05-08 22:18 UTC (permalink / raw)
To: Eric W. Biederman
Cc: John W. Linville, David S. Miller, Benjamin Thery, linux-kernel,
Tejun Heo, Al Viro, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev, Greg KH
[-- Attachment #1: Type: text/plain, Size: 2873 bytes --]
On Thu, 2008-05-08 at 14:30 -0700, Eric W. Biederman wrote:
> device_rename only performs useful and race free validity
> checking at the optional sysfs level so depending on it
> for all of the validity checking in cfg80211_dev_rename
> is racy.
>
> Instead implement all of the needed validity checking
> and locking in cfg80211_dev_rename.
Makes sense, thanks, I didn't really think about it not being compiled
in.
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
> ---
> net/wireless/core.c | 33 ++++++++++++++++++++++++++++-----
> 1 files changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/net/wireless/core.c b/net/wireless/core.c
> index 80afacd..f1da0b9 100644
> --- a/net/wireless/core.c
> +++ b/net/wireless/core.c
> @@ -143,8 +143,11 @@ void cfg80211_put_dev(struct cfg80211_registered_device *drv)
> int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
> char *newname)
> {
> + struct cfg80211_registered_device *drv;
> int idx, taken = -1, result, digits;
>
> + mutex_lock(&cfg80211_drv_mutex);
> +
> /* prohibit calling the thing phy%d when %d is not its number */
> sscanf(newname, PHY_NAME "%d%n", &idx, &taken);
> if (taken == strlen(newname) && idx != rdev->idx) {
> @@ -156,14 +159,30 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
> * deny the name if it is phy<idx> where <idx> is printed
> * without leading zeroes. taken == strlen(newname) here
> */
> + result = -EINVAL;
> if (taken == strlen(PHY_NAME) + digits)
> - return -EINVAL;
> + goto out_unlock;
> + }
> +
> +
> + /* Ignore nop renames */
> + result = 0;
> + if (strcmp(newname, dev_name(&rdev->wiphy.dev)) == 0)
> + goto out_unlock;
> +
> + /* Ensure another device does not already have this name. */
> + list_for_each_entry(drv, &cfg80211_drv_list, list) {
> + result = -EINVAL;
> + if (strcmp(newname, dev_name(&drv->wiphy.dev)) == 0)
> + goto out_unlock;
> }
>
> - /* this will check for collisions */
> + /* this will only check for collisions in sysfs
> + * which is not even always compiled in.
> + */
> result = device_rename(&rdev->wiphy.dev, newname);
> if (result)
> - return result;
> + goto out_unlock;
>
> if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
> rdev->wiphy.debugfsdir,
> @@ -172,9 +191,13 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
> printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n",
> newname);
>
> - nl80211_notify_dev_rename(rdev);
> + result = 0;
> +out_unlock:
> + mutex_unlock(&cfg80211_drv_mutex);
> + if (result == 0)
> + nl80211_notify_dev_rename(rdev);
>
> - return 0;
> + return result;
> }
>
> /* exported functions */
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 828 bytes --]
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH] Fix kobject_rename and !CONFIG_SYSFS
2008-05-07 19:08 ` Greg KH
` (2 preceding siblings ...)
2008-05-08 21:30 ` [PATCH] wireless: Add missing locking to cfg80211_dev_rename Eric W. Biederman
@ 2008-05-08 21:41 ` Eric W. Biederman
2008-05-12 22:02 ` kobject: " Greg KH
3 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-08 21:41 UTC (permalink / raw)
To: Greg KH, Andrew Morton
Cc: Benjamin Thery, linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
When looking at kobject_rename I found two bugs
that exist when sysfs support is disabled in the kernel.
kobject_rename does not change the name on the kobject when
sysfs support is not compiled in.
kobject_rename without locking attempts to check the
validity of a rename operation, which the kobject layer
simply does not have the infrastructure to do.
This patch documents the previously unstated requirement of
kobject_rename that it is the responsibility of the caller to
provide mutual exclusion and to be certain that the new_name
for the kobject is valid.
This patch modifies sysfs_rename_dir in !CONFIG_SYSFS case
to call kobject_set_name to actually change the kobject_name.
This patch removes the bogus and misleading check in kobject_rename
that attempts to see if a rename is valid. The check is bogus
because we do not have the proper locking. The check is misleading
because it looks like we can and do perform useful checking at the
kobject level that we don't.
The users in net/core/dev.c already have all of the necessary checks
in place and don't care. The other use in net/wireless/core.c
as originally implemented is incorrect because it performs no locking
and a simple patch has been sent that adds the necessary locking
and sanity checks there, ensuring this patch will not have an
effect on users of kobject_rename or device_rename.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
Documentation/kobject.txt | 4 ++++
drivers/base/core.c | 5 +++++
include/linux/sysfs.h | 2 +-
lib/kobject.c | 18 +++++-------------
4 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index bf3256e..79184b4 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -118,6 +118,10 @@ the name of the kobject, call kobject_rename():
int kobject_rename(struct kobject *kobj, const char *new_name);
+Note kobject_rename does not perform any locking or have a solid notion of
+what names are valid so the caller must provide their own sanity checking
+and serialization.
+
There is a function called kobject_set_name() but that is legacy cruft and
is being removed. If your code needs to call this function, it is
incorrect and needs to be fixed.
diff --git a/drivers/base/core.c b/drivers/base/core.c
index be288b5..ad68f4c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1171,6 +1171,11 @@ EXPORT_SYMBOL_GPL(device_destroy);
* device_rename - renames a device
* @dev: the pointer to the struct device to be renamed
* @new_name: the new name of the device
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of device_rename
+ * on the same device and to ensure that new_name is valid and
+ * won't conflict with other devices.
*/
int device_rename(struct device *dev, char *new_name)
{
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 7858eac..db66fa5 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -137,7 +137,7 @@ static inline void sysfs_remove_dir(struct kobject *kobj)
static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
{
- return 0;
+ return kobject_set_name(kobj, "%s", new_name);
}
static inline int sysfs_move_dir(struct kobject *kobj,
diff --git a/lib/kobject.c b/lib/kobject.c
index 718e510..c7fb092 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -383,6 +383,11 @@ EXPORT_SYMBOL_GPL(kobject_init_and_add);
* kobject_rename - change the name of an object
* @kobj: object in question.
* @new_name: object's new name
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of kobject_rename
+ * on the same kobject and to ensure that new_name is valid and
+ * won't conflict with other kobjects.
*/
int kobject_rename(struct kobject *kobj, const char *new_name)
{
@@ -397,19 +402,6 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
if (!kobj->parent)
return -EINVAL;
- /* see if this name is already in use */
- if (kobj->kset) {
- struct kobject *temp_kobj;
- temp_kobj = kset_find_obj(kobj->kset, new_name);
- if (temp_kobj) {
- printk(KERN_WARNING "kobject '%s' cannot be renamed "
- "to '%s' as '%s' is already in existence.\n",
- kobject_name(kobj), new_name, new_name);
- kobject_put(temp_kobj);
- return -EINVAL;
- }
- }
-
devpath = kobject_get_path(kobj, GFP_KERNEL);
if (!devpath) {
error = -ENOMEM;
--
1.5.3.rc6.17.g1911
^ permalink raw reply related [flat|nested] 75+ messages in thread
* Re: kobject: Fix kobject_rename and !CONFIG_SYSFS
2008-05-08 21:41 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS Eric W. Biederman
@ 2008-05-12 22:02 ` Greg KH
2008-05-13 7:00 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Greg KH @ 2008-05-12 22:02 UTC (permalink / raw)
To: Eric W. Biederman, Randy Dunlap
Cc: Greg KH, Andrew Morton, Benjamin Thery, linux-kernel, Tejun Heo,
Al Viro, Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov,
netdev
On Thu, May 08, 2008 at 02:41:00PM -0700, Eric W. Biederman wrote:
>
> From: Eric W. Biederman <ebiederm@xmission.com>
>
> When looking at kobject_rename I found two bugs with
> that exist when sysfs support is disabled in the kernel.
>
> kobject_rename does not change the name on the kobject when
> sysfs support is not compiled in.
>
> kobject_rename without locking attempts to check the
> validity of a rename operation, which the kobject layer
> simply does not have the infrastructure to do so.
>
> This patch documents the previously unstated requirement of
> kobject_rename that is the responsibility of the caller to
> provide mutual exclusion and to be certain that the new_name
> for the kobject is valid.
>
> This patch modifies sysfs_rename_dir in !CONFIG_SYSFS case
> to call kobject_set_name to actually change the kobject_name.
>
> This patch removes the bogus and misleading check in kobject_rename
> that attempts to see if a rename is valid. The check is bogus
> because we do not have the proper locking. The check is misleading
> because it looks like we can and do perform useful checking at the
> kobject level that we don't.
>
> The users in net/core/dev.c already have all of the necessary checks
> in place and don't care. The other use in net/core/wireless.c
> as originally implemented is incorrect because it performs no locking
> and a simple patch has been sent that adds the necessary locking
> and sanity checks there. Ensuring this patch will not have an
> effect on users of kobject_rename or device_rename.
Eric, Randy Dunlap has found that this patch breaks the build when
CONFIG_SYSFS is not enabled. Can you please fix it up before I send it
to Linus?
The exact error is:
In file included from /local/linsrc/next-20080509/include/linux/kobject.h:21,
from /local/linsrc/next-20080509/include/linux/module.h:16,
from /local/linsrc/next-20080509/include/linux/crypto.h:21,
from /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets_64.c:7,
from /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets.c:4:
/local/linsrc/next-20080509/include/linux/sysfs.h: In function 'sysfs_rename_dir':
/local/linsrc/next-20080509/include/linux/sysfs.h:142: error: implicit declaration of function 'kobject_set_name'
thanks,
greg k-h
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: kobject: Fix kobject_rename and !CONFIG_SYSFS
2008-05-12 22:02 ` kobject: " Greg KH
@ 2008-05-13 7:00 ` Eric W. Biederman
2008-05-13 14:25 ` Benjamin Thery
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-13 7:00 UTC (permalink / raw)
To: Greg KH
Cc: Randy Dunlap, Greg KH, Andrew Morton, Benjamin Thery,
linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
Greg KH <greg@kroah.com> writes:
>
> Eric, Randy Dunlap has found that this patch breaks the build when
> CONFIG_SYSFS is not enabled. Can you please fix it up before I send it
> to Linus?
>
> The exact error is:
> In file included from /local/linsrc/next-20080509/include/linux/kobject.h:21,
> from /local/linsrc/next-20080509/include/linux/module.h:16,
> from /local/linsrc/next-20080509/include/linux/crypto.h:21,
> from
> /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets_64.c:7,
> from
> /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets.c:4:
> /local/linsrc/next-20080509/include/linux/sysfs.h: In function
> 'sysfs_rename_dir':
> /local/linsrc/next-20080509/include/linux/sysfs.h:142: error: implicit
> declaration of function 'kobject_set_name'
I will take a look in the morning and see if I can see what is wrong.
Which tree was this error against? I thought I tested this case,
and I'm wondering if there might be another patch that is hiding
kobject_set_name.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: kobject: Fix kobject_rename and !CONFIG_SYSFS
2008-05-13 7:00 ` Eric W. Biederman
@ 2008-05-13 14:25 ` Benjamin Thery
2008-05-13 16:44 ` Greg KH
0 siblings, 1 reply; 75+ messages in thread
From: Benjamin Thery @ 2008-05-13 14:25 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Greg KH, Randy Dunlap, Greg KH, Andrew Morton, linux-kernel,
Tejun Heo, Al Viro, Daniel Lezcano, Serge E. Hallyn,
Pavel Emelyanov, netdev
Eric W. Biederman wrote:
> Greg KH <greg@kroah.com> writes:
>
>> Eric, Randy Dunlap has found that this patch breaks the build when
>> CONFIG_SYSFS is not enabled. Can you please fix it up before I send it
>> to Linus?
>>
>> The exact error is:
>> In file included from /local/linsrc/next-20080509/include/linux/kobject.h:21,
>> from /local/linsrc/next-20080509/include/linux/module.h:16,
>> from /local/linsrc/next-20080509/include/linux/crypto.h:21,
>> from
>> /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets_64.c:7,
>> from
>> /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets.c:4:
>> /local/linsrc/next-20080509/include/linux/sysfs.h: In function
>> 'sysfs_rename_dir':
>> /local/linsrc/next-20080509/include/linux/sysfs.h:142: error: implicit
>> declaration of function 'kobject_set_name'
>
> I will take a look in the morning and see if I can see what is wrong.
>
> Which tree was this error against? I thought I tested this case,
> and I'm wondering if there might be another patch that is hiding
> kobject_set_name.
Argh, headers "cross-dependencies":
* linux/kobject.h includes linux/sysfs.h before defining
kobject_set_name()
* linux/sysfs.h needs to include linux/kobject.h to find
kobject_set_name() definition (for inlined sysfs_rename_dir() when
CONFIG_SYSFS=n)
sysfs_rename_dir() is only called by kobject.c, kobject_rename().
I guess this kind of patch is not acceptable to fix the dependency?
Index: linux-2.6/lib/kobject.c
===================================================================
--- linux-2.6.orig/lib/kobject.c 2008-05-13 15:14:38.000000000 +0200
+++ linux-2.6/lib/kobject.c 2008-05-13 15:58:37.000000000 +0200
@@ -416,8 +416,11 @@ int kobject_rename(struct kobject *kobj,
envp[0] = devpath_string;
envp[1] = NULL;
+#ifdef CONFIG_SYSFS
error = sysfs_rename_dir(kobj, new_name);
-
+#else
+ error = kobject_set_name(kobj, "%s", new_name);
+#endif
/* This function is mostly/only used for network interface.
* Some hotplug package track interfaces by their name and
* therefore want to know when the name is changed by the user. */
-Benjamin
>
> Eric
>
>
--
B e n j a m i n T h e r y - BULL/DT/Open Software R&D
http://www.bull.com
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: kobject: Fix kobject_rename and !CONFIG_SYSFS
2008-05-13 14:25 ` Benjamin Thery
@ 2008-05-13 16:44 ` Greg KH
2008-05-13 17:55 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS v2 Eric W. Biederman
2008-05-13 19:33 ` kobject: Fix kobject_rename and !CONFIG_SYSFS Benjamin Thery
0 siblings, 2 replies; 75+ messages in thread
From: Greg KH @ 2008-05-13 16:44 UTC (permalink / raw)
To: Benjamin Thery
Cc: Eric W. Biederman, Randy Dunlap, Greg KH, Andrew Morton,
linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
On Tue, May 13, 2008 at 04:25:01PM +0200, Benjamin Thery wrote:
> Eric W. Biederman wrote:
>> Greg KH <greg@kroah.com> writes:
>>> Eric, Randy Dunlap has found that this patch breaks the build when
>>> CONFIG_SYSFS is not enabled. Can you please fix it up before I send it
>>> to Linus?
>>>
>>> The exact error is:
>>> In file included from
>>> /local/linsrc/next-20080509/include/linux/kobject.h:21,
>>> from
>>> /local/linsrc/next-20080509/include/linux/module.h:16,
>>> from
>>> /local/linsrc/next-20080509/include/linux/crypto.h:21,
>>> from
>>> /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets_64.c:7,
>>> from
>>> /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets.c:4:
>>> /local/linsrc/next-20080509/include/linux/sysfs.h: In function
>>> 'sysfs_rename_dir':
>>> /local/linsrc/next-20080509/include/linux/sysfs.h:142: error: implicit
>>> declaration of function 'kobject_set_name'
>> I will take a look in the morning and see if I can see what is wrong.
>> Which tree was this error against? I thought I tested this case,
>> and I'm wondering if there might be another patch that is hiding
>> kobject_set_name.
>
> Argh, headers "cross-dependencies":
>
> * linux/kobject.h includes linux/sysfs.h before defining
> kobject_set_name()
>
> * linux/sysfs.h needs to include linux/kobject.h to find
> kobject_set_name() definition (for inlined sysfs_rename_dir() when
> CONFIG_SYSFS=n)
>
>
> sysfs_rename_dir() is only called by kobject.c, kobject_rename().
> I guess this kind of patch is not acceptable to fix the depency?
Ick, no. I'd rather add a kobject_set_name() function prototype to
sysfs.h in this case, that should remove the error, right?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH] Fix kobject_rename and !CONFIG_SYSFS v2
2008-05-13 16:44 ` Greg KH
@ 2008-05-13 17:55 ` Eric W. Biederman
2008-05-13 18:23 ` Randy.Dunlap
2008-05-13 20:16 ` Greg KH
2008-05-13 19:33 ` kobject: Fix kobject_rename and !CONFIG_SYSFS Benjamin Thery
1 sibling, 2 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-13 17:55 UTC (permalink / raw)
To: Greg KH
Cc: Benjamin Thery, Randy Dunlap, Greg KH, Andrew Morton,
linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
When looking at kobject_rename I found two bugs
that exist when sysfs support is disabled in the kernel.
kobject_rename does not change the name on the kobject when
sysfs support is not compiled in.
kobject_rename without locking attempts to check the
validity of a rename operation, which the kobject layer
simply does not have the infrastructure to do.
This patch documents the previously unstated requirement of
kobject_rename that it is the responsibility of the caller to
provide mutual exclusion and to be certain that the new_name
for the kobject is valid.
This patch modifies sysfs_rename_dir in !CONFIG_SYSFS case
to call kobject_set_name to actually change the kobject_name.
This patch removes the bogus and misleading check in kobject_rename
that attempts to see if a rename is valid. The check is bogus
because we do not have the proper locking. The check is misleading
because it looks like we can and do perform checking at the kobject
level that we don't.
Changelog:
v2: Added a declaration of kobject_set_name to sysfs.h
so the code actually compiles with !CONFIG_SYSFS.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
Documentation/kobject.txt | 4 ++++
drivers/base/core.c | 5 +++++
include/linux/sysfs.h | 4 +++-
lib/kobject.c | 18 +++++-------------
4 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index bf3256e..79184b4 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -118,6 +118,10 @@ the name of the kobject, call kobject_rename():
int kobject_rename(struct kobject *kobj, const char *new_name);
+Note kobject_rename does perform any locking or have a solid notion of
+what names are valid so the provide must provide their own sanity checking
+and serialization.
+
There is a function called kobject_set_name() but that is legacy cruft and
is being removed. If your code needs to call this function, it is
incorrect and needs to be fixed.
diff --git a/drivers/base/core.c b/drivers/base/core.c
index be288b5..ad68f4c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1171,6 +1171,11 @@ EXPORT_SYMBOL_GPL(device_destroy);
* device_rename - renames a device
* @dev: the pointer to the struct device to be renamed
* @new_name: the new name of the device
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of device_rename
+ * on the same device to ensure that new_name is valid and
+ * won't conflict with other devices.
*/
int device_rename(struct device *dev, char *new_name)
{
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 7858eac..6e61033 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -20,6 +20,8 @@
struct kobject;
struct module;
+extern int kobject_set_name(struct kobject *kobj, const char *name, ...)
+ __attribute__((format(printf, 2, 3)));
/* FIXME
* The *owner field is no longer used, but leave around
* until the tree gets cleaned up fully.
@@ -137,7 +139,7 @@ static inline void sysfs_remove_dir(struct kobject *kobj)
static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
{
- return 0;
+ return kobject_set_name(kobj, "%s", new_name);
}
static inline int sysfs_move_dir(struct kobject *kobj,
diff --git a/lib/kobject.c b/lib/kobject.c
index 718e510..c7fb092 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -383,6 +383,11 @@ EXPORT_SYMBOL_GPL(kobject_init_and_add);
* kobject_rename - change the name of an object
* @kobj: object in question.
* @new_name: object's new name
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of kobject_rename
+ * on the same kobject and to ensure that new_name is valid and
+ * won't conflict with other kobjects.
*/
int kobject_rename(struct kobject *kobj, const char *new_name)
{
@@ -397,19 +402,6 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
if (!kobj->parent)
return -EINVAL;
- /* see if this name is already in use */
- if (kobj->kset) {
- struct kobject *temp_kobj;
- temp_kobj = kset_find_obj(kobj->kset, new_name);
- if (temp_kobj) {
- printk(KERN_WARNING "kobject '%s' cannot be renamed "
- "to '%s' as '%s' is already in existence.\n",
- kobject_name(kobj), new_name, new_name);
- kobject_put(temp_kobj);
- return -EINVAL;
- }
- }
-
devpath = kobject_get_path(kobj, GFP_KERNEL);
if (!devpath) {
error = -ENOMEM;
--
1.5.3.rc6.17.g1911
^ permalink raw reply related [flat|nested] 75+ messages in thread
* Re: [PATCH] Fix kobject_rename and !CONFIG_SYSFS v2
2008-05-13 17:55 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS v2 Eric W. Biederman
@ 2008-05-13 18:23 ` Randy.Dunlap
2008-05-13 20:43 ` Eric W. Biederman
2008-05-13 20:16 ` Greg KH
1 sibling, 1 reply; 75+ messages in thread
From: Randy.Dunlap @ 2008-05-13 18:23 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Greg KH, Benjamin Thery, Randy Dunlap, Greg KH, Andrew Morton,
linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
On Tue, 13 May 2008, Eric W. Biederman wrote:
> Changelog:
> v2: Added a declaration of kboject_set_name to sysfs.h
> so the code actually compiles with !CONFIG_SYSFS.
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
> Documentation/kobject.txt | 4 ++++
> drivers/base/core.c | 5 +++++
> include/linux/sysfs.h | 4 +++-
> lib/kobject.c | 18 +++++-------------
> 4 files changed, 17 insertions(+), 14 deletions(-)
>
> diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
> index bf3256e..79184b4 100644
> --- a/Documentation/kobject.txt
> +++ b/Documentation/kobject.txt
> @@ -118,6 +118,10 @@ the name of the kobject, call kobject_rename():
>
> int kobject_rename(struct kobject *kobj, const char *new_name);
>
> +Note kobject_rename does perform any locking or have a solid notion of
^not
> +what names are valid so the provide must provide their own sanity checking
~~~~~~~
caller
> +and serialization.
> +
> There is a function called kobject_set_name() but that is legacy cruft and
> is being removed. If your code needs to call this function, it is
> incorrect and needs to be fixed.
--
~Randy
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH] Fix kobject_rename and !CONFIG_SYSFS v2
2008-05-13 18:23 ` Randy.Dunlap
@ 2008-05-13 20:43 ` Eric W. Biederman
0 siblings, 0 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-13 20:43 UTC (permalink / raw)
To: Randy.Dunlap
Cc: Eric W. Biederman, Greg KH, Benjamin Thery, Randy Dunlap,
Greg KH, Andrew Morton, linux-kernel, Tejun Heo, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev
"Randy.Dunlap" <rdunlap@xenotime.net> writes:
> On Tue, 13 May 2008, Eric W. Biederman wrote:
>
>> Changelog:
>> v2: Added a declaration of kboject_set_name to sysfs.h
>> so the code actually compiles with !CONFIG_SYSFS.
>>
>> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
>> ---
>> Documentation/kobject.txt | 4 ++++
>> drivers/base/core.c | 5 +++++
>> include/linux/sysfs.h | 4 +++-
>> lib/kobject.c | 18 +++++-------------
>> 4 files changed, 17 insertions(+), 14 deletions(-)
>>
>> diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
>> index bf3256e..79184b4 100644
>> --- a/Documentation/kobject.txt
>> +++ b/Documentation/kobject.txt
>> @@ -118,6 +118,10 @@ the name of the kobject, call kobject_rename():
>>
>> int kobject_rename(struct kobject *kobj, const char *new_name);
>>
>> +Note kobject_rename does perform any locking or have a solid notion of
>
> ^not
>
>> +what names are valid so the provide must provide their own sanity checking
> ~~~~~~~
> caller
>
>> +and serialization.
>> +
>> There is a function called kobject_set_name() but that is legacy cruft and
>> is being removed. If your code needs to call this function, it is
>> incorrect and needs to be fixed.
Good catch.
Thanks Randy.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH] Fix kobject_rename and !CONFIG_SYSFS v2
2008-05-13 17:55 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS v2 Eric W. Biederman
2008-05-13 18:23 ` Randy.Dunlap
@ 2008-05-13 20:16 ` Greg KH
2008-05-13 20:45 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS v3 Eric W. Biederman
1 sibling, 1 reply; 75+ messages in thread
From: Greg KH @ 2008-05-13 20:16 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Benjamin Thery, Randy Dunlap, Greg KH, Andrew Morton,
linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
On Tue, May 13, 2008 at 10:55:45AM -0700, Eric W. Biederman wrote:
>
> When looking at kobject_rename I found two bugs with
> that exist when sysfs support is disabled in the kernel.
<snip>
Better, but can you address Randy's objections?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH] Fix kobject_rename and !CONFIG_SYSFS v3
2008-05-13 20:16 ` Greg KH
@ 2008-05-13 20:45 ` Eric W. Biederman
2008-05-13 21:18 ` Randy Dunlap
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-13 20:45 UTC (permalink / raw)
To: Greg KH
Cc: Benjamin Thery, Randy Dunlap, Greg KH, Andrew Morton,
linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
When looking at kobject_rename I found two bugs
that exist when sysfs support is disabled in the kernel.
kobject_rename does not change the name on the kobject when
sysfs support is not compiled in.
kobject_rename without locking attempts to check the
validity of a rename operation, which the kobject layer
simply does not have the infrastructure to do.
This patch documents the previously unstated requirement of
kobject_rename that it is the responsibility of the caller to
provide mutual exclusion and to be certain that the new_name
for the kobject is valid.
This patch modifies sysfs_rename_dir in !CONFIG_SYSFS case
to call kobject_set_name to actually change the kobject_name.
This patch removes the bogus and misleading check in kobject_rename
that attempts to see if a rename is valid. The check is bogus
because we do not have the proper locking. The check is misleading
because it looks like we can and do perform checking at the kobject
level that we don't.
Changelog:
v3: Documentation typo fixes
v2: Added a declaration of kobject_set_name to sysfs.h
so the code actually compiles with !CONFIG_SYSFS.
Unscrambling the header dependencies so everything looks
beautiful is a project for another day.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
Documentation/kobject.txt | 4 ++++
drivers/base/core.c | 5 +++++
include/linux/sysfs.h | 4 +++-
lib/kobject.c | 18 +++++-------------
4 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index bf3256e..79184b4 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -118,6 +118,10 @@ the name of the kobject, call kobject_rename():
int kobject_rename(struct kobject *kobj, const char *new_name);
+Note kobject_rename does perform any locking or have a solid notion of
+what names are valid so the provide must provide their own sanity checking
+and serialization.
+
There is a function called kobject_set_name() but that is legacy cruft and
is being removed. If your code needs to call this function, it is
incorrect and needs to be fixed.
diff --git a/drivers/base/core.c b/drivers/base/core.c
index be288b5..ad68f4c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1171,6 +1171,11 @@ EXPORT_SYMBOL_GPL(device_destroy);
* device_rename - renames a device
* @dev: the pointer to the struct device to be renamed
* @new_name: the new name of the device
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of device_rename
+ * on the same device to ensure that new_name is valid and
+ * won't conflict with other devices.
*/
int device_rename(struct device *dev, char *new_name)
{
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 7858eac..6e61033 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -20,6 +20,8 @@
struct kobject;
struct module;
+extern int kobject_set_name(struct kobject *kobj, const char *name, ...)
+ __attribute__((format(printf, 2, 3)));
/* FIXME
* The *owner field is no longer used, but leave around
* until the tree gets cleaned up fully.
@@ -137,7 +139,7 @@ static inline void sysfs_remove_dir(struct kobject *kobj)
static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
{
- return 0;
+ return kobject_set_name(kobj, "%s", new_name);
}
static inline int sysfs_move_dir(struct kobject *kobj,
diff --git a/lib/kobject.c b/lib/kobject.c
index 718e510..c7fb092 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -383,6 +383,11 @@ EXPORT_SYMBOL_GPL(kobject_init_and_add);
* kobject_rename - change the name of an object
* @kobj: object in question.
* @new_name: object's new name
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of kobject_rename
+ * on the same kobject and to ensure that new_name is valid and
+ * won't conflict with other kobjects.
*/
int kobject_rename(struct kobject *kobj, const char *new_name)
{
@@ -397,19 +402,6 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
if (!kobj->parent)
return -EINVAL;
- /* see if this name is already in use */
- if (kobj->kset) {
- struct kobject *temp_kobj;
- temp_kobj = kset_find_obj(kobj->kset, new_name);
- if (temp_kobj) {
- printk(KERN_WARNING "kobject '%s' cannot be renamed "
- "to '%s' as '%s' is already in existence.\n",
- kobject_name(kobj), new_name, new_name);
- kobject_put(temp_kobj);
- return -EINVAL;
- }
- }
-
devpath = kobject_get_path(kobj, GFP_KERNEL);
if (!devpath) {
error = -ENOMEM;
--
1.5.3.rc6.17.g1911
^ permalink raw reply related [flat|nested] 75+ messages in thread
* Re: [PATCH] Fix kobject_rename and !CONFIG_SYSFS v3
2008-05-13 20:45 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS v3 Eric W. Biederman
@ 2008-05-13 21:18 ` Randy Dunlap
2008-05-14 4:39 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS v4 Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Randy Dunlap @ 2008-05-13 21:18 UTC (permalink / raw)
To: Eric W. Biederman, Greg KH
Cc: Al Viro, Benjamin Thery, Greg KH, linux-kernel, netdev,
Daniel Lezcano, Andrew Morton, Pavel Emelyanov, Serge E. Hallyn,
Tejun Heo
> Changelog:
> v3: Documentation typo fixes
Inserted/attached wrong patch file?
I don't see any text changes.
> diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
> index bf3256e..79184b4 100644
> --- a/Documentation/kobject.txt
> +++ b/Documentation/kobject.txt
> @@ -118,6 +118,10 @@ the name of the kobject, call kobject_rename():
>
> int kobject_rename(struct kobject *kobj, const char
> *new_name);
>
> +Note kobject_rename does perform any locking or have a solid
> notion of
> +what names are valid so the provide must provide their own
> sanity checking
> +and serialization.
> +
> There is a function called kobject_set_name() but that is
> legacy cruft and
> is being removed. If your code needs to call this function,
> it is
> incorrect and needs to be fixed.
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH] Fix kobject_rename and !CONFIG_SYSFS v4
2008-05-13 21:18 ` Randy Dunlap
@ 2008-05-14 4:39 ` Eric W. Biederman
2008-05-14 5:03 ` Andrew Morton
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-14 4:39 UTC (permalink / raw)
To: Randy Dunlap
Cc: Greg KH, Al Viro, Benjamin Thery, Greg KH, linux-kernel, netdev,
Daniel Lezcano, Andrew Morton, Pavel Emelyanov, Serge E. Hallyn,
Tejun Heo
When looking at kobject_rename I found two bugs
that exist when sysfs support is disabled in the kernel.
kobject_rename does not change the name on the kobject when
sysfs support is not compiled in.
kobject_rename without locking attempts to check the
validity of a rename operation, which the kobject layer
simply does not have the infrastructure to do.
This patch documents the previously unstated requirement of
kobject_rename that it is the responsibility of the caller to
provide mutual exclusion and to be certain that the new_name
for the kobject is valid.
This patch modifies sysfs_rename_dir in !CONFIG_SYSFS case
to call kobject_set_name to actually change the kobject_name.
This patch removes the bogus and misleading check in kobject_rename
that attempts to see if a rename is valid. The check is bogus
because we do not have the proper locking. The check is misleading
because it looks like we can and do perform checking at the kobject
level that we don't.
Changelog:
v4: Documentation typo fixes
v2: Added a declaration of kobject_set_name to sysfs.h
so the code actually compiles with !CONFIG_SYSFS.
Unscrambling the header dependencies so everything looks
beautiful is a project for another day.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
Documentation/kobject.txt | 4 ++++
drivers/base/core.c | 5 +++++
include/linux/sysfs.h | 4 +++-
lib/kobject.c | 18 +++++-------------
4 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
index bf3256e..ac80d82 100644
--- a/Documentation/kobject.txt
+++ b/Documentation/kobject.txt
@@ -118,6 +118,10 @@ the name of the kobject, call kobject_rename():
int kobject_rename(struct kobject *kobj, const char *new_name);
+Note kobject_rename does not perform any locking or have a solid notion of
+what names are valid so the caller must provide their own sanity checking
+and serialization.
+
There is a function called kobject_set_name() but that is legacy cruft and
is being removed. If your code needs to call this function, it is
incorrect and needs to be fixed.
diff --git a/drivers/base/core.c b/drivers/base/core.c
index be288b5..ad68f4c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1171,6 +1171,11 @@ EXPORT_SYMBOL_GPL(device_destroy);
* device_rename - renames a device
* @dev: the pointer to the struct device to be renamed
* @new_name: the new name of the device
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of device_rename
+ * on the same device to ensure that new_name is valid and
+ * won't conflict with other devices.
*/
int device_rename(struct device *dev, char *new_name)
{
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 7858eac..6e61033 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -20,6 +20,8 @@
struct kobject;
struct module;
+extern int kobject_set_name(struct kobject *kobj, const char *name, ...)
+ __attribute__((format(printf, 2, 3)));
/* FIXME
* The *owner field is no longer used, but leave around
* until the tree gets cleaned up fully.
@@ -137,7 +139,7 @@ static inline void sysfs_remove_dir(struct kobject *kobj)
static inline int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
{
- return 0;
+ return kobject_set_name(kobj, "%s", new_name);
}
static inline int sysfs_move_dir(struct kobject *kobj,
diff --git a/lib/kobject.c b/lib/kobject.c
index 718e510..c7fb092 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -383,6 +383,11 @@ EXPORT_SYMBOL_GPL(kobject_init_and_add);
* kobject_rename - change the name of an object
* @kobj: object in question.
* @new_name: object's new name
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of kobject_rename
+ * on the same kobject and to ensure that new_name is valid and
+ * won't conflict with other kobjects.
*/
int kobject_rename(struct kobject *kobj, const char *new_name)
{
@@ -397,19 +402,6 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
if (!kobj->parent)
return -EINVAL;
- /* see if this name is already in use */
- if (kobj->kset) {
- struct kobject *temp_kobj;
- temp_kobj = kset_find_obj(kobj->kset, new_name);
- if (temp_kobj) {
- printk(KERN_WARNING "kobject '%s' cannot be renamed "
- "to '%s' as '%s' is already in existence.\n",
- kobject_name(kobj), new_name, new_name);
- kobject_put(temp_kobj);
- return -EINVAL;
- }
- }
-
devpath = kobject_get_path(kobj, GFP_KERNEL);
if (!devpath) {
error = -ENOMEM;
--
1.5.3.rc6.17.g1911
^ permalink raw reply related [flat|nested] 75+ messages in thread
* Re: [PATCH] Fix kobject_rename and !CONFIG_SYSFS v4
2008-05-14 4:39 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS v4 Eric W. Biederman
@ 2008-05-14 5:03 ` Andrew Morton
2008-05-14 9:01 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Andrew Morton @ 2008-05-14 5:03 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Randy Dunlap, Greg KH, Al Viro, Benjamin Thery, Greg KH,
linux-kernel, netdev, Daniel Lezcano, Pavel Emelyanov,
Serge E. Hallyn, Tejun Heo
On Tue, 13 May 2008 21:39:45 -0700 ebiederm@xmission.com (Eric W. Biederman) wrote:
>
> When looking at kobject_rename I found two bugs with
> that exist when sysfs support is disabled in the kernel.
>
> kobject_rename does not change the name on the kobject when
> sysfs support is not compiled in.
>
> kobject_rename without locking attempts to check the
> validity of a rename operation, which the kobject layer
> simply does not have the infrastructure to do.
>
> This patch documents the previously unstated requirement of
> kobject_rename that is the responsibility of the caller to
> provide mutual exclusion and to be certain that the new_name
> for the kobject is valid.
>
> This patch modifies sysfs_rename_dir in !CONFIG_SYSFS case
> to call kobject_set_name to actually change the kobject_name.
>
> This patch removes the bogus and misleading check in kobject_rename
> that attempts to see if a rename is valid. The check is bogus
> because we do not have the proper locking. The check is misleading
> because it looks like we can and do perform checking at the kobject
> level that we don't.
>
> Changelog:
> v4: Documentation typo fixes
>
> v2: Added a declaration of kboject_set_name to sysfs.h
> so the code actually compiles with !CONFIG_SYSFS.
>
> Unscrambling the header dependencies so everything looks
> beautiful is a project for another day.
>
> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
> ---
> Documentation/kobject.txt | 4 ++++
> drivers/base/core.c | 5 +++++
> include/linux/sysfs.h | 4 +++-
> lib/kobject.c | 18 +++++-------------
> 4 files changed, 17 insertions(+), 14 deletions(-)
>
> diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
> index bf3256e..ac80d82 100644
> --- a/Documentation/kobject.txt
> +++ b/Documentation/kobject.txt
> @@ -118,6 +118,10 @@ the name of the kobject, call kobject_rename():
>
> int kobject_rename(struct kobject *kobj, const char *new_name);
>
> +Note kobject_rename does not perform any locking or have a solid notion of
> +what names are valid so the caller must provide their own sanity checking
> +and serialization.
> +
> There is a function called kobject_set_name() but that is legacy cruft and
> is being removed. If your code needs to call this function, it is
> incorrect and needs to be fixed.
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index be288b5..ad68f4c 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -1171,6 +1171,11 @@ EXPORT_SYMBOL_GPL(device_destroy);
> * device_rename - renames a device
> * @dev: the pointer to the struct device to be renamed
> * @new_name: the new name of the device
> + *
> + * It is the responsibility of the caller to provide mutual
> + * exclusion between two different calls of device_rename
> + * on the same device to ensure that new_name is valid and
> + * won't conflict with other devices.
> */
> int device_rename(struct device *dev, char *new_name)
> {
> diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
> index 7858eac..6e61033 100644
> --- a/include/linux/sysfs.h
> +++ b/include/linux/sysfs.h
> @@ -20,6 +20,8 @@
> struct kobject;
> struct module;
>
> +extern int kobject_set_name(struct kobject *kobj, const char *name, ...)
> + __attribute__((format(printf, 2, 3)));
Duplicating the kobject_set_name() declaration in sysfs.h is rather a hack.
It'd be better to move it into a new header file, included by both
sysfs.h and kobject.h. Perhaps there are other declarations which can
be moved with it.
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH] Fix kobject_rename and !CONFIG_SYSFS v4
2008-05-14 5:03 ` Andrew Morton
@ 2008-05-14 9:01 ` Eric W. Biederman
2008-05-14 9:20 ` Andrew Morton
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-14 9:01 UTC (permalink / raw)
To: Andrew Morton
Cc: Randy Dunlap, Greg KH, Al Viro, Benjamin Thery, Greg KH,
linux-kernel, netdev, Daniel Lezcano, Pavel Emelyanov,
Serge E. Hallyn, Tejun Heo
Andrew Morton <akpm@linux-foundation.org> writes:
>> diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
>> index 7858eac..6e61033 100644
>> --- a/include/linux/sysfs.h
>> +++ b/include/linux/sysfs.h
>> @@ -20,6 +20,8 @@
>> struct kobject;
>> struct module;
>>
>> +extern int kobject_set_name(struct kobject *kobj, const char *name, ...)
>> + __attribute__((format(printf, 2, 3)));
>
> Duplicating the kobject_set_name() declaration in sysfs.h is rather a hack.
A bit. It really isn't worse than a struct declaration like struct kobject;
> It'd be better to move it into a new header file, included by both
> sysfs.h and kobject.h. Perhaps there are other declarations which can
> be moved with it.
My gut feel says that sysfs.h should include kobject.h instead of the
other way around.
However it gets reorganized, it is an entirely separate problem
from the one this patch sets out to solve and so should go in
a different patch.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH] Fix kobject_rename and !CONFIG_SYSFS v4
2008-05-14 9:01 ` Eric W. Biederman
@ 2008-05-14 9:20 ` Andrew Morton
2008-05-14 9:51 ` Benjamin Thery
0 siblings, 1 reply; 75+ messages in thread
From: Andrew Morton @ 2008-05-14 9:20 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Randy Dunlap, Greg KH, Al Viro, Benjamin Thery, Greg KH,
linux-kernel, netdev, Daniel Lezcano, Pavel Emelyanov,
Serge E. Hallyn, Tejun Heo
On Wed, 14 May 2008 02:01:37 -0700 ebiederm@xmission.com (Eric W. Biederman) wrote:
> Andrew Morton <akpm@linux-foundation.org> writes:
>
> >> diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
> >> index 7858eac..6e61033 100644
> >> --- a/include/linux/sysfs.h
> >> +++ b/include/linux/sysfs.h
> >> @@ -20,6 +20,8 @@
> >> struct kobject;
> >> struct module;
> >>
> >> +extern int kobject_set_name(struct kobject *kobj, const char *name, ...)
> >> + __attribute__((format(printf, 2, 3)));
> >
> > Duplicating the kobject_set_name() declaration in sysfs.h is rather a hack.
> A bit. It really isn't worse than a struct declaration like struct kobject;
>
> > It'd be better to move it into a new header file, included by both
> > sysfs.h and kobject.h. Perhaps there are other declarations which can
> > be moved with it.
>
> My gut feel says that sysfs.h should include kobject.h instead of the
> other way around.
>
> However it gets reorganized, it is an entirely separate problem
> from the one this patch sets out to solve and so should go in
> a different patch.
>
umm, well, actually, it's a problem which your patch introduces, by adding a
new dependency.
uninlining sysfs_rename_dir() would be a sensible solution.
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH] Fix kobject_rename and !CONFIG_SYSFS v4
2008-05-14 9:20 ` Andrew Morton
@ 2008-05-14 9:51 ` Benjamin Thery
2008-05-14 9:56 ` Andrew Morton
0 siblings, 1 reply; 75+ messages in thread
From: Benjamin Thery @ 2008-05-14 9:51 UTC (permalink / raw)
To: Andrew Morton
Cc: Eric W. Biederman, Randy Dunlap, Greg KH, Al Viro, Greg KH,
linux-kernel, netdev, Daniel Lezcano, Pavel Emelyanov,
Serge E. Hallyn, Tejun Heo
Andrew Morton wrote:
> On Wed, 14 May 2008 02:01:37 -0700 ebiederm@xmission.com (Eric W. Biederman) wrote:
>
>> Andrew Morton <akpm@linux-foundation.org> writes:
>>
>>>> diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
>>>> index 7858eac..6e61033 100644
>>>> --- a/include/linux/sysfs.h
>>>> +++ b/include/linux/sysfs.h
>>>> @@ -20,6 +20,8 @@
>>>> struct kobject;
>>>> struct module;
>>>>
>>>> +extern int kobject_set_name(struct kobject *kobj, const char *name, ...)
>>>> + __attribute__((format(printf, 2, 3)));
>>> Duplicating the kobject_set_name() declaration in sysfs.h is rather a hack.
>> A bit. It really isn't worse than a struct declaration like struct kobject;
>>
>>> It'd be better to move it into a new header file, included by both
>>> sysfs.h and kobject.h. Perhaps there are other declarations which can
>>> be moved with it.
>> My gut feel says that sysfs.h should include kobject.h instead of the
>> other way around.
This is also my feeling.
To do this, we need to move "struct attributes" related definitions to
kobject.h, and fix some .c files that don't include sysfs.h as they
should (sysfs.h was included indirectly via kobject.h in these files: e.g.
kernel/sched.c, fs/partitions/check.c).
>>
>> However it gets reorganized, it is an entirely separate problem
>> from the one this patch sets out to solve and so should go in
>> a different patch.
>>
>
> umm, well, actually, it's a problem which your patch introduces, by adding a
> new dependency.
>
> uninlining sysfs_rename_dir() would be a sensible solution.
It is inlined only when CONFIG_SYSFS=n. When sysfs is enabled
sysfs_rename_dir() is compiled from fs/sysfs/dir.c
Uninlining it will require us to find an appropriate .c file to put it
in: we can't put it in fs/sysfs/dir.c. It is not built if CONFIG_SYSFS
is disabled.
-Benjamin
--
B e n j a m i n T h e r y - BULL/DT/Open Software R&D
http://www.bull.com
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH] Fix kobject_rename and !CONFIG_SYSFS v4
2008-05-14 9:51 ` Benjamin Thery
@ 2008-05-14 9:56 ` Andrew Morton
0 siblings, 0 replies; 75+ messages in thread
From: Andrew Morton @ 2008-05-14 9:56 UTC (permalink / raw)
To: Benjamin Thery
Cc: Eric W. Biederman, Randy Dunlap, Greg KH, Al Viro, Greg KH,
linux-kernel, netdev, Daniel Lezcano, Pavel Emelyanov,
Serge E. Hallyn, Tejun Heo
On Wed, 14 May 2008 11:51:17 +0200 Benjamin Thery <benjamin.thery@bull.net> wrote:
> > uninlining sysfs_rename_dir() would be a sensible solution.
>
> It is inlined only when CONFIG_SYSFS=n. When sysfs is enabled
> sysfs_rename_dir() is compiled from fs/sysfs/dir.c
>
> Uninlining it will require us to find an appropriate .c file to put it
> in: we can't put it in fs/sysfs/dir.c. It is not built if CONFIG_SYSFS
> is disabled.
Well yes, that's the problem. I stuck it in lib/kobject.c but ugh.
I guess we could make it a macro and quickly run away.
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: kobject: Fix kobject_rename and !CONFIG_SYSFS
2008-05-13 16:44 ` Greg KH
2008-05-13 17:55 ` [PATCH] Fix kobject_rename and !CONFIG_SYSFS v2 Eric W. Biederman
@ 2008-05-13 19:33 ` Benjamin Thery
2008-05-13 20:42 ` Eric W. Biederman
1 sibling, 1 reply; 75+ messages in thread
From: Benjamin Thery @ 2008-05-13 19:33 UTC (permalink / raw)
To: Greg KH
Cc: Eric W. Biederman, Randy Dunlap, Greg KH, Andrew Morton,
linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
Quoting Greg KH <greg@kroah.com>:
> On Tue, May 13, 2008 at 04:25:01PM +0200, Benjamin Thery wrote:
>> Eric W. Biederman wrote:
>>> Greg KH <greg@kroah.com> writes:
>>>> Eric, Randy Dunlap has found that this patch breaks the build when
>>>> CONFIG_SYSFS is not enabled. Can you please fix it up before I send it
>>>> to Linus?
>>>>
>>>> The exact error is:
>>>> In file included from
>>>> /local/linsrc/next-20080509/include/linux/kobject.h:21,
>>>> from
>>>> /local/linsrc/next-20080509/include/linux/module.h:16,
>>>> from
>>>> /local/linsrc/next-20080509/include/linux/crypto.h:21,
>>>> from
>>>> /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets_64.c:7,
>>>> from
>>>> /local/linsrc/next-20080509/arch/x86/kernel/asm-offsets.c:4:
>>>> /local/linsrc/next-20080509/include/linux/sysfs.h: In function
>>>> 'sysfs_rename_dir':
>>>> /local/linsrc/next-20080509/include/linux/sysfs.h:142: error: implicit
>>>> declaration of function 'kobject_set_name'
>>> I will take a look in the morning and see if I can see what is wrong.
>>> Which tree was this error against? I thought I tested this case,
>>> and I'm wondering if there might be another patch that is hiding
>>> kobject_set_name.
>>
>> Argh, headers "cross-dependencies":
>>
>> * linux/kobject.h includes linux/sysfs.h before defining
>> kobject_set_name()
>>
>> * linux/sysfs.h needs to include linux/kobject.h to find
>> kobject_set_name() definition (for inlined sysfs_rename_dir() when
>> CONFIG_SYSFS=n)
>>
>>
>> sysfs_rename_dir() is only called by kobject.c, kobject_rename().
>> I guess this kind of patch is not acceptable to fix the dependency?
>
> Ick, no. I'd rather add a kobject_set_name() function prototype to
> sysfs.h in this case, that should remove the error, right?
That's what I did first. But I thought it was worse than the above
solution.
Anyway, adding kobject_set_name() to sysfs.h did fix the error.
-Benjamin
>
> thanks,
>
> greg k-h
>
>
----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: kobject: Fix kobject_rename and !CONFIG_SYSFS
2008-05-13 19:33 ` kobject: Fix kobject_rename and !CONFIG_SYSFS Benjamin Thery
@ 2008-05-13 20:42 ` Eric W. Biederman
0 siblings, 0 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-05-13 20:42 UTC (permalink / raw)
To: Benjamin Thery
Cc: Greg KH, Eric W. Biederman, Randy Dunlap, Greg KH, Andrew Morton,
linux-kernel, Tejun Heo, Al Viro, Daniel Lezcano,
Serge E. Hallyn, Pavel Emelyanov, netdev
"Benjamin Thery " <Benjamin.Thery@bull.net> writes:
> That's what I did first. But I thought it was worse than the above
> solution.
> Anyway, adding kobject_set_name() to sysfs.h did fix the error.
I figure sorting out the headers is worthwhile, but that is
another project.
Because kobject.h includes sysfs.h we can't get the declarations
out of sync which is the really important part.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 11/11] sysfs: user namespaces: add ns to user_struct
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
` (9 preceding siblings ...)
2008-05-06 17:32 ` [PATCH 10/11] avoid kobject name conflict with different namespaces Benjamin Thery
@ 2008-05-06 17:32 ` Benjamin Thery
2008-05-06 19:03 ` Serge E. Hallyn
2008-05-06 17:53 ` [RESEND][PATCH 00/11] sysfs tagged directories Greg KH
11 siblings, 1 reply; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 17:32 UTC (permalink / raw)
To: linux-kernel
Cc: Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev,
Benjamin Thery
Add the user_namespace to user_struct.
Add a user_namespace to the sysfs_tag struct.
Mark the /sys/kernel/uids directory to be tagged so that processes in
different user namespaces can remount /sys and see their own uid
listings.
Without this patch, having CONFIG_FAIR_SCHED=y makes user namespaces
unusable, because when you
clone(CLONE_NEWUSER)
it will auto-create the root userid and try to create
/sys/kernel/uids/0. Since that already exists from the parent user
namespace, the create fails, and the clone misleadingly ends up
returning -ENOMEM.
This patch fixes the issue by allowing each user namespace to remount
/sys, and having /sys filter the /sys/kernel/uids/ entries by user
namespace.
Signed-off-by: Serge Hallyn <serue@us.ibm.com>
---
fs/dquot.c | 2 -
fs/ioprio.c | 2 -
fs/sysfs/mount.c | 25 +++++++++++++++++++++
include/linux/sched.h | 3 +-
include/linux/sysfs.h | 9 +++++++
include/linux/types.h | 5 ++++
include/linux/user_namespace.h | 5 ++++
kernel/user.c | 48 ++++++++++++++++++++++++++++++++++-------
kernel/user_namespace.c | 3 +-
security/keys/process_keys.c | 14 +++++------
10 files changed, 97 insertions(+), 19 deletions(-)
Index: linux-vanilla/fs/dquot.c
===================================================================
--- linux-vanilla.orig/fs/dquot.c
+++ linux-vanilla/fs/dquot.c
@@ -968,7 +968,7 @@ static void send_warning(const struct dq
MINOR(dquot->dq_sb->s_dev));
if (ret)
goto attr_err_out;
- ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
+ ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid.uid);
if (ret)
goto attr_err_out;
genlmsg_end(skb, msg_head);
Index: linux-vanilla/fs/ioprio.c
===================================================================
--- linux-vanilla.orig/fs/ioprio.c
+++ linux-vanilla/fs/ioprio.c
@@ -224,7 +224,7 @@ asmlinkage long sys_ioprio_get(int which
break;
do_each_thread(g, p) {
- if (p->uid != user->uid)
+ if (!task_user_equiv(p, user))
continue;
tmpio = get_task_ioprio(p);
if (tmpio < 0)
Index: linux-vanilla/fs/sysfs/mount.c
===================================================================
--- linux-vanilla.orig/fs/sysfs/mount.c
+++ linux-vanilla/fs/sysfs/mount.c
@@ -17,6 +17,7 @@
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/nsproxy.h>
+#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include "sysfs.h"
@@ -81,6 +82,7 @@ static int sysfs_fill_super(struct super
sb->s_root = root;
sb->s_fs_info = info;
info->tag.net_ns = hold_net(current->nsproxy->net_ns);
+ info->tag.user_ns = current->nsproxy->user_ns;
return 0;
out_err:
@@ -100,6 +102,8 @@ static int sysfs_test_super(struct super
if (task->nsproxy->net_ns != info->tag.net_ns)
found = 0;
+ if (task->nsproxy->user_ns != info->tag.user_ns)
+ found = 0;
return found;
}
@@ -214,6 +218,27 @@ static struct pernet_operations sysfs_ne
};
#endif
+#ifdef CONFIG_USER_NS
+void sysfs_userns_exit(struct user_namespace *user_ns)
+{
+ /* Allow the net namespace to go away while sysfs is still mounted. */
+ struct super_block *sb;
+ printk(KERN_NOTICE "sysfs: user namespace exiting\n");
+ mutex_lock(&sysfs_rename_mutex);
+ sysfs_grab_supers();
+ mutex_lock(&sysfs_mutex);
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ struct sysfs_super_info *info = sysfs_info(sb);
+ if (info->tag.user_ns != user_ns)
+ continue;
+ info->tag.user_ns = NULL;
+ }
+ mutex_unlock(&sysfs_mutex);
+ sysfs_release_supers();
+ mutex_unlock(&sysfs_rename_mutex);
+}
+#endif
+
int __init sysfs_init(void)
{
int err = -ENOMEM;
Index: linux-vanilla/include/linux/sched.h
===================================================================
--- linux-vanilla.orig/include/linux/sched.h
+++ linux-vanilla/include/linux/sched.h
@@ -595,7 +595,7 @@ struct user_struct {
/* Hash table maintenance information */
struct hlist_node uidhash_node;
- uid_t uid;
+ struct k_uid_t uid;
#ifdef CONFIG_USER_SCHED
struct task_group *tg;
@@ -1697,6 +1697,7 @@ static inline struct user_struct *get_ui
extern void free_uid(struct user_struct *);
extern void switch_uid(struct user_struct *);
extern void release_uids(struct user_namespace *ns);
+extern int task_user_equiv(struct task_struct *tsk, struct user_struct *u);
#include <asm/current.h>
Index: linux-vanilla/include/linux/sysfs.h
===================================================================
--- linux-vanilla.orig/include/linux/sysfs.h
+++ linux-vanilla/include/linux/sysfs.h
@@ -20,6 +20,7 @@
struct kobject;
struct module;
struct net;
+struct user_namespace;
/* FIXME
* The *owner field is no longer used, but leave around
@@ -81,6 +82,7 @@ struct sysfs_ops {
struct sysfs_tag_info {
struct net *net_ns;
+ struct user_namespace *user_ns;
};
struct sysfs_tagged_dir_operations {
@@ -139,6 +141,9 @@ int sysfs_enable_tagging(struct kobject
extern int __must_check sysfs_init(void);
+struct user_namespace;
+void sysfs_userns_exit(struct user_namespace *user_ns);
+
#else /* CONFIG_SYSFS */
static inline int sysfs_schedule_callback(struct kobject *kobj,
@@ -264,6 +269,10 @@ static inline int __must_check sysfs_ini
return 0;
}
+static inline void sysfs_userns_exit(struct user_namespace *user_ns)
+{
+}
+
#endif /* CONFIG_SYSFS */
#endif /* _SYSFS_H_ */
Index: linux-vanilla/include/linux/types.h
===================================================================
--- linux-vanilla.orig/include/linux/types.h
+++ linux-vanilla/include/linux/types.h
@@ -37,6 +37,11 @@ typedef __kernel_gid32_t gid_t;
typedef __kernel_uid16_t uid16_t;
typedef __kernel_gid16_t gid16_t;
+struct k_uid_t {
+ uid_t uid;
+ struct user_namespace *ns;
+};
+
typedef unsigned long uintptr_t;
#ifdef CONFIG_UID16
Index: linux-vanilla/include/linux/user_namespace.h
===================================================================
--- linux-vanilla.orig/include/linux/user_namespace.h
+++ linux-vanilla/include/linux/user_namespace.h
@@ -12,10 +12,15 @@
struct user_namespace {
struct kref kref;
struct hlist_head uidhash_table[UIDHASH_SZ];
+ struct kset *kset;
struct user_struct *root_user;
};
extern struct user_namespace init_user_ns;
+extern int register_user_ns_kobj(struct user_namespace *ns);
+extern void unregister_user_ns_kobj(struct user_namespace *ns);
+extern int register_user_ns_kobj(struct user_namespace *ns);
+extern void unregister_user_ns_kobj(struct user_namespace *ns);
#ifdef CONFIG_USER_NS
Index: linux-vanilla/kernel/user.c
===================================================================
--- linux-vanilla.orig/kernel/user.c
+++ linux-vanilla/kernel/user.c
@@ -53,6 +53,10 @@ struct user_struct root_user = {
.files = ATOMIC_INIT(0),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
+ .uid = {
+ .uid = 0,
+ .ns = &init_user_ns,
+ },
#ifdef CONFIG_USER_SCHED
.tg = &init_task_group,
#endif
@@ -71,13 +75,23 @@ static void uid_hash_remove(struct user_
hlist_del_init(&up->uidhash_node);
}
-static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+int task_user_equiv(struct task_struct *tsk, struct user_struct *u)
+{
+ if (tsk->uid != u->uid.uid)
+ return 0;
+ if (tsk->nsproxy->user_ns != u->uid.ns)
+ return 0;
+ return 1;
+}
+
+static struct user_struct *uid_hash_find(uid_t uid,
+ struct hlist_head *hashent)
{
struct user_struct *user;
struct hlist_node *h;
hlist_for_each_entry(user, h, hashent, uidhash_node) {
- if (user->uid == uid) {
+ if (user->uid.uid == uid) {
atomic_inc(&user->__count);
return user;
}
@@ -236,6 +250,23 @@ static void uids_release(struct kobject
return;
}
+static const void *userns_sb_tag(struct sysfs_tag_info *info)
+{
+ return info->user_ns;
+}
+
+static const void *userns_kobject_tag(struct kobject *kobj)
+{
+ struct user_struct *up;
+ up = container_of(kobj, struct user_struct, kobj);
+ return up->uid.ns;
+}
+
+static struct sysfs_tagged_dir_operations userns_tagged_dir_operations = {
+ .sb_tag = userns_sb_tag,
+ .kobject_tag = userns_kobject_tag,
+};
+
static struct kobj_type uids_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_attrs = uids_attributes,
@@ -246,19 +277,19 @@ static struct kobj_type uids_ktype = {
static int uids_user_create(struct user_struct *up)
{
struct kobject *kobj = &up->kobj;
- int error;
+ int err;
memset(kobj, 0, sizeof(struct kobject));
kobj->kset = uids_kset;
- error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
- if (error) {
+ err = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid.uid);
+ if (err) {
kobject_put(kobj);
goto done;
}
kobject_uevent(kobj, KOBJ_ADD);
done:
- return error;
+ return err;
}
/* create these entries in sysfs:
@@ -271,7 +302,7 @@ int __init uids_sysfs_init(void)
uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
if (!uids_kset)
return -ENOMEM;
-
+ sysfs_enable_tagging(&uids_kset->kobj, &userns_tagged_dir_operations);
return uids_user_create(&root_user);
}
@@ -403,7 +434,8 @@ struct user_struct *alloc_uid(struct use
if (!new)
goto out_unlock;
- new->uid = uid;
+ new->uid.uid = uid;
+ new->uid.ns = ns;
atomic_set(&new->__count, 1);
if (sched_create_user(new) < 0)
Index: linux-vanilla/kernel/user_namespace.c
===================================================================
--- linux-vanilla.orig/kernel/user_namespace.c
+++ linux-vanilla/kernel/user_namespace.c
@@ -22,7 +22,7 @@ static struct user_namespace *clone_user
struct user_struct *new_user;
int n;
- ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
+ ns = kzalloc(sizeof(struct user_namespace), GFP_KERNEL);
if (!ns)
return ERR_PTR(-ENOMEM);
@@ -71,6 +71,7 @@ void free_user_ns(struct kref *kref)
struct user_namespace *ns;
ns = container_of(kref, struct user_namespace, kref);
+ sysfs_userns_exit(ns);
release_uids(ns);
kfree(ns);
}
Index: linux-vanilla/security/keys/process_keys.c
===================================================================
--- linux-vanilla.orig/security/keys/process_keys.c
+++ linux-vanilla/security/keys/process_keys.c
@@ -47,7 +47,7 @@ static int install_user_keyrings(struct
char buf[20];
int ret;
- kenter("%p{%u}", user, user->uid);
+ kenter("%p{%u}", user, user->uid.uid);
if (user->uid_keyring) {
kleave(" = 0 [exist]");
@@ -62,13 +62,13 @@ static int install_user_keyrings(struct
* - there may be one in existence already as it may have been
* pinned by a session, but the user_struct pointing to it
* may have been destroyed by setuid */
- sprintf(buf, "_uid.%u", user->uid);
+ sprintf(buf, "_uid.%u", user->uid.uid);
uid_keyring = find_keyring_by_name(buf, true);
if (IS_ERR(uid_keyring)) {
- uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1,
- tsk, KEY_ALLOC_IN_QUOTA,
- NULL);
+ uid_keyring = keyring_alloc(buf, user->uid.uid,
+ (gid_t) -1, tsk,
+ KEY_ALLOC_IN_QUOTA, NULL);
if (IS_ERR(uid_keyring)) {
ret = PTR_ERR(uid_keyring);
goto error;
@@ -77,12 +77,12 @@ static int install_user_keyrings(struct
/* get a default session keyring (which might also exist
* already) */
- sprintf(buf, "_uid_ses.%u", user->uid);
+ sprintf(buf, "_uid_ses.%u", user->uid.uid);
session_keyring = find_keyring_by_name(buf, true);
if (IS_ERR(session_keyring)) {
session_keyring =
- keyring_alloc(buf, user->uid, (gid_t) -1,
+ keyring_alloc(buf, user->uid.uid, (gid_t) -1,
tsk, KEY_ALLOC_IN_QUOTA, NULL);
if (IS_ERR(session_keyring)) {
ret = PTR_ERR(session_keyring);
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 11/11] sysfs: user namespaces: add ns to user_struct
2008-05-06 17:32 ` [PATCH 11/11] sysfs: user namespaces: add ns to user_struct Benjamin Thery
@ 2008-05-06 19:03 ` Serge E. Hallyn
0 siblings, 0 replies; 75+ messages in thread
From: Serge E. Hallyn @ 2008-05-06 19:03 UTC (permalink / raw)
To: Benjamin Thery
Cc: linux-kernel, Eric W. Biederman, Tejun Heo, Greg Kroah-Hartman,
Al Viro, Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov,
netdev
Quoting Benjamin Thery (benjamin.thery@bull.net):
> Add the user_namespace to user_struct.
>
> Add a user_namespace to the sysfs_tag struct.
>
> Mark the /sys/kernel/uids directory to be tagged so that processes in
> different user namespaces can remount /sys and see their own uid
> listings.
>
> Without this patch, having CONFIG_FAIR_SCHED=y makes user namespaces
> unusable, because when you
> clone(CLONE_NEWUSER)
> it will auto-create the root userid and try to create
> /sys/kernel/uids/0. Since that already exists from the parent user
> namespace, the create fails, and the clone misleadingly ends up
> returning -ENOMEM.
>
> This patch fixes the issue by allowing each user namespace to remount
> /sys, and having /sys filter the /sys/kernel/uids/ entries by user
> namespace.
>
> Signed-off-by: Serge Hallyn <serue@us.ibm.com>
Benjamin,
thanks for reposting. Please do drop this patch from the set, though.
A slimmer version of it really is needed asap, but this version, as
Eric had pointed out, has some holdouts from my previous approach, which
I need to drop.
thanks,
-serge
> ---
> fs/dquot.c | 2 -
> fs/ioprio.c | 2 -
> fs/sysfs/mount.c | 25 +++++++++++++++++++++
> include/linux/sched.h | 3 +-
> include/linux/sysfs.h | 9 +++++++
> include/linux/types.h | 5 ++++
> include/linux/user_namespace.h | 5 ++++
> kernel/user.c | 48 ++++++++++++++++++++++++++++++++++-------
> kernel/user_namespace.c | 3 +-
> security/keys/process_keys.c | 14 +++++------
> 10 files changed, 97 insertions(+), 19 deletions(-)
>
> Index: linux-vanilla/fs/dquot.c
> ===================================================================
> --- linux-vanilla.orig/fs/dquot.c
> +++ linux-vanilla/fs/dquot.c
> @@ -968,7 +968,7 @@ static void send_warning(const struct dq
> MINOR(dquot->dq_sb->s_dev));
> if (ret)
> goto attr_err_out;
> - ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid);
> + ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid.uid);
> if (ret)
> goto attr_err_out;
> genlmsg_end(skb, msg_head);
> Index: linux-vanilla/fs/ioprio.c
> ===================================================================
> --- linux-vanilla.orig/fs/ioprio.c
> +++ linux-vanilla/fs/ioprio.c
> @@ -224,7 +224,7 @@ asmlinkage long sys_ioprio_get(int which
> break;
>
> do_each_thread(g, p) {
> - if (p->uid != user->uid)
> + if (!task_user_equiv(p, user))
> continue;
> tmpio = get_task_ioprio(p);
> if (tmpio < 0)
> Index: linux-vanilla/fs/sysfs/mount.c
> ===================================================================
> --- linux-vanilla.orig/fs/sysfs/mount.c
> +++ linux-vanilla/fs/sysfs/mount.c
> @@ -17,6 +17,7 @@
> #include <linux/pagemap.h>
> #include <linux/init.h>
> #include <linux/nsproxy.h>
> +#include <linux/user_namespace.h>
> #include <net/net_namespace.h>
>
> #include "sysfs.h"
> @@ -81,6 +82,7 @@ static int sysfs_fill_super(struct super
> sb->s_root = root;
> sb->s_fs_info = info;
> info->tag.net_ns = hold_net(current->nsproxy->net_ns);
> + info->tag.user_ns = current->nsproxy->user_ns;
> return 0;
>
> out_err:
> @@ -100,6 +102,8 @@ static int sysfs_test_super(struct super
>
> if (task->nsproxy->net_ns != info->tag.net_ns)
> found = 0;
> + if (task->nsproxy->user_ns != info->tag.user_ns)
> + found = 0;
>
> return found;
> }
> @@ -214,6 +218,27 @@ static struct pernet_operations sysfs_ne
> };
> #endif
>
> +#ifdef CONFIG_USER_NS
> +void sysfs_userns_exit(struct user_namespace *user_ns)
> +{
> + /* Allow the net namespace to go away while sysfs is still mounted. */
> + struct super_block *sb;
> + printk(KERN_NOTICE "sysfs: user namespace exiting\n");
> + mutex_lock(&sysfs_rename_mutex);
> + sysfs_grab_supers();
> + mutex_lock(&sysfs_mutex);
> + list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
> + struct sysfs_super_info *info = sysfs_info(sb);
> + if (info->tag.user_ns != user_ns)
> + continue;
> + info->tag.user_ns = NULL;
> + }
> + mutex_unlock(&sysfs_mutex);
> + sysfs_release_supers();
> + mutex_unlock(&sysfs_rename_mutex);
> +}
> +#endif
> +
> int __init sysfs_init(void)
> {
> int err = -ENOMEM;
> Index: linux-vanilla/include/linux/sched.h
> ===================================================================
> --- linux-vanilla.orig/include/linux/sched.h
> +++ linux-vanilla/include/linux/sched.h
> @@ -595,7 +595,7 @@ struct user_struct {
>
> /* Hash table maintenance information */
> struct hlist_node uidhash_node;
> - uid_t uid;
> + struct k_uid_t uid;
>
> #ifdef CONFIG_USER_SCHED
> struct task_group *tg;
> @@ -1697,6 +1697,7 @@ static inline struct user_struct *get_ui
> extern void free_uid(struct user_struct *);
> extern void switch_uid(struct user_struct *);
> extern void release_uids(struct user_namespace *ns);
> +extern int task_user_equiv(struct task_struct *tsk, struct user_struct *u);
>
> #include <asm/current.h>
>
> Index: linux-vanilla/include/linux/sysfs.h
> ===================================================================
> --- linux-vanilla.orig/include/linux/sysfs.h
> +++ linux-vanilla/include/linux/sysfs.h
> @@ -20,6 +20,7 @@
> struct kobject;
> struct module;
> struct net;
> +struct user_namespace;
>
> /* FIXME
> * The *owner field is no longer used, but leave around
> @@ -81,6 +82,7 @@ struct sysfs_ops {
>
> struct sysfs_tag_info {
> struct net *net_ns;
> + struct user_namespace *user_ns;
> };
>
> struct sysfs_tagged_dir_operations {
> @@ -139,6 +141,9 @@ int sysfs_enable_tagging(struct kobject
>
> extern int __must_check sysfs_init(void);
>
> +struct user_namespace;
> +void sysfs_userns_exit(struct user_namespace *user_ns);
> +
> #else /* CONFIG_SYSFS */
>
> static inline int sysfs_schedule_callback(struct kobject *kobj,
> @@ -264,6 +269,10 @@ static inline int __must_check sysfs_ini
> return 0;
> }
>
> +static inline void sysfs_userns_exit(struct user_namespace *user_ns)
> +{
> +}
> +
> #endif /* CONFIG_SYSFS */
>
> #endif /* _SYSFS_H_ */
> Index: linux-vanilla/include/linux/types.h
> ===================================================================
> --- linux-vanilla.orig/include/linux/types.h
> +++ linux-vanilla/include/linux/types.h
> @@ -37,6 +37,11 @@ typedef __kernel_gid32_t gid_t;
> typedef __kernel_uid16_t uid16_t;
> typedef __kernel_gid16_t gid16_t;
>
> +struct k_uid_t {
> + uid_t uid;
> + struct user_namespace *ns;
> +};
> +
> typedef unsigned long uintptr_t;
>
> #ifdef CONFIG_UID16
> Index: linux-vanilla/include/linux/user_namespace.h
> ===================================================================
> --- linux-vanilla.orig/include/linux/user_namespace.h
> +++ linux-vanilla/include/linux/user_namespace.h
> @@ -12,10 +12,15 @@
> struct user_namespace {
> struct kref kref;
> struct hlist_head uidhash_table[UIDHASH_SZ];
> + struct kset *kset;
> struct user_struct *root_user;
> };
>
> extern struct user_namespace init_user_ns;
> +extern int register_user_ns_kobj(struct user_namespace *ns);
> +extern void unregister_user_ns_kobj(struct user_namespace *ns);
> +extern int register_user_ns_kobj(struct user_namespace *ns);
> +extern void unregister_user_ns_kobj(struct user_namespace *ns);
>
> #ifdef CONFIG_USER_NS
>
> Index: linux-vanilla/kernel/user.c
> ===================================================================
> --- linux-vanilla.orig/kernel/user.c
> +++ linux-vanilla/kernel/user.c
> @@ -53,6 +53,10 @@ struct user_struct root_user = {
> .files = ATOMIC_INIT(0),
> .sigpending = ATOMIC_INIT(0),
> .locked_shm = 0,
> + .uid = {
> + .uid = 0,
> + .ns = &init_user_ns,
> + },
> #ifdef CONFIG_USER_SCHED
> .tg = &init_task_group,
> #endif
> @@ -71,13 +75,23 @@ static void uid_hash_remove(struct user_
> hlist_del_init(&up->uidhash_node);
> }
>
> -static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
> +int task_user_equiv(struct task_struct *tsk, struct user_struct *u)
> +{
> + if (tsk->uid != u->uid.uid)
> + return 0;
> + if (tsk->nsproxy->user_ns != u->uid.ns)
> + return 0;
> + return 1;
> +}
> +
> +static struct user_struct *uid_hash_find(uid_t uid,
> + struct hlist_head *hashent)
> {
> struct user_struct *user;
> struct hlist_node *h;
>
> hlist_for_each_entry(user, h, hashent, uidhash_node) {
> - if (user->uid == uid) {
> + if (user->uid.uid == uid) {
> atomic_inc(&user->__count);
> return user;
> }
> @@ -236,6 +250,23 @@ static void uids_release(struct kobject
> return;
> }
>
> +static const void *userns_sb_tag(struct sysfs_tag_info *info)
> +{
> + return info->user_ns;
> +}
> +
> +static const void *userns_kobject_tag(struct kobject *kobj)
> +{
> + struct user_struct *up;
> + up = container_of(kobj, struct user_struct, kobj);
> + return up->uid.ns;
> +}
> +
> +static struct sysfs_tagged_dir_operations userns_tagged_dir_operations = {
> + .sb_tag = userns_sb_tag,
> + .kobject_tag = userns_kobject_tag,
> +};
> +
> static struct kobj_type uids_ktype = {
> .sysfs_ops = &kobj_sysfs_ops,
> .default_attrs = uids_attributes,
> @@ -246,19 +277,19 @@ static struct kobj_type uids_ktype = {
> static int uids_user_create(struct user_struct *up)
> {
> struct kobject *kobj = &up->kobj;
> - int error;
> + int err;
>
> memset(kobj, 0, sizeof(struct kobject));
> kobj->kset = uids_kset;
> - error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
> - if (error) {
> + err = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid.uid);
> + if (err) {
> kobject_put(kobj);
> goto done;
> }
>
> kobject_uevent(kobj, KOBJ_ADD);
> done:
> - return error;
> + return err;
> }
>
> /* create these entries in sysfs:
> @@ -271,7 +302,7 @@ int __init uids_sysfs_init(void)
> uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
> if (!uids_kset)
> return -ENOMEM;
> -
> + sysfs_enable_tagging(&uids_kset->kobj, &userns_tagged_dir_operations);
> return uids_user_create(&root_user);
> }
>
> @@ -403,7 +434,8 @@ struct user_struct *alloc_uid(struct use
> if (!new)
> goto out_unlock;
>
> - new->uid = uid;
> + new->uid.uid = uid;
> + new->uid.ns = ns;
> atomic_set(&new->__count, 1);
>
> if (sched_create_user(new) < 0)
> Index: linux-vanilla/kernel/user_namespace.c
> ===================================================================
> --- linux-vanilla.orig/kernel/user_namespace.c
> +++ linux-vanilla/kernel/user_namespace.c
> @@ -22,7 +22,7 @@ static struct user_namespace *clone_user
> struct user_struct *new_user;
> int n;
>
> - ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
> + ns = kzalloc(sizeof(struct user_namespace), GFP_KERNEL);
> if (!ns)
> return ERR_PTR(-ENOMEM);
>
> @@ -71,6 +71,7 @@ void free_user_ns(struct kref *kref)
> struct user_namespace *ns;
>
> ns = container_of(kref, struct user_namespace, kref);
> + sysfs_userns_exit(ns);
> release_uids(ns);
> kfree(ns);
> }
> Index: linux-vanilla/security/keys/process_keys.c
> ===================================================================
> --- linux-vanilla.orig/security/keys/process_keys.c
> +++ linux-vanilla/security/keys/process_keys.c
> @@ -47,7 +47,7 @@ static int install_user_keyrings(struct
> char buf[20];
> int ret;
>
> - kenter("%p{%u}", user, user->uid);
> + kenter("%p{%u}", user, user->uid.uid);
>
> if (user->uid_keyring) {
> kleave(" = 0 [exist]");
> @@ -62,13 +62,13 @@ static int install_user_keyrings(struct
> * - there may be one in existence already as it may have been
> * pinned by a session, but the user_struct pointing to it
> * may have been destroyed by setuid */
> - sprintf(buf, "_uid.%u", user->uid);
> + sprintf(buf, "_uid.%u", user->uid.uid);
>
> uid_keyring = find_keyring_by_name(buf, true);
> if (IS_ERR(uid_keyring)) {
> - uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1,
> - tsk, KEY_ALLOC_IN_QUOTA,
> - NULL);
> + uid_keyring = keyring_alloc(buf, user->uid.uid,
> + (gid_t) -1, tsk,
> + KEY_ALLOC_IN_QUOTA, NULL);
> if (IS_ERR(uid_keyring)) {
> ret = PTR_ERR(uid_keyring);
> goto error;
> @@ -77,12 +77,12 @@ static int install_user_keyrings(struct
>
> /* get a default session keyring (which might also exist
> * already) */
> - sprintf(buf, "_uid_ses.%u", user->uid);
> + sprintf(buf, "_uid_ses.%u", user->uid.uid);
>
> session_keyring = find_keyring_by_name(buf, true);
> if (IS_ERR(session_keyring)) {
> session_keyring =
> - keyring_alloc(buf, user->uid, (gid_t) -1,
> + keyring_alloc(buf, user->uid.uid, (gid_t) -1,
> tsk, KEY_ALLOC_IN_QUOTA, NULL);
> if (IS_ERR(session_keyring)) {
> ret = PTR_ERR(session_keyring);
>
> --
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [RESEND][PATCH 00/11] sysfs tagged directories
2008-05-06 17:30 [RESEND][PATCH 00/11] sysfs tagged directories Benjamin Thery
` (10 preceding siblings ...)
2008-05-06 17:32 ` [PATCH 11/11] sysfs: user namespaces: add ns to user_struct Benjamin Thery
@ 2008-05-06 17:53 ` Greg KH
2008-05-06 18:41 ` Benjamin Thery
2008-05-07 13:19 ` Daniel Lezcano
11 siblings, 2 replies; 75+ messages in thread
From: Greg KH @ 2008-05-06 17:53 UTC (permalink / raw)
To: Benjamin Thery
Cc: linux-kernel, Eric W. Biederman, Tejun Heo, Al Viro,
Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov, netdev
On Tue, May 06, 2008 at 07:30:30PM +0200, Benjamin Thery wrote:
> This is still the same port of Eric Biederman's patchset to implement
> tagged directories in sysfs that was discussed a few days ago.
>
> This time it applies on top of 2.6.26-rc1,
> which includes a fix from Daniel Lezcano to fix net device renaming
> for sysfs (and of course all the network namespaces stuff that was
> committed in net-2.6.26).
>
> This patchset still contains the patch from Serge Hallyn that implements
> tagging for user namespaces.
>
> It also contains a patch from Daniel Lezcano which allows to have net
> devices with the same name in two different network namespaces.
Does this all work properly with both CONFIG_SYSFS_DEPRECATED enabled
and disabled?
Can you show some outputs of 'tree' in both cases on the /sys/class/net/
directory?
thanks,
greg k-h
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [RESEND][PATCH 00/11] sysfs tagged directories
2008-05-06 17:53 ` [RESEND][PATCH 00/11] sysfs tagged directories Greg KH
@ 2008-05-06 18:41 ` Benjamin Thery
2008-05-07 13:19 ` Daniel Lezcano
1 sibling, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-06 18:41 UTC (permalink / raw)
To: Greg KH
Cc: Benjamin Thery, linux-kernel, Eric W. Biederman, Tejun Heo,
Al Viro, Daniel Lezcano, Serge E. Hallyn, Pavel Emelyanov,
netdev
On Tue, May 6, 2008 at 7:53 PM, Greg KH <gregkh@suse.de> wrote:
> On Tue, May 06, 2008 at 07:30:30PM +0200, Benjamin Thery wrote:
> > This is still the same port of Eric Biederman's patchset to implement
> > tagged directories in sysfs that was discussed a few days ago.
> >
> > This time it applies on top of 2.6.26-rc1,
> > which includes a fix from Daniel Lezcano to fix net device renaming
> > for sysfs (and of course all the network namespaces stuff that was
> > committed in net-2.6.26).
> >
> > This patchset still contains the patch from Serge Hallyn that implements
> > tagging for user namespaces.
> >
> > It also contains a patch from Daniel Lezcano which allows to have net
> > devices with the same name in two different network namespaces.
>
> Does this all work properly with both CONFIG_SYSFS_DEPRECATED enabled
> and disabled?
Yes.
I tested both cases and with my limited knowledge of sysfs (I'm mostly
interested in /sys/class/net population), this looked fine to me.
>
> Can you show some outputs of 'tree' in both cases on the /sys/class/net/
> directory?
Sure.
I hope you don't mind if I send them tomorrow; I'm back home right now
and have limited access to my test machines.
-benjamin
>
> thanks,
>
> greg k-h
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [RESEND][PATCH 00/11] sysfs tagged directories
2008-05-06 17:53 ` [RESEND][PATCH 00/11] sysfs tagged directories Greg KH
2008-05-06 18:41 ` Benjamin Thery
@ 2008-05-07 13:19 ` Daniel Lezcano
2008-05-07 13:47 ` Benjamin Thery
2008-05-14 15:07 ` Benjamin Thery
1 sibling, 2 replies; 75+ messages in thread
From: Daniel Lezcano @ 2008-05-07 13:19 UTC (permalink / raw)
To: Greg KH
Cc: Benjamin Thery, linux-kernel, Eric W. Biederman, Tejun Heo,
Al Viro, Serge E. Hallyn, Pavel Emelyanov, netdev
Greg KH wrote:
> On Tue, May 06, 2008 at 07:30:30PM +0200, Benjamin Thery wrote:
>> This is still the same port of Eric Biederman's patchset to implement
>> tagged directories in sysfs that was discussed a few days ago.
>>
>> This time it applies on top of 2.6.26-rc1,
>> which includes a fix from Daniel Lezcano to fix net device renaming
>> for sysfs (and of course all the network namespaces stuff that was
>> committed in net-2.6.26).
>>
>> This patchset still contains the patch from Serge Hallyn that implements
>> tagging for user namespaces.
>>
>> It also contains a patch from Daniel Lezcano which allows to have net
>> devices with the same name in two different network namespaces.
>
> Does this all work properly with both CONFIG_SYSFS_DEPRECATED enabled
> and disabled?
>
> Can you show some outputs of 'tree' in both cases on the /sys/class/net/
> directory?
>
> thanks,
>
> greg k-h
Here is the output from an ls -l of /sys/class/net. I tried with
different configurations and checked that the renamed network devices are
propagated to sysfs.
1) CONFIG_SYSFS_DEPRECATED && CONFIG_SYSFS && CONFIG_NET_NS
===========================================================
Outside a namespace:
--------------------
qemu:~> ip l
1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
link/ether 1e:43:9e:1f:1f:55 brd ff:ff:ff:ff:ff:ff
4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
16: veth1: <BROADCAST,MULTICAST,PROMISC> mtu 1500 qdisc pfifo_fast qlen 1000
link/ether 8e:94:e6:72:63:cf brd ff:ff:ff:ff:ff:ff
qemu:~/lxc> ll /sys/class/net/
total 0
drwxr-xr-x 5 root root 0 2008-05-07 15:24 br0
drwxr-xr-x 3 root root 0 2008-05-07 13:16 dummy0
drwxr-xr-x 4 root root 0 2008-05-07 15:24 eth0
drwxr-xr-x 3 root root 0 2008-05-07 13:16 lo
drwxr-xr-x 4 root root 0 2008-05-07 15:26 veth1
Inside the namespace:
----------------------
qemu:~> ip l
14: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
15: veth2: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen
1000
link/ether 46:85:e5:76:90:05 brd ff:ff:ff:ff:ff:ff
virtnode:~lxc # ll /sys/class/net/
total 0
drwxr-xr-x 3 root root 0 May 7 15:28 lo
drwxr-xr-x 3 root root 0 May 7 15:26 veth2
2) CONFIG_SYSFS && CONFIG_NET_NS
================================
Outside the namespace:
----------------------
qemu:~> ip l
1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
link/ether 72:16:3d:88:11:70 brd ff:ff:ff:ff:ff:ff
4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
13: veth1: <BROADCAST,MULTICAST,PROMISC> mtu 1500 qdisc pfifo_fast qlen 1000
link/ether aa:6f:e8:dd:b8:ba brd ff:ff:ff:ff:ff:ff
* Content of /sys/class/net:
qemu:~/lxc> ll /sys/class/net/
total 0
lrwxrwxrwx 1 root root 0 2008-05-07 14:31 br0 ->
../../devices/virtual/net/br0
lrwxrwxrwx 1 root root 0 2008-05-07 14:28 dummy0 ->
../../devices/virtual/net/dummy0
lrwxrwxrwx 1 root root 0 2008-05-07 14:28 eth0 ->
../../devices/pci0000:00/0000:00:03.0/net/eth0
lrwxrwxrwx 1 root root 0 2008-05-07 14:28 lo -> ../../devices/virtual/net/lo
lrwxrwxrwx 1 root root 0 2008-05-07 14:35 veth1 ->
../../devices/virtual/net/veth1
Inside the namespace:
----------------------
qemu:~> ip l
11: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
12: veth2: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen
1000
link/ether ee:c5:a3:7c:58:bd brd ff:ff:ff:ff:ff:ff
virtnode:~/lxc # ll /sys/class/net/
total 0
lrwxrwxrwx 1 root root 0 May 7 14:40 lo -> ../../devices/virtual/net/lo
lrwxrwxrwx 1 root root 0 May 7 14:40 veth2 ->
../../devices/virtual/net/veth2
3) CONFIG_SYSFS
===============
qemu:~> ip l
1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
link/ether 6e:3a:5f:2f:fa:cb brd ff:ff:ff:ff:ff:ff
4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
qemu:~/lxc> ll /sys/class/net/
total 0
lrwxrwxrwx 1 root root 0 2008-05-07 15:01 br0 ->
../../devices/virtual/net/br0
lrwxrwxrwx 1 root root 0 2008-05-07 14:59 dummy0 ->
../../devices/virtual/net/dummy0
lrwxrwxrwx 1 root root 0 2008-05-07 14:59 eth0 ->
../../devices/pci0000:00/0000:00:03.0/net/eth0
lrwxrwxrwx 1 root root 0 2008-05-07 14:59 lo -> ../../devices/virtual/net/lo
4) CONFIG_SYSFS_DEPRECATED && CONFIG_SYSFS
==========================================
qemu:~> ip l
1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
link/ether 16:85:f0:a8:34:44 brd ff:ff:ff:ff:ff:ff
4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
qemu:~> ll /sys/class/net/
total 0
drwxr-xr-x 5 root root 0 2008-05-07 15:07 br0
drwxr-xr-x 3 root root 0 2008-05-07 15:06 dummy0
drwxr-xr-x 4 root root 0 2008-05-07 15:07 eth0
drwxr-xr-x 3 root root 0 2008-05-07 15:06 lo
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [RESEND][PATCH 00/11] sysfs tagged directories
2008-05-07 13:19 ` Daniel Lezcano
@ 2008-05-07 13:47 ` Benjamin Thery
2008-05-14 15:07 ` Benjamin Thery
1 sibling, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-07 13:47 UTC (permalink / raw)
To: Daniel Lezcano
Cc: Greg KH, Benjamin Thery, linux-kernel, Eric W. Biederman,
Tejun Heo, Al Viro, Serge E. Hallyn, Pavel Emelyanov, netdev
On Wed, May 7, 2008 at 3:19 PM, Daniel Lezcano <dlezcano@fr.ibm.com> wrote:
> Greg KH wrote:
>
> > On Tue, May 06, 2008 at 07:30:30PM +0200, Benjamin Thery wrote:
> >
> > > This is still the same port of Eric Biederman's patchset to implement
> > > tagged directories in sysfs that was discussed a few days ago.
> > >
> > > This time it applies on top of 2.6.26-rc1, which includes a fix from
> Daniel Lezcano to fix net device renaming
> > > for sysfs (and of course all the network namespaces stuff that was
> committed in net-2.6.26).
> > >
> > > This patchset still contains the patch from Serge Hallyn that implements
> > > tagging for user namespaces.
> > >
> > > It also contains a patch from Daniel Lezcano which allows to have net
> devices with the same name in two different network namespaces.
> > >
> >
> > Does this all work properly with both CONFIG_SYSFS_DEPRECATED enabled
> > and disabled?
> >
> > Can you show some outputs of 'tree' in both cases on the /sys/class/net/
> > directory?
> >
> > thanks,
> >
> > greg k-h
> >
>
> Here is the output from an ls -l of /sys/class/net. I tryed with different
> configurations and checked the renamed network devices are propagated to the
> sysfs.
Thanks Daniel.
You covered more cases than I was planning to send :)
Benjamin
>
> 1) CONFIG_SYSFS_DEPRECATED && CONFIG_SYS && CONFIG_NET_NS
> =========================================================
>
> Outside a namespace:
> --------------------
>
> qemu:~> ip l
> 1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
> link/ether 1e:43:9e:1f:1f:55 brd ff:ff:ff:ff:ff:ff
> 4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 16: veth1: <BROADCAST,MULTICAST,PROMISC> mtu 1500 qdisc pfifo_fast qlen
> 1000
> link/ether 8e:94:e6:72:63:cf brd ff:ff:ff:ff:ff:ff
>
> qemu:~/lxc> ll /sys/class/net/
> total 0
> drwxr-xr-x 5 root root 0 2008-05-07 15:24 br0
> drwxr-xr-x 3 root root 0 2008-05-07 13:16 dummy0
> drwxr-xr-x 4 root root 0 2008-05-07 15:24 eth0
> drwxr-xr-x 3 root root 0 2008-05-07 13:16 lo
> drwxr-xr-x 4 root root 0 2008-05-07 15:26 veth1
>
>
> Inside the namespace:
> ----------------------
>
> qemu:~> ip l
> 14: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 15: veth2: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen
> 1000
> link/ether 46:85:e5:76:90:05 brd ff:ff:ff:ff:ff:ff
>
> virtnode:~lxc # ll /sys/class/net/
> total 0
> drwxr-xr-x 3 root root 0 May 7 15:28 lo
> drwxr-xr-x 3 root root 0 May 7 15:26 veth2
>
> 2) CONFIG_SYSFS && CONFIG_NET_NS
> ================================
>
>
> Outside the namespace:
> ----------------------
>
> qemu:~> ip l
> 1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
> link/ether 72:16:3d:88:11:70 brd ff:ff:ff:ff:ff:ff
> 4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 13: veth1: <BROADCAST,MULTICAST,PROMISC> mtu 1500 qdisc pfifo_fast qlen
> 1000
> link/ether aa:6f:e8:dd:b8:ba brd ff:ff:ff:ff:ff:ff
>
> * Content of /sys/class/net:
>
> qemu:~/lxc> ll /sys/class/net/
> total 0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:31 br0 ->
> ../../devices/virtual/net/br0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:28 dummy0 ->
> ../../devices/virtual/net/dummy0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:28 eth0 ->
> ../../devices/pci0000:00/0000:00:03.0/net/eth0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:28 lo ->
> ../../devices/virtual/net/lo
> lrwxrwxrwx 1 root root 0 2008-05-07 14:35 veth1 ->
> ../../devices/virtual/net/veth1
>
> Inside the namespace:
> ----------------------
>
> qemu:~> ip l
> 11: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 12: veth2: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen
> 1000
> link/ether ee:c5:a3:7c:58:bd brd ff:ff:ff:ff:ff:ff
>
> virtnode:~/lxc # ll /sys/class/net/
> total 0
> lrwxrwxrwx 1 root root 0 May 7 14:40 lo -> ../../devices/virtual/net/lo
> lrwxrwxrwx 1 root root 0 May 7 14:40 veth2 ->
> ../../devices/virtual/net/veth2
>
>
> 3) CONFIG_SYSFS
> ===============
>
> qemu:~> ip l
> 1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
> link/ether 6e:3a:5f:2f:fa:cb brd ff:ff:ff:ff:ff:ff
> 4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
>
> qemu:~/lxc> ll /sys/class/net/
> total 0
> lrwxrwxrwx 1 root root 0 2008-05-07 15:01 br0 ->
> ../../devices/virtual/net/br0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:59 dummy0 ->
> ../../devices/virtual/net/dummy0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:59 eth0 ->
> ../../devices/pci0000:00/0000:00:03.0/net/eth0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:59 lo ->
> ../../devices/virtual/net/lo
>
> 4) CONFIG_SYSFS_DEPRECATED && CONFIG_SYSFS
> ==========================================
>
> qemu:~> ip l
> 1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
> link/ether 16:85:f0:a8:34:44 brd ff:ff:ff:ff:ff:ff
> 4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
>
> qemu:~> ll /sys/class/net/
> total 0
> drwxr-xr-x 5 root root 0 2008-05-07 15:07 br0
> drwxr-xr-x 3 root root 0 2008-05-07 15:06 dummy0
> drwxr-xr-x 4 root root 0 2008-05-07 15:07 eth0
> drwxr-xr-x 3 root root 0 2008-05-07 15:06 lo
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [RESEND][PATCH 00/11] sysfs tagged directories
2008-05-07 13:19 ` Daniel Lezcano
2008-05-07 13:47 ` Benjamin Thery
@ 2008-05-14 15:07 ` Benjamin Thery
1 sibling, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-05-14 15:07 UTC (permalink / raw)
To: Greg KH
Cc: Daniel Lezcano, linux-kernel, Eric W. Biederman, Tejun Heo,
Al Viro, Serge E. Hallyn, Pavel Emelyanov, netdev, Andrew Morton
So, what is the status for this patchset?
So far, we discussed patch 10 a bit and Eric found a better way to fix
the kobject_rename() issue. But there hasn't been much discussion on the
rest of the patchset this time.
Does this mean that there is no strong objection against it now?
Do you think the approach is suitable for what we're aiming for?
Reminder: We need this for network namespaces. Without it network
namespaces testing is still a bit complicated in 2.6.26 (need to disable
sysfs). Also, Serge uses it too for user namespaces.
Should I resend it one more time ported on top of a more recent tree in
the hope it can be merged?
BTW which tree is the best? Linus', Greg's, -mm?
Regards,
Benjamin
Daniel Lezcano wrote:
> Greg KH wrote:
>> On Tue, May 06, 2008 at 07:30:30PM +0200, Benjamin Thery wrote:
>>> This is still the same port of Eric Biederman's patchset to implement
>>> tagged directories in sysfs that was discussed a few days ago.
>>>
>>> This time it applies on top of 2.6.26-rc1, which includes a fix from
>>> Daniel Lezcano to fix net device renaming
>>> for sysfs (and of course all the network namespaces stuff that was
>>> committed in net-2.6.26).
>>>
>>> This patchset still contains the patch from Serge Hallyn that implements
>>> tagging for user namespaces.
>>>
>>> It also contains a patch from Daniel Lezcano which allows to have net
>>> devices with the same name in two different network namespaces.
>>
>> Does this all work properly with both CONFIG_SYSFS_DEPRECATED enabled
>> and disabled?
>>
>> Can you show some outputs of 'tree' in both cases on the /sys/class/net/
>> directory?
>>
>> thanks,
>>
>> greg k-h
>
> Here is the output from an ls -l of /sys/class/net. I tryed with
> different configurations and checked the renamed network devices are
> propagated to the sysfs.
>
> 1) CONFIG_SYSFS_DEPRECATED && CONFIG_SYS && CONFIG_NET_NS
> =========================================================
>
> Outside a namespace:
> --------------------
>
> qemu:~> ip l
> 1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
> link/ether 1e:43:9e:1f:1f:55 brd ff:ff:ff:ff:ff:ff
> 4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 16: veth1: <BROADCAST,MULTICAST,PROMISC> mtu 1500 qdisc pfifo_fast qlen
> 1000
> link/ether 8e:94:e6:72:63:cf brd ff:ff:ff:ff:ff:ff
>
> qemu:~/lxc> ll /sys/class/net/
> total 0
> drwxr-xr-x 5 root root 0 2008-05-07 15:24 br0
> drwxr-xr-x 3 root root 0 2008-05-07 13:16 dummy0
> drwxr-xr-x 4 root root 0 2008-05-07 15:24 eth0
> drwxr-xr-x 3 root root 0 2008-05-07 13:16 lo
> drwxr-xr-x 4 root root 0 2008-05-07 15:26 veth1
>
>
> Inside the namespace:
> ----------------------
>
> qemu:~> ip l
> 14: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 15: veth2: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen
> 1000
> link/ether 46:85:e5:76:90:05 brd ff:ff:ff:ff:ff:ff
>
> virtnode:~lxc # ll /sys/class/net/
> total 0
> drwxr-xr-x 3 root root 0 May 7 15:28 lo
> drwxr-xr-x 3 root root 0 May 7 15:26 veth2
>
> 2) CONFIG_SYSFS && CONFIG_NET_NS
> ================================
>
>
> Outside the namespace:
> ----------------------
>
> qemu:~> ip l
> 1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
> link/ether 72:16:3d:88:11:70 brd ff:ff:ff:ff:ff:ff
> 4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 13: veth1: <BROADCAST,MULTICAST,PROMISC> mtu 1500 qdisc pfifo_fast qlen
> 1000
> link/ether aa:6f:e8:dd:b8:ba brd ff:ff:ff:ff:ff:ff
>
> * Content of /sys/class/net:
>
> qemu:~/lxc> ll /sys/class/net/
> total 0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:31 br0 ->
> ../../devices/virtual/net/br0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:28 dummy0 ->
> ../../devices/virtual/net/dummy0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:28 eth0 ->
> ../../devices/pci0000:00/0000:00:03.0/net/eth0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:28 lo ->
> ../../devices/virtual/net/lo
> lrwxrwxrwx 1 root root 0 2008-05-07 14:35 veth1 ->
> ../../devices/virtual/net/veth1
>
> Inside the namespace:
> ----------------------
>
> qemu:~> ip l
> 11: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 12: veth2: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen
> 1000
> link/ether ee:c5:a3:7c:58:bd brd ff:ff:ff:ff:ff:ff
>
> virtnode:~/lxc # ll /sys/class/net/
> total 0
> lrwxrwxrwx 1 root root 0 May 7 14:40 lo -> ../../devices/virtual/net/lo
> lrwxrwxrwx 1 root root 0 May 7 14:40 veth2 ->
> ../../devices/virtual/net/veth2
>
>
> 3) CONFIG_SYSFS
> ===============
>
> qemu:~> ip l
> 1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
> link/ether 6e:3a:5f:2f:fa:cb brd ff:ff:ff:ff:ff:ff
> 4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
>
> qemu:~/lxc> ll /sys/class/net/
> total 0
> lrwxrwxrwx 1 root root 0 2008-05-07 15:01 br0 ->
> ../../devices/virtual/net/br0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:59 dummy0 ->
> ../../devices/virtual/net/dummy0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:59 eth0 ->
> ../../devices/pci0000:00/0000:00:03.0/net/eth0
> lrwxrwxrwx 1 root root 0 2008-05-07 14:59 lo ->
> ../../devices/virtual/net/lo
>
> 4) CONFIG_SYSFS_DEPRECATED && CONFIG_SYSFS
> ==========================================
>
> qemu:~> ip l
> 1: lo: <LOOPBACK,UP,10000> mtu 16436 qdisc noqueue
> link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
> 2: eth0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc pfifo_fast qlen 1000
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
> 3: dummy0: <BROADCAST,NOARP> mtu 1500 qdisc noop
> link/ether 16:85:f0:a8:34:44 brd ff:ff:ff:ff:ff:ff
> 4: br0: <BROADCAST,MULTICAST,UP,10000> mtu 1500 qdisc noqueue
> link/ether 52:54:00:12:34:56 brd ff:ff:ff:ff:ff:ff
>
> qemu:~> ll /sys/class/net/
> total 0
> drwxr-xr-x 5 root root 0 2008-05-07 15:07 br0
> drwxr-xr-x 3 root root 0 2008-05-07 15:06 dummy0
> drwxr-xr-x 4 root root 0 2008-05-07 15:07 eth0
> drwxr-xr-x 3 root root 0 2008-05-07 15:06 lo
>
>
--
B e n j a m i n T h e r y - BULL/DT/Open Software R&D
http://www.bull.com
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-06-06 15:46 [PATCH 00/11] sysfs tagged directories V5 Benjamin Thery
@ 2008-06-06 15:47 ` Benjamin Thery
0 siblings, 0 replies; 75+ messages in thread
From: Benjamin Thery @ 2008-06-06 15:47 UTC (permalink / raw)
To: Greg Kroah-Hartman
Cc: Andrew Morton, Eric Biederman, Serge Hallyn, linux-kernel,
Tejun Heo, Al Viro, Linux Containers, Benjamin Thery
sysfs: Implement sysfs tagged directory support.
The problem. When implementing a network namespace I need to be able
to have multiple network devices with the same name. Currently this
is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and
potentially a few other directories of the form /sys/ ... /net/*.
What this patch does is to add an additional tag field to the
sysfs dirent structure. For directories that should show different
contents depending on the context such as /sys/class/net/, and
/sys/devices/virtual/net/ this tag field is used to specify the
context in which those directories should be visible. Effectively
this is the same as creating multiple distinct directories with
the same name but internally to sysfs the result is nicer.
I am calling the concept of a single directory that looks like multiple
directories, all at the same path in the filesystem, "tagged directories".
For the networking namespace the set of directories whose contents I need
to filter with tags can depend on the presence or absence of hotplug
hardware or which modules are currently loaded. Which means I need
a simple race free way to setup those directories as tagged.
To achieve a race free design all tagged directories are created
and managed by sysfs itself. The upper level code that knows what
tagged directories we need provides just two methods that enable
this:
sb_tag() - that returns a "void *" tag that identifies the context of
the process that mounted sysfs.
kobject_tag(kobj) - that returns a "void *" tag that identifies the context
a kobject should be in.
Everything else is left up to sysfs.
For the network namespace sb_tag and kobject_tag are essentially
one line functions, and look to remain that.
The work needed in sysfs is more extensive. At each directory
or symlink creation I need to check if the directory it is being
created in is a tagged directory and if so generate the appropriate
tag to place on the sysfs_dirent. Likewise at each symlink or
directory removal I need to check if the sysfs directory it is
being removed from is a tagged directory and if so figure out
which tag goes along with the name I am deleting.
Currently only directories which hold kobjects, and
symlinks, are supported. There is not enough information
in the current file attribute interfaces to give us anything
to discriminate on, which makes tagging attribute files useless,
and there are no potential users, which makes it an uninteresting
problem to solve.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/bin.c | 2
fs/sysfs/dir.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++----
fs/sysfs/file.c | 8 +-
fs/sysfs/group.c | 4 -
fs/sysfs/inode.c | 7 +
fs/sysfs/mount.c | 44 ++++++++++-
fs/sysfs/symlink.c | 2
fs/sysfs/sysfs.h | 17 ++++
include/linux/sysfs.h | 17 ++++
9 files changed, 257 insertions(+), 29 deletions(-)
Index: linux-mm/fs/sysfs/bin.c
===================================================================
--- linux-mm.orig/fs/sysfs/bin.c
+++ linux-mm/fs/sysfs/bin.c
@@ -252,7 +252,7 @@ int sysfs_create_bin_file(struct kobject
void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
{
- sysfs_hash_and_remove(kobj->sd, attr->attr.name);
+ sysfs_hash_and_remove(kobj, kobj->sd, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
Index: linux-mm/fs/sysfs/dir.c
===================================================================
--- linux-mm.orig/fs/sysfs/dir.c
+++ linux-mm/fs/sysfs/dir.c
@@ -103,8 +103,17 @@ static void sysfs_unlink_sibling(struct
struct dentry *sysfs_get_dentry(struct super_block *sb,
struct sysfs_dirent *sd)
{
- struct dentry *dentry = dget(sb->s_root);
+ struct dentry *dentry;
+
+ /* Bail if this sd won't show up in this superblock */
+ if (sd->s_parent && sd->s_parent->s_flags & SYSFS_FLAG_TAGGED) {
+ const void *tag;
+ tag = sysfs_lookup_tag(sd->s_parent, sb);
+ if (sd->s_tag.tag != tag)
+ return ERR_PTR(-EXDEV);
+ }
+ dentry = dget(sb->s_root);
while (dentry->d_fsdata != sd) {
struct sysfs_dirent *cur;
struct dentry *parent;
@@ -423,7 +432,11 @@ void sysfs_addrm_start(struct sysfs_addr
*/
int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
- if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) {
+ const void *tag = NULL;
+
+ tag = sysfs_creation_tag(acxt->parent_sd, sd);
+
+ if (sysfs_find_dirent(acxt->parent_sd, tag, sd->s_name)) {
printk(KERN_WARNING "sysfs: duplicate filename '%s' "
"can not be created\n", sd->s_name);
WARN_ON(1);
@@ -439,6 +452,9 @@ int sysfs_add_one(struct sysfs_addrm_cxt
sd->s_parent = sysfs_get(acxt->parent_sd);
+ if (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)
+ sd->s_tag.tag = tag;
+
if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
inc_nlink(acxt->parent_inode);
@@ -585,13 +601,18 @@ void sysfs_addrm_finish(struct sysfs_add
* Pointer to sysfs_dirent if found, NULL if not.
*/
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+ const void *tag,
const unsigned char *name)
{
struct sysfs_dirent *sd;
- for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
+ for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
+ if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ (sd->s_tag.tag != tag))
+ continue;
if (!strcmp(sd->s_name, name))
return sd;
+ }
return NULL;
}
@@ -615,7 +636,7 @@ struct sysfs_dirent *sysfs_get_dirent(st
struct sysfs_dirent *sd;
mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, name);
+ sd = sysfs_find_dirent(parent_sd, NULL, name);
sysfs_get(sd);
mutex_unlock(&sysfs_mutex);
@@ -681,13 +702,16 @@ static struct dentry * sysfs_lookup(stru
struct nameidata *nd)
{
struct dentry *ret = NULL;
- struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
+ struct dentry *parent = dentry->d_parent;
+ struct sysfs_dirent *parent_sd = parent->d_fsdata;
struct sysfs_dirent *sd;
struct inode *inode;
+ const void *tag;
mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
+ tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
+ sd = sysfs_find_dirent(parent_sd, tag, dentry->d_name.name);
/* no such entry */
if (!sd) {
@@ -895,19 +919,24 @@ int sysfs_rename_dir(struct kobject * ko
struct sysfs_rename_struct *srs;
struct inode *parent_inode = NULL;
const char *dup_name = NULL;
+ const void *old_tag, *tag;
int error;
INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
+ old_tag = sysfs_dirent_tag(sd);
+ tag = sysfs_creation_tag(sd->s_parent, sd);
error = 0;
- if (strcmp(sd->s_name, new_name) == 0)
+ if ((old_tag == tag) && (strcmp(sd->s_name, new_name) == 0))
goto out; /* nothing to rename */
sysfs_grab_supers();
- error = prep_rename(&todo, sd, sd->s_parent, new_name);
- if (error)
- goto out_release;
+ if (old_tag == tag) {
+ error = prep_rename(&todo, sd, sd->s_parent, new_name);
+ if (error)
+ goto out_release;
+ }
error = -ENOMEM;
mutex_lock(&sysfs_mutex);
@@ -920,7 +949,7 @@ int sysfs_rename_dir(struct kobject * ko
mutex_lock(&sysfs_mutex);
error = -EEXIST;
- if (sysfs_find_dirent(sd->s_parent, new_name))
+ if (sysfs_find_dirent(sd->s_parent, tag, new_name))
goto out_unlock;
/* rename kobject and sysfs_dirent */
@@ -935,6 +964,8 @@ int sysfs_rename_dir(struct kobject * ko
dup_name = sd->s_name;
sd->s_name = new_name;
+ if (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)
+ sd->s_tag.tag = tag;
/* rename */
list_for_each_entry(srs, &todo, list) {
@@ -942,6 +973,20 @@ int sysfs_rename_dir(struct kobject * ko
d_move(srs->old_dentry, srs->new_dentry);
}
+ /* If we are moving across superblocks drop the dcache entries */
+ if (old_tag != tag) {
+ struct super_block *sb;
+ struct dentry *dentry;
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ dentry = __sysfs_get_dentry(sb, sd);
+ if (!dentry)
+ continue;
+ shrink_dcache_parent(dentry);
+ d_drop(dentry);
+ dput(dentry);
+ }
+ }
+
error = 0;
out_unlock:
mutex_unlock(&sysfs_mutex);
@@ -964,11 +1009,13 @@ int sysfs_move_dir(struct kobject *kobj,
struct sysfs_rename_struct *srs;
struct inode *old_parent_inode = NULL, *new_parent_inode = NULL;
int error;
+ const void *tag;
INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
BUG_ON(!sd->s_parent);
new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
+ tag = sysfs_dirent_tag(sd);
error = 0;
if (sd->s_parent == new_parent_sd)
@@ -1002,7 +1049,7 @@ again:
mutex_lock(&sysfs_mutex);
error = -EEXIST;
- if (sysfs_find_dirent(new_parent_sd, sd->s_name))
+ if (sysfs_find_dirent(new_parent_sd, tag, sd->s_name))
goto out_unlock;
error = 0;
@@ -1041,10 +1088,11 @@ static inline unsigned char dt_type(stru
static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct sysfs_dirent * parent_sd = dentry->d_fsdata;
+ struct dentry *parent = filp->f_path.dentry;
+ struct sysfs_dirent *parent_sd = parent->d_fsdata;
struct sysfs_dirent *pos;
ino_t ino;
+ const void *tag;
if (filp->f_pos == 0) {
ino = parent_sd->s_ino;
@@ -1062,6 +1110,8 @@ static int sysfs_readdir(struct file * f
if ((filp->f_pos > 1) && (filp->f_pos < INT_MAX)) {
mutex_lock(&sysfs_mutex);
+ tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
+
/* Skip the dentries we have already reported */
pos = parent_sd->s_dir.children;
while (pos && (filp->f_pos > pos->s_ino))
@@ -1071,6 +1121,10 @@ static int sysfs_readdir(struct file * f
const char * name;
int len;
+ if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ (pos->s_tag.tag != tag))
+ continue;
+
name = pos->s_name;
len = strlen(name);
filp->f_pos = ino = pos->s_ino;
@@ -1091,3 +1145,106 @@ const struct file_operations sysfs_dir_o
.read = generic_read_dir,
.readdir = sysfs_readdir,
};
+
+const void *sysfs_creation_tag(struct sysfs_dirent *parent_sd,
+ struct sysfs_dirent *sd)
+{
+ const void *tag = NULL;
+
+ if (parent_sd->s_flags & SYSFS_FLAG_TAGGED) {
+ struct kobject *kobj;
+ switch (sysfs_type(sd)) {
+ case SYSFS_DIR:
+ kobj = sd->s_dir.kobj;
+ break;
+ case SYSFS_KOBJ_LINK:
+ kobj = sd->s_symlink.target_sd->s_dir.kobj;
+ break;
+ default:
+ BUG();
+ }
+ tag = parent_sd->s_tag.ops->kobject_tag(kobj);
+ }
+ return tag;
+}
+
+const void *sysfs_removal_tag(struct kobject *kobj, struct sysfs_dirent *dir_sd)
+{
+ const void *tag = NULL;
+
+ if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
+ tag = kobj->sd->s_tag.tag;
+
+ return tag;
+}
+
+const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd,
+ struct super_block *sb)
+{
+ const void *tag = NULL;
+
+ if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
+ tag = dir_sd->s_tag.ops->sb_tag(&sysfs_info(sb)->tag);
+
+ return tag;
+}
+
+const void *sysfs_dirent_tag(struct sysfs_dirent *sd)
+{
+ const void *tag = NULL;
+
+ if (sd->s_parent && (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED))
+ tag = sd->s_tag.tag;
+
+ return tag;
+}
+
+/**
+ * sysfs_enable_tagging - Automatically tag all of the children in a
+ * directory.
+ * @kobj: object whose children should be filtered by tags
+ *
+ * Once tagging has been enabled on a directory the contents
+ * of the directory become dependent upon context captured when
+ * sysfs was mounted.
+ *
+ * tag_ops->sb_tag() returns the context for a given superblock.
+ *
+ * tag_ops->kobject_tag() returns the context that a given kobj
+ * resides in.
+ *
+ * Using those methods the sysfs code on tagged directories
+ * carefully stores the files so that when we lookup files
+ * we get the proper answer for our context.
+ *
+ * If the context of a kobject is changed it is expected that
+ * the kobject will be renamed so the appropriate sysfs data structures
+ * can be updated.
+ */
+int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops)
+{
+ struct sysfs_dirent *sd;
+ int err;
+
+ err = -ENOENT;
+ sd = kobj->sd;
+
+ mutex_lock(&sysfs_mutex);
+ err = -EINVAL;
+ /* We can only enable tagging on empty directories
+ * where tagging is not already enabled, and
+ * who are not subdirectories of directories where tagging is
+ * enabled.
+ */
+ if (!sd->s_dir.children && (sysfs_type(sd) == SYSFS_DIR) &&
+ !(sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ sd->s_parent &&
+ !(sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)) {
+ err = 0;
+ sd->s_flags |= SYSFS_FLAG_TAGGED;
+ sd->s_tag.ops = tag_ops;
+ }
+ mutex_unlock(&sysfs_mutex);
+ return err;
+}
Index: linux-mm/fs/sysfs/file.c
===================================================================
--- linux-mm.orig/fs/sysfs/file.c
+++ linux-mm/fs/sysfs/file.c
@@ -460,9 +460,9 @@ void sysfs_notify(struct kobject *k, cha
mutex_lock(&sysfs_mutex);
if (sd && dir)
- sd = sysfs_find_dirent(sd, dir);
+ sd = sysfs_find_dirent(sd, NULL, dir);
if (sd && attr)
- sd = sysfs_find_dirent(sd, attr);
+ sd = sysfs_find_dirent(sd, NULL, attr);
if (sd) {
struct sysfs_open_dirent *od;
@@ -631,7 +631,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
{
- sysfs_hash_and_remove(kobj->sd, attr->name);
+ sysfs_hash_and_remove(kobj, kobj->sd, attr->name);
}
@@ -651,7 +651,7 @@ void sysfs_remove_file_from_group(struct
else
dir_sd = sysfs_get(kobj->sd);
if (dir_sd) {
- sysfs_hash_and_remove(dir_sd, attr->name);
+ sysfs_hash_and_remove(kobj, dir_sd, attr->name);
sysfs_put(dir_sd);
}
}
Index: linux-mm/fs/sysfs/group.c
===================================================================
--- linux-mm.orig/fs/sysfs/group.c
+++ linux-mm/fs/sysfs/group.c
@@ -23,7 +23,7 @@ static void remove_files(struct sysfs_di
int i;
for (i = 0, attr = grp->attrs; *attr; i++, attr++)
- sysfs_hash_and_remove(dir_sd, (*attr)->name);
+ sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name);
}
static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
@@ -39,7 +39,7 @@ static int create_files(struct sysfs_dir
* visibility. Do this by first removing then
* re-adding (if required) the file */
if (update)
- sysfs_hash_and_remove(dir_sd, (*attr)->name);
+ sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name);
if (grp->is_visible) {
mode = grp->is_visible(kobj, *attr, i);
if (!mode)
Index: linux-mm/fs/sysfs/inode.c
===================================================================
--- linux-mm.orig/fs/sysfs/inode.c
+++ linux-mm/fs/sysfs/inode.c
@@ -217,17 +217,20 @@ struct inode * sysfs_get_inode(struct sy
return inode;
}
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
+int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd,
+ const char *name)
{
struct sysfs_addrm_cxt acxt;
struct sysfs_dirent *sd;
+ const void *tag;
if (!dir_sd)
return -ENOENT;
sysfs_addrm_start(&acxt, dir_sd);
+ tag = sysfs_removal_tag(kobj, dir_sd);
- sd = sysfs_find_dirent(dir_sd, name);
+ sd = sysfs_find_dirent(dir_sd, tag, name);
if (sd)
sysfs_remove_one(&acxt, sd);
Index: linux-mm/fs/sysfs/mount.c
===================================================================
--- linux-mm.orig/fs/sysfs/mount.c
+++ linux-mm/fs/sysfs/mount.c
@@ -75,6 +75,7 @@ static int sysfs_fill_super(struct super
goto out_err;
}
root->d_fsdata = &sysfs_root;
+ root->d_sb = sb;
sb->s_root = root;
sb->s_fs_info = info;
return 0;
@@ -88,20 +89,55 @@ out_err:
return error;
}
+static int sysfs_test_super(struct super_block *sb, void *ptr)
+{
+ struct task_struct *task = ptr;
+ struct sysfs_super_info *info = sysfs_info(sb);
+ int found = 1;
+
+ return found;
+}
+
static int sysfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
- int rc;
+ struct super_block *sb;
+ int error;
mutex_lock(&sysfs_rename_mutex);
- rc = get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
+ sb = sget(fs_type, sysfs_test_super, set_anon_super, current);
+ if (IS_ERR(sb)) {
+ error = PTR_ERR(sb);
+ goto out;
+ }
+ if (!sb->s_root) {
+ sb->s_flags = flags;
+ error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
+ if (error) {
+ up_write(&sb->s_umount);
+ deactivate_super(sb);
+ goto out;
+ }
+ sb->s_flags |= MS_ACTIVE;
+ }
+ do_remount_sb(sb, flags, data, 0);
+ error = simple_set_mnt(mnt, sb);
+out:
mutex_unlock(&sysfs_rename_mutex);
- return rc;
+ return error;
+}
+
+static void sysfs_kill_sb(struct super_block *sb)
+{
+ struct sysfs_super_info *info = sysfs_info(sb);
+
+ kill_anon_super(sb);
+ kfree(info);
}
struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.get_sb = sysfs_get_sb,
- .kill_sb = kill_anon_super,
+ .kill_sb = sysfs_kill_sb,
};
void sysfs_grab_supers(void)
Index: linux-mm/fs/sysfs/symlink.c
===================================================================
--- linux-mm.orig/fs/sysfs/symlink.c
+++ linux-mm/fs/sysfs/symlink.c
@@ -94,7 +94,7 @@ void sysfs_remove_link(struct kobject *
else
parent_sd = kobj->sd;
- sysfs_hash_and_remove(parent_sd, name);
+ sysfs_hash_and_remove(kobj, parent_sd, name);
}
static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
Index: linux-mm/fs/sysfs/sysfs.h
===================================================================
--- linux-mm.orig/fs/sysfs/sysfs.h
+++ linux-mm/fs/sysfs/sysfs.h
@@ -46,6 +46,10 @@ struct sysfs_dirent {
const char *s_name;
union {
+ const struct sysfs_tagged_dir_operations *ops;
+ const void *tag;
+ } s_tag;
+ union {
struct sysfs_elem_dir s_dir;
struct sysfs_elem_symlink s_symlink;
struct sysfs_elem_attr s_attr;
@@ -69,6 +73,7 @@ struct sysfs_dirent {
#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK
#define SYSFS_FLAG_REMOVED 0x0200
+#define SYSFS_FLAG_TAGGED 0x0400
static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
{
@@ -87,6 +92,7 @@ struct sysfs_addrm_cxt {
struct sysfs_super_info {
int grabbed;
+ struct sysfs_tag_info tag;
};
#define sysfs_info(SB) ((struct sysfs_super_info *)(SB)->s_fs_info)
@@ -113,6 +119,13 @@ extern spinlock_t sysfs_assoc_lock;
extern const struct file_operations sysfs_dir_operations;
extern const struct inode_operations sysfs_dir_inode_operations;
+extern const void *sysfs_creation_tag(struct sysfs_dirent *parent_sd,
+ struct sysfs_dirent *sd);
+extern const void *sysfs_removal_tag(struct kobject *kobj,
+ struct sysfs_dirent *dir_sd);
+extern const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd,
+ struct super_block *sb);
+extern const void *sysfs_dirent_tag(struct sysfs_dirent *sd);
struct dentry *sysfs_get_dentry(struct super_block *sb,
struct sysfs_dirent *sd);
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
@@ -124,6 +137,7 @@ void sysfs_remove_one(struct sysfs_addrm
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+ const void *tag,
const unsigned char *name);
struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
const unsigned char *name);
@@ -155,7 +169,8 @@ static inline void sysfs_put(struct sysf
*/
struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
+int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd,
+ const char *name);
int sysfs_inode_init(void);
/*
Index: linux-mm/include/linux/sysfs.h
===================================================================
--- linux-mm.orig/include/linux/sysfs.h
+++ linux-mm/include/linux/sysfs.h
@@ -78,6 +78,14 @@ struct sysfs_ops {
ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t);
};
+struct sysfs_tag_info {
+};
+
+struct sysfs_tagged_dir_operations {
+ const void *(*sb_tag)(struct sysfs_tag_info *info);
+ const void *(*kobject_tag)(struct kobject *kobj);
+};
+
#ifdef CONFIG_SYSFS
int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
@@ -117,6 +125,9 @@ void sysfs_remove_file_from_group(struct
void sysfs_notify(struct kobject *kobj, char *dir, char *attr);
void sysfs_printk_last_file(void);
+int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops);
+
extern int __must_check sysfs_init(void);
#else /* CONFIG_SYSFS */
@@ -213,6 +224,12 @@ static inline void sysfs_notify(struct k
{
}
+static inline int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops)
+{
+ return 0;
+}
+
static inline int __must_check sysfs_init(void)
{
return 0;
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-06-18 17:07 [PATCH 00/11] sysfs tagged directories V6 Benjamin Thery
@ 2008-06-18 17:08 ` Benjamin Thery
2008-06-23 2:05 ` Tejun Heo
0 siblings, 1 reply; 75+ messages in thread
From: Benjamin Thery @ 2008-06-18 17:08 UTC (permalink / raw)
To: Greg Kroah-Hartman, Andrew Morton
Cc: Eric Biederman, Daniel Lezcano, Serge Hallyn, linux-kernel,
Tejun Heo, Al Viro, Linux Containers, Benjamin Thery
sysfs: Implement sysfs tagged directory support.
The problem. When implementing a network namespace I need to be able
to have multiple network devices with the same name. Currently this
is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and
potentially a few other directories of the form /sys/ ... /net/*.
What this patch does is to add an additional tag field to the
sysfs dirent structure. For directories that should show different
contents depending on the context such as /sys/class/net/, and
/sys/devices/virtual/net/ this tag field is used to specify the
context in which those directories should be visible. Effectively
this is the same as creating multiple distinct directories with
the same name but internally to sysfs the result is nicer.
I am calling this concept -- a single directory that looks like multiple
directories, all at the same path in the filesystem -- "tagged directories".
For the networking namespace the set of directories whose contents I need
to filter with tags can depend on the presence or absence of hotplug
hardware or which modules are currently loaded. This means I need
a simple, race-free way to set up those directories as tagged.
To achieve a race-free design, all tagged directories are created
and managed by sysfs itself. The upper level code that knows what
tagged directories we need provides just two methods that enable
this:
sb_tag() - that returns a "void *" tag that identifies the context of
the process that mounted sysfs.
kobject_tag(kobj) - that returns a "void *" tag that identifies the context
a kobject should be in.
Everything else is left up to sysfs.
For the network namespace sb_tag and kobject_tag are essentially
one line functions, and look to remain that.
The work needed in sysfs is more extensive. At each directory
or symlink creation I need to check if the directory it is being
created in is a tagged directory and if so generate the appropriate
tag to place on the sysfs_dirent. Likewise at each symlink or
directory removal I need to check if the sysfs directory it is
being removed from is a tagged directory and if so figure out
which tag goes along with the name I am deleting.
Currently only directories which hold kobjects, and
symlinks, are supported. There is not enough information
in the current file attribute interfaces to give us anything
to discriminate on, which makes tagging attribute files useless,
and there are no potential users, which makes it an uninteresting
problem to solve.
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
---
fs/sysfs/bin.c | 2
fs/sysfs/dir.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++----
fs/sysfs/file.c | 8 +-
fs/sysfs/group.c | 4 -
fs/sysfs/inode.c | 7 +
fs/sysfs/mount.c | 44 ++++++++++-
fs/sysfs/symlink.c | 2
fs/sysfs/sysfs.h | 17 ++++
include/linux/sysfs.h | 17 ++++
9 files changed, 257 insertions(+), 29 deletions(-)
Index: linux-mm/fs/sysfs/bin.c
===================================================================
--- linux-mm.orig/fs/sysfs/bin.c
+++ linux-mm/fs/sysfs/bin.c
@@ -252,7 +252,7 @@ int sysfs_create_bin_file(struct kobject
void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
{
- sysfs_hash_and_remove(kobj->sd, attr->attr.name);
+ sysfs_hash_and_remove(kobj, kobj->sd, attr->attr.name);
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
Index: linux-mm/fs/sysfs/dir.c
===================================================================
--- linux-mm.orig/fs/sysfs/dir.c
+++ linux-mm/fs/sysfs/dir.c
@@ -101,8 +101,17 @@ static void sysfs_unlink_sibling(struct
struct dentry *sysfs_get_dentry(struct super_block *sb,
struct sysfs_dirent *sd)
{
- struct dentry *dentry = dget(sb->s_root);
+ struct dentry *dentry;
+
+ /* Bail if this sd won't show up in this superblock */
+ if (sd->s_parent && sd->s_parent->s_flags & SYSFS_FLAG_TAGGED) {
+ const void *tag;
+ tag = sysfs_lookup_tag(sd->s_parent, sb);
+ if (sd->s_tag.tag != tag)
+ return ERR_PTR(-EXDEV);
+ }
+ dentry = dget(sb->s_root);
while (dentry->d_fsdata != sd) {
struct sysfs_dirent *cur;
struct dentry *parent;
@@ -421,11 +430,18 @@ void sysfs_addrm_start(struct sysfs_addr
*/
int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
- if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
+ const void *tag = NULL;
+
+ tag = sysfs_creation_tag(acxt->parent_sd, sd);
+
+ if (sysfs_find_dirent(acxt->parent_sd, tag, sd->s_name))
return -EEXIST;
sd->s_parent = sysfs_get(acxt->parent_sd);
+ if (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)
+ sd->s_tag.tag = tag;
+
if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
inc_nlink(acxt->parent_inode);
@@ -572,13 +588,18 @@ void sysfs_addrm_finish(struct sysfs_add
* Pointer to sysfs_dirent if found, NULL if not.
*/
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+ const void *tag,
const unsigned char *name)
{
struct sysfs_dirent *sd;
- for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
+ for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
+ if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ (sd->s_tag.tag != tag))
+ continue;
if (!strcmp(sd->s_name, name))
return sd;
+ }
return NULL;
}
@@ -602,7 +623,7 @@ struct sysfs_dirent *sysfs_get_dirent(st
struct sysfs_dirent *sd;
mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, name);
+ sd = sysfs_find_dirent(parent_sd, NULL, name);
sysfs_get(sd);
mutex_unlock(&sysfs_mutex);
@@ -668,13 +689,16 @@ static struct dentry * sysfs_lookup(stru
struct nameidata *nd)
{
struct dentry *ret = NULL;
- struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
+ struct dentry *parent = dentry->d_parent;
+ struct sysfs_dirent *parent_sd = parent->d_fsdata;
struct sysfs_dirent *sd;
struct inode *inode;
+ const void *tag;
mutex_lock(&sysfs_mutex);
- sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
+ tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
+ sd = sysfs_find_dirent(parent_sd, tag, dentry->d_name.name);
/* no such entry */
if (!sd) {
@@ -882,19 +906,24 @@ int sysfs_rename_dir(struct kobject * ko
struct sysfs_rename_struct *srs;
struct inode *parent_inode = NULL;
const char *dup_name = NULL;
+ const void *old_tag, *tag;
int error;
INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
+ old_tag = sysfs_dirent_tag(sd);
+ tag = sysfs_creation_tag(sd->s_parent, sd);
error = 0;
- if (strcmp(sd->s_name, new_name) == 0)
+ if ((old_tag == tag) && (strcmp(sd->s_name, new_name) == 0))
goto out; /* nothing to rename */
sysfs_grab_supers();
- error = prep_rename(&todo, sd, sd->s_parent, new_name);
- if (error)
- goto out_release;
+ if (old_tag == tag) {
+ error = prep_rename(&todo, sd, sd->s_parent, new_name);
+ if (error)
+ goto out_release;
+ }
error = -ENOMEM;
mutex_lock(&sysfs_mutex);
@@ -907,7 +936,7 @@ int sysfs_rename_dir(struct kobject * ko
mutex_lock(&sysfs_mutex);
error = -EEXIST;
- if (sysfs_find_dirent(sd->s_parent, new_name))
+ if (sysfs_find_dirent(sd->s_parent, tag, new_name))
goto out_unlock;
/* rename kobject and sysfs_dirent */
@@ -922,6 +951,8 @@ int sysfs_rename_dir(struct kobject * ko
dup_name = sd->s_name;
sd->s_name = new_name;
+ if (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)
+ sd->s_tag.tag = tag;
/* rename */
list_for_each_entry(srs, &todo, list) {
@@ -929,6 +960,20 @@ int sysfs_rename_dir(struct kobject * ko
d_move(srs->old_dentry, srs->new_dentry);
}
+ /* If we are moving across superblocks drop the dcache entries */
+ if (old_tag != tag) {
+ struct super_block *sb;
+ struct dentry *dentry;
+ list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+ dentry = __sysfs_get_dentry(sb, sd);
+ if (!dentry)
+ continue;
+ shrink_dcache_parent(dentry);
+ d_drop(dentry);
+ dput(dentry);
+ }
+ }
+
error = 0;
out_unlock:
mutex_unlock(&sysfs_mutex);
@@ -951,11 +996,13 @@ int sysfs_move_dir(struct kobject *kobj,
struct sysfs_rename_struct *srs;
struct inode *old_parent_inode = NULL, *new_parent_inode = NULL;
int error;
+ const void *tag;
INIT_LIST_HEAD(&todo);
mutex_lock(&sysfs_rename_mutex);
BUG_ON(!sd->s_parent);
new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
+ tag = sysfs_dirent_tag(sd);
error = 0;
if (sd->s_parent == new_parent_sd)
@@ -989,7 +1036,7 @@ again:
mutex_lock(&sysfs_mutex);
error = -EEXIST;
- if (sysfs_find_dirent(new_parent_sd, sd->s_name))
+ if (sysfs_find_dirent(new_parent_sd, tag, sd->s_name))
goto out_unlock;
error = 0;
@@ -1028,10 +1075,11 @@ static inline unsigned char dt_type(stru
static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct sysfs_dirent * parent_sd = dentry->d_fsdata;
+ struct dentry *parent = filp->f_path.dentry;
+ struct sysfs_dirent *parent_sd = parent->d_fsdata;
struct sysfs_dirent *pos;
ino_t ino;
+ const void *tag;
if (filp->f_pos == 0) {
ino = parent_sd->s_ino;
@@ -1049,6 +1097,8 @@ static int sysfs_readdir(struct file * f
if ((filp->f_pos > 1) && (filp->f_pos < INT_MAX)) {
mutex_lock(&sysfs_mutex);
+ tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
+
/* Skip the dentries we have already reported */
pos = parent_sd->s_dir.children;
while (pos && (filp->f_pos > pos->s_ino))
@@ -1058,6 +1108,10 @@ static int sysfs_readdir(struct file * f
const char * name;
int len;
+ if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ (pos->s_tag.tag != tag))
+ continue;
+
name = pos->s_name;
len = strlen(name);
filp->f_pos = ino = pos->s_ino;
@@ -1078,3 +1132,106 @@ const struct file_operations sysfs_dir_o
.read = generic_read_dir,
.readdir = sysfs_readdir,
};
+
+const void *sysfs_creation_tag(struct sysfs_dirent *parent_sd,
+ struct sysfs_dirent *sd)
+{
+ const void *tag = NULL;
+
+ if (parent_sd->s_flags & SYSFS_FLAG_TAGGED) {
+ struct kobject *kobj;
+ switch (sysfs_type(sd)) {
+ case SYSFS_DIR:
+ kobj = sd->s_dir.kobj;
+ break;
+ case SYSFS_KOBJ_LINK:
+ kobj = sd->s_symlink.target_sd->s_dir.kobj;
+ break;
+ default:
+ BUG();
+ }
+ tag = parent_sd->s_tag.ops->kobject_tag(kobj);
+ }
+ return tag;
+}
+
+const void *sysfs_removal_tag(struct kobject *kobj, struct sysfs_dirent *dir_sd)
+{
+ const void *tag = NULL;
+
+ if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
+ tag = kobj->sd->s_tag.tag;
+
+ return tag;
+}
+
+const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd,
+ struct super_block *sb)
+{
+ const void *tag = NULL;
+
+ if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
+ tag = dir_sd->s_tag.ops->sb_tag(&sysfs_info(sb)->tag);
+
+ return tag;
+}
+
+const void *sysfs_dirent_tag(struct sysfs_dirent *sd)
+{
+ const void *tag = NULL;
+
+ if (sd->s_parent && (sd->s_parent->s_flags & SYSFS_FLAG_TAGGED))
+ tag = sd->s_tag.tag;
+
+ return tag;
+}
+
+/**
+ * sysfs_enable_tagging - Automatically tag all of the children in a
+ * directory.
+ * @kobj: object whose children should be filtered by tags
+ *
+ * Once tagging has been enabled on a directory the contents
+ * of the directory become dependent upon context captured when
+ * sysfs was mounted.
+ *
+ * tag_ops->sb_tag() returns the context for a given superblock.
+ *
+ * tag_ops->kobject_tag() returns the context that a given kobj
+ * resides in.
+ *
+ * Using those methods the sysfs code on tagged directories
+ * carefully stores the files so that when we lookup files
+ * we get the proper answer for our context.
+ *
+ * If the context of a kobject is changed it is expected that
+ * the kobject will be renamed so the appropriate sysfs data structures
+ * can be updated.
+ */
+int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops)
+{
+ struct sysfs_dirent *sd;
+ int err;
+
+ err = -ENOENT;
+ sd = kobj->sd;
+
+ mutex_lock(&sysfs_mutex);
+ err = -EINVAL;
+ /* We can only enable tagging on empty directories
+ * where tagging is not already enabled, and
+ * who are not subdirectories of directories where tagging is
+ * enabled.
+ */
+ if (!sd->s_dir.children && (sysfs_type(sd) == SYSFS_DIR) &&
+ !(sd->s_flags & SYSFS_FLAG_TAGGED) &&
+ sd->s_parent &&
+ !(sd->s_parent->s_flags & SYSFS_FLAG_TAGGED)) {
+ err = 0;
+ sd->s_flags |= SYSFS_FLAG_TAGGED;
+ sd->s_tag.ops = tag_ops;
+ }
+ mutex_unlock(&sysfs_mutex);
+ return err;
+}
Index: linux-mm/fs/sysfs/file.c
===================================================================
--- linux-mm.orig/fs/sysfs/file.c
+++ linux-mm/fs/sysfs/file.c
@@ -460,9 +460,9 @@ void sysfs_notify(struct kobject *k, cha
mutex_lock(&sysfs_mutex);
if (sd && dir)
- sd = sysfs_find_dirent(sd, dir);
+ sd = sysfs_find_dirent(sd, NULL, dir);
if (sd && attr)
- sd = sysfs_find_dirent(sd, attr);
+ sd = sysfs_find_dirent(sd, NULL, attr);
if (sd) {
struct sysfs_open_dirent *od;
@@ -631,7 +631,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
{
- sysfs_hash_and_remove(kobj->sd, attr->name);
+ sysfs_hash_and_remove(kobj, kobj->sd, attr->name);
}
@@ -651,7 +651,7 @@ void sysfs_remove_file_from_group(struct
else
dir_sd = sysfs_get(kobj->sd);
if (dir_sd) {
- sysfs_hash_and_remove(dir_sd, attr->name);
+ sysfs_hash_and_remove(kobj, dir_sd, attr->name);
sysfs_put(dir_sd);
}
}
Index: linux-mm/fs/sysfs/group.c
===================================================================
--- linux-mm.orig/fs/sysfs/group.c
+++ linux-mm/fs/sysfs/group.c
@@ -23,7 +23,7 @@ static void remove_files(struct sysfs_di
int i;
for (i = 0, attr = grp->attrs; *attr; i++, attr++)
- sysfs_hash_and_remove(dir_sd, (*attr)->name);
+ sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name);
}
static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
@@ -39,7 +39,7 @@ static int create_files(struct sysfs_dir
* visibility. Do this by first removing then
* re-adding (if required) the file */
if (update)
- sysfs_hash_and_remove(dir_sd, (*attr)->name);
+ sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name);
if (grp->is_visible) {
mode = grp->is_visible(kobj, *attr, i);
if (!mode)
Index: linux-mm/fs/sysfs/inode.c
===================================================================
--- linux-mm.orig/fs/sysfs/inode.c
+++ linux-mm/fs/sysfs/inode.c
@@ -217,17 +217,20 @@ struct inode * sysfs_get_inode(struct sy
return inode;
}
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
+int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd,
+ const char *name)
{
struct sysfs_addrm_cxt acxt;
struct sysfs_dirent *sd;
+ const void *tag;
if (!dir_sd)
return -ENOENT;
sysfs_addrm_start(&acxt, dir_sd);
+ tag = sysfs_removal_tag(kobj, dir_sd);
- sd = sysfs_find_dirent(dir_sd, name);
+ sd = sysfs_find_dirent(dir_sd, tag, name);
if (sd)
sysfs_remove_one(&acxt, sd);
Index: linux-mm/fs/sysfs/mount.c
===================================================================
--- linux-mm.orig/fs/sysfs/mount.c
+++ linux-mm/fs/sysfs/mount.c
@@ -75,6 +75,7 @@ static int sysfs_fill_super(struct super
goto out_err;
}
root->d_fsdata = &sysfs_root;
+ root->d_sb = sb;
sb->s_root = root;
sb->s_fs_info = info;
return 0;
@@ -88,20 +89,55 @@ out_err:
return error;
}
+static int sysfs_test_super(struct super_block *sb, void *ptr)
+{
+ struct task_struct *task = ptr;
+ struct sysfs_super_info *info = sysfs_info(sb);
+ int found = 1;
+
+ return found;
+}
+
static int sysfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
- int rc;
+ struct super_block *sb;
+ int error;
mutex_lock(&sysfs_rename_mutex);
- rc = get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt);
+ sb = sget(fs_type, sysfs_test_super, set_anon_super, current);
+ if (IS_ERR(sb)) {
+ error = PTR_ERR(sb);
+ goto out;
+ }
+ if (!sb->s_root) {
+ sb->s_flags = flags;
+ error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
+ if (error) {
+ up_write(&sb->s_umount);
+ deactivate_super(sb);
+ goto out;
+ }
+ sb->s_flags |= MS_ACTIVE;
+ }
+ do_remount_sb(sb, flags, data, 0);
+ error = simple_set_mnt(mnt, sb);
+out:
mutex_unlock(&sysfs_rename_mutex);
- return rc;
+ return error;
+}
+
+static void sysfs_kill_sb(struct super_block *sb)
+{
+ struct sysfs_super_info *info = sysfs_info(sb);
+
+ kill_anon_super(sb);
+ kfree(info);
}
struct file_system_type sysfs_fs_type = {
.name = "sysfs",
.get_sb = sysfs_get_sb,
- .kill_sb = kill_anon_super,
+ .kill_sb = sysfs_kill_sb,
};
void sysfs_grab_supers(void)
Index: linux-mm/fs/sysfs/symlink.c
===================================================================
--- linux-mm.orig/fs/sysfs/symlink.c
+++ linux-mm/fs/sysfs/symlink.c
@@ -94,7 +94,7 @@ void sysfs_remove_link(struct kobject *
else
parent_sd = kobj->sd;
- sysfs_hash_and_remove(parent_sd, name);
+ sysfs_hash_and_remove(kobj, parent_sd, name);
}
static int sysfs_get_target_path(struct sysfs_dirent *parent_sd,
Index: linux-mm/fs/sysfs/sysfs.h
===================================================================
--- linux-mm.orig/fs/sysfs/sysfs.h
+++ linux-mm/fs/sysfs/sysfs.h
@@ -46,6 +46,10 @@ struct sysfs_dirent {
const char *s_name;
union {
+ const struct sysfs_tagged_dir_operations *ops;
+ const void *tag;
+ } s_tag;
+ union {
struct sysfs_elem_dir s_dir;
struct sysfs_elem_symlink s_symlink;
struct sysfs_elem_attr s_attr;
@@ -69,6 +73,7 @@ struct sysfs_dirent {
#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK
#define SYSFS_FLAG_REMOVED 0x0200
+#define SYSFS_FLAG_TAGGED 0x0400
static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
{
@@ -87,6 +92,7 @@ struct sysfs_addrm_cxt {
struct sysfs_super_info {
int grabbed;
+ struct sysfs_tag_info tag;
};
#define sysfs_info(SB) ((struct sysfs_super_info *)(SB)->s_fs_info)
@@ -113,6 +119,13 @@ extern spinlock_t sysfs_assoc_lock;
extern const struct file_operations sysfs_dir_operations;
extern const struct inode_operations sysfs_dir_inode_operations;
+extern const void *sysfs_creation_tag(struct sysfs_dirent *parent_sd,
+ struct sysfs_dirent *sd);
+extern const void *sysfs_removal_tag(struct kobject *kobj,
+ struct sysfs_dirent *dir_sd);
+extern const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd,
+ struct super_block *sb);
+extern const void *sysfs_dirent_tag(struct sysfs_dirent *sd);
struct dentry *sysfs_get_dentry(struct super_block *sb,
struct sysfs_dirent *sd);
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
@@ -124,6 +137,7 @@ void sysfs_remove_one(struct sysfs_addrm
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+ const void *tag,
const unsigned char *name);
struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
const unsigned char *name);
@@ -155,7 +169,8 @@ static inline void sysfs_put(struct sysf
*/
struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
+int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd,
+ const char *name);
int sysfs_inode_init(void);
/*
Index: linux-mm/include/linux/sysfs.h
===================================================================
--- linux-mm.orig/include/linux/sysfs.h
+++ linux-mm/include/linux/sysfs.h
@@ -80,6 +80,14 @@ struct sysfs_ops {
ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t);
};
+struct sysfs_tag_info {
+};
+
+struct sysfs_tagged_dir_operations {
+ const void *(*sb_tag)(struct sysfs_tag_info *info);
+ const void *(*kobject_tag)(struct kobject *kobj);
+};
+
#ifdef CONFIG_SYSFS
int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
@@ -119,6 +127,9 @@ void sysfs_remove_file_from_group(struct
void sysfs_notify(struct kobject *kobj, char *dir, char *attr);
void sysfs_printk_last_file(void);
+int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops);
+
extern int __must_check sysfs_init(void);
#else /* CONFIG_SYSFS */
@@ -215,6 +226,12 @@ static inline void sysfs_notify(struct k
{
}
+static inline int sysfs_enable_tagging(struct kobject *kobj,
+ const struct sysfs_tagged_dir_operations *tag_ops)
+{
+ return 0;
+}
+
static inline int __must_check sysfs_init(void)
{
return 0;
--
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-06-18 17:08 ` [PATCH 06/11] sysfs: Implement sysfs tagged directory support Benjamin Thery
@ 2008-06-23 2:05 ` Tejun Heo
2008-06-26 20:21 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Tejun Heo @ 2008-06-23 2:05 UTC (permalink / raw)
To: Benjamin Thery
Cc: Greg Kroah-Hartman, Andrew Morton, Eric Biederman,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Hello,
> Index: linux-mm/fs/sysfs/file.c
> ===================================================================
> --- linux-mm.orig/fs/sysfs/file.c
> +++ linux-mm/fs/sysfs/file.c
> @@ -460,9 +460,9 @@ void sysfs_notify(struct kobject *k, cha
> mutex_lock(&sysfs_mutex);
>
> if (sd && dir)
> - sd = sysfs_find_dirent(sd, dir);
> + sd = sysfs_find_dirent(sd, NULL, dir);
> if (sd && attr)
> - sd = sysfs_find_dirent(sd, attr);
> + sd = sysfs_find_dirent(sd, NULL, attr);
> if (sd) {
> struct sysfs_open_dirent *od;
>
As only directories can be tagged, I suppose handling tags explicitly
isn't necessary here, right? Can we please add a comment explaining
that?
> Index: linux-mm/fs/sysfs/inode.c
> ===================================================================
> --- linux-mm.orig/fs/sysfs/inode.c
> +++ linux-mm/fs/sysfs/inode.c
> @@ -217,17 +217,20 @@ struct inode * sysfs_get_inode(struct sy
> return inode;
> }
>
> -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
> +int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd,
> + const char *name)
> {
> struct sysfs_addrm_cxt acxt;
> struct sysfs_dirent *sd;
> + const void *tag;
>
> if (!dir_sd)
> return -ENOENT;
>
> sysfs_addrm_start(&acxt, dir_sd);
> + tag = sysfs_removal_tag(kobj, dir_sd);
>
> - sd = sysfs_find_dirent(dir_sd, name);
> + sd = sysfs_find_dirent(dir_sd, tag, name);
> if (sd)
> sysfs_remove_one(&acxt, sd);
Taking both @kobj and @dir_sd is ugly but it isn't your fault. I'll
clean things up later.
> Index: linux-mm/include/linux/sysfs.h
> ===================================================================
> --- linux-mm.orig/include/linux/sysfs.h
> +++ linux-mm/include/linux/sysfs.h
> @@ -80,6 +80,14 @@ struct sysfs_ops {
> ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t);
> };
>
> +struct sysfs_tag_info {
> +};
> +
> +struct sysfs_tagged_dir_operations {
> + const void *(*sb_tag)(struct sysfs_tag_info *info);
> + const void *(*kobject_tag)(struct kobject *kobj);
> +};
As before, I can't bring myself to like this interface. Is computing
tags dynamically really necessary? Can't we do the following?
tag = sysfs_allocate_tag(s);
sysfs_enable_tag(kobj (or sd), tag);
sysfs_sb_show_tag(sb, tag);
Where tags are allocated using ida and each sb has bitmap of enabled
tags so that sysfs ops can simply use something like the following to
test whether it's enabled.
bool sysfs_tag_enabled(sb, tag)
{
return sysfs_info(sb)->tag_map & (1 << tag);
}
Tags which can change dynamically seem too confusing to me, and they
make things difficult to verify as it's unclear how those tags are
going to change.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-06-23 2:05 ` Tejun Heo
@ 2008-06-26 20:21 ` Eric W. Biederman
2008-06-29 3:51 ` Tejun Heo
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-06-26 20:21 UTC (permalink / raw)
To: Tejun Heo
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Tejun thank you for the review, and my apologies for the delayed
reply.
Tejun Heo <htejun@gmail.com> writes:
>> Index: linux-mm/include/linux/sysfs.h
>> ===================================================================
>> --- linux-mm.orig/include/linux/sysfs.h
>> +++ linux-mm/include/linux/sysfs.h
>> @@ -80,6 +80,14 @@ struct sysfs_ops {
>> ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t);
>> };
>>
>> +struct sysfs_tag_info {
>> +};
>> +
>> +struct sysfs_tagged_dir_operations {
>> + const void *(*sb_tag)(struct sysfs_tag_info *info);
>> + const void *(*kobject_tag)(struct kobject *kobj);
>> +};
>
> As before, I can't bring myself to like this interface. Is computing
> tags dynamically really necessary? Can't we do the followings?
It isn't so much computing tags dynamically but rather it is reading them
from where they are stored.
> tag = sysfs_allocate_tag(s);
> sysfs_enable_tag(kobj (or sd), tag);
> sysfs_sb_show_tag(sb, tag);
>
> Where tags are allocated using ida and each sb has bitmap of enabled
> tags so that sysfs ops can simply use something like the following to
> test whether it's enabled.
>
> bool sysfs_tag_enabled(sb, tag)
> {
> return sysfs_info(sb)->tag_map & (1 << tag);
> }
Youch that seems limiting. The expectation is that we could have
as many as 100 different containers in use on a single system at one
time. So 100 apparent copies of the network stack.
There is also a second dimension here: we multiplex different
directories based on different sets of tags. One directory is based
on user namespaces, another on the network namespaces.
The tags in practice are just pointers to the namespace pointers.
So while we could use the ida technique to specify which set of tags
we are talking about for a directory it isn't sufficient.
The question sysfs_tag_enabled(sb, tag) makes no sense to me,
especially in the context of needing a sysfs_sb_show_tag(sb, tag).
The current structure is because of all of the darn fool races and
magic that sysfs does. We have to say for a given directory: Your
contents will always be tagged, and only those with the one tag that
matches what was captured by the superblock when sysfs is mounted
will be shown.
> Tags which can change dynamically seems too confusing to me and it
> makes things difficult to verify as it's unclear how those tags are
> gonna to change.
We have a fundamental issue that we have to handle, and it sounds like
you are proposing something that will not handle it.
- network devices can move between namespaces.
- network devices have driver specific sysfs attributes hanging off of them.
So we have to move the network devices and their sysfs attributes
between namespaces, and I implemented that in kobject_rename,
sysfs_rename path.
The tags on a kobject can only change during a rename operation.
So when the change happens is well defined. Further there is a
set of functions: sysfs_creation_tag, sysfs_removal_tag,
sysfs_lookup_tag, sysfs_dirent_tag which makes it clear what we
are doing.
If you really don't like how the tags are managed we need to talk
about how we store the tags on kobjects and on the super block.
Registering a set of tags could easily make the sb_tag function
obsolete, and that is one small piece of code so it is no big deal.
struct sysfs_tag_type_operations {
const void *(*mount_tag)(void);
const void *(*kobject_tag)(struct kobject *kobj);
};
Then we could do:
struct sysfs_sbtag_operations *tag_type_ops[MAX_TAG_TYPES];
And sysfs_tag_info could become.
struct sysfs_tag_info {
void *tag[MAX_TAG_TYPES];
};
During subsystem initialization we could call
tag_type = sysfs_allocate_tag_type();
Just after the subsystem creates a directory.
sysfs_enable_tagging(kobj/sd, tag_type);
Then anytime we currently call sb_tag during lookup we can instead
just look at sysfs_info(sb)->tag[tag_type] and compare that with
sd->s_tag.tag.
The actual tag values themselves are currently stored in the object in
which the kobject is embedded.
So we still need to call kobject_tag when we create or rename
something in a tagged directory. So we know what the tag is.
When we go to remove a kobj using the existing tag on the object
is the right choice.
Rename is the fun case where we need to grab the old tag from the
sd and place on it the new tag from kobject_tag.
One of the big problems, at least with the class directories, is that
the lifetimes are completely decoupled between the tags and
the subsystem objects and subsystem directories that need to be
tagged. This isn't a "set things up at the start of your subsystem
and everything is happy" situation. To handle the races there must
be support at least at the kobject level for handling this in the
network namespace case.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-06-26 20:21 ` Eric W. Biederman
@ 2008-06-29 3:51 ` Tejun Heo
2008-06-30 18:56 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Tejun Heo @ 2008-06-29 3:51 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Hello, Eric.
Eric W. Biederman wrote:
> Tejun thank you for the review, and my apologies for the delayed
> reply.
Me being the king of delays, no need for apologies. :-)
>> As before, I can't bring myself to like this interface. Is computing
>> tags dynamically really necessary? Can't we do the followings?
>
> It isn't so much computing tags dynamically but rather it is reading them
> from where they are stored.
It's still dynamic from sysfs's POV and I think that will make
maintenance more difficult.
>> tag = sysfs_allocate_tag(s);
>> sysfs_enable_tag(kobj (or sd), tag);
>> sysfs_sb_show_tag(sb, tag);
>>
>> Where tags are allocated using ida and each sb has bitmap of enabled
>> tags so that sysfs ops can simply use something like the following to
>> test whether it's enabled.
>>
>> bool sysfs_tag_enabled(sb, tag)
>> {
>> return sysfs_info(sb)->tag_map & (1 << tag);
>> }
>
>
> Youch that seems limiting. The expectation is that we could have
> as many as 100 different containers in use on a single system at one
> time. So 100 apparent copies of the network stack.
100 netns would mean 100 bits, and 100 different views of them would mean
100 sb's where each sb would need a bitmap larger than 100 bits. I don't
think there would be a scalability problem. Am I missing something?
> There is also a second dimension here we multiplex different
> directories based on different sets of tags. One directory based
> on user namespaces another on the network namespaces.
No matter which criterion is used to select ns, it should end up being
mapped to a set of tags (here, ida allocated numbers). Unless tags can
change dynamically, there shouldn't be a functional difference.
> The tags in practice are just pointers to the namespace pointers.
>
> So while we could use the ida technique to specify which set of tags
> we are talking about for a directory it isn't sufficient.
I failed to follow here. Can you please elaborate a bit? If you can
describe a simple example to me, it would be much appreciated.
> The question sysfs_tag_enabled(sb, tag) makes no sense to me.
> Especially in the context of needed a sysfs_sb_show_tag(sb, tag);
>
> The current structure is because of all of the darn fool races and
> magic that sysfs does. We have to say for a given directory: Your
> contents will always be tagged, and only those that one tag that
> matches what was captured by the superblock when sysfs is mounted
> will be shown.
sysfs_tag_enabled() was meant to test whether a directory which is
tagged should be shown under the current sb.
>> Tags which can change dynamically seems too confusing to me and it
>> makes things difficult to verify as it's unclear how those tags are
>> gonna to change.
>
> We have a fundamental issue that we have to handle, and it sounds like
> you are proposing something that will not handle it.
>
> - network devices can move between namespaces.
> - network devices have driver specific sysfs attributes hanging off of them.
>
> So we have to move the network devices and their sysfs attributes
> between namespaces, and I implemented that in kobject_rename,
> sysfs_rename path.
>
> The tags on a kobject can only change during a rename operation.
> So when the change happens is well defined. Further there is a
> set of functions: sysfs_creation_tag, sysfs_removal_tag,
> sysfs_lookup_tag, sysfs_dirent_tag which makes it clear what we
> are doing.
>
> If you really don't like how the tags are managed we need to talk
> about how we store the tags on kobjects and on the super block.
>
> Registering a set of tags could easily make the sb_tag function
> obsolete, and that is one small piece of code so it is no big deal.
>
> struct sysfs_tag_type_operations {
> const void *(*mount_tag)(void);
> const void *(*kobject_tag)(struct kobject *kobj);
> };
>
> Then we could do:
> struct sysfs_sbtag_operations *tag_type_ops[MAX_TAG_TYPES];
>
> And sysfs_tag_info could become.
> struct sysfs_tag_info {
> void *tag[MAX_TAG_TYPES];
> };
>
> During subsystem initialization we could call
> tag_type = sysfs_allocate_tag_type();
>
> Just after the subsystem creates a directory.
> sysfs_enable_tagging(kobj/sd, tag_type);
>
> Then anytime we currently call sb_tag during lookup we can instead
> just look at sysfs_info(sb)->tag[tag_type] and compare that with
> sd->s_tag.tag.
What you described is pretty much what I'm talking about. The only
difference is whether to use caller-provided pointer as tag or an
ida-allocated integer. The last sentence of the above paragraph is
basically the sysfs_tag_enabled() function (maybe misnamed).
The main reason why I'm whining about this so much is because I think
tag should be something abstracted inside sysfs proper. It's something
which affects very internal operation of sysfs and I really want to keep
the implementation details inside sysfs. Spreading implementation over
kobject and sysfs didn't turn out too pretty after all.
Thank you.
--
tejun
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-06-29 3:51 ` Tejun Heo
@ 2008-06-30 18:56 ` Eric W. Biederman
2008-06-30 21:44 ` Serge E. Hallyn
2008-07-01 6:47 ` Tejun Heo
0 siblings, 2 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-06-30 18:56 UTC (permalink / raw)
To: Tejun Heo
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Tejun Heo <htejun@gmail.com> writes:
> Hello, Eric.
>
> Eric W. Biederman wrote:
>> Tejun thank you for the review, and my apologies for the delayed
>> reply.
>
> Me being the king of delays, no need for apologies. :-)
>
>>> As before, I can't bring myself to like this interface. Is computing
>>> tags dynamically really necessary? Can't we do the followings?
>>
>> It isn't so much computing tags dynamically but rather it is reading them
>> from where they are stored.
>
> It's still dynamic from sysfs's POV and I think that will make
> maintenance more difficult.
Potentially. I have no problem making it clear that things are more static.
>> There is also a second dimension here we multiplex different
>> directories based on different sets of tags. One directory based
>> on user namespaces another on the network namespaces.
>
> No matter which criteria is used to select ns, it should end up being
> mapped to a set of tags (here, ida allocated numbers). Unless tags can
> change dynamically, there shouldn't be functional difference.
>
>> The tags in practice are just pointers to the namespace pointers.
>>
>> So while we could use the ida technique to specify which set of tags
>> we are talking about for a directory it isn't sufficient.
>
> I failed to follow here. Can you please elaborate a bit? If you can
> describe a simple example to me, it would be much appreciated.
See below.
>> The question sysfs_tag_enabled(sb, tag) makes no sense to me.
>> Especially in the context of needed a sysfs_sb_show_tag(sb, tag);
>>
>> The current structure is because of all of the darn fool races and
>> magic that sysfs does. We have to say for a given directory: Your
>> contents will always be tagged, and only those that one tag that
>> matches what was captured by the superblock when sysfs is mounted
>> will be shown.
>
> sysfs_tag_enabled() was meant to test whether a directory which is
> tagged should be shown under the current sb.
Ah. When we are doing readdir or lookup. Yes that makes sense.
See below. I honestly think sysfs_tag_enabled is the wrong question.
>>> Tags which can change dynamically seems too confusing to me and it
>>> makes things difficult to verify as it's unclear how those tags are
>>> gonna to change.
>>
>> We have a fundamental issue that we have to handle, and it sounds like
>> you are proposing something that will not handle it.
>>
>> - network devices can move between namespaces.
>> - network devices have driver specific sysfs attributes hanging off of them.
>>
>> So we have to move the network devices and their sysfs attributes
>> between namespaces, and I implemented that in kobject_rename,
>> sysfs_rename path.
>>
>> The tags on a kobject can only change during a rename operation.
>> So when the change happens is well defined. Further there is a
>> set of functions: sysfs_creation_tag, sysfs_removal_tag,
>> sysfs_lookup_tag, sysfs_dirent_tag which makes it clear what we
>> are doing.
>>
>> If you really don't like how the tags are managed we need to talk
>> about how we store the tags on kobjects and on the super block.
>>
>> Registering a set of tags could easily make the sb_tag function
>> obsolete, and that is one small piece of code so it is no big deal.
>>
>> struct sysfs_tag_type_operations {
>> const void *(*mount_tag)(void);
>> const void *(*kobject_tag)(struct kobject *kobj);
>> };
>>
>> Then we could do:
>> struct sysfs_sbtag_operations *tag_type_ops[MAX_TAG_TYPES];
>>
>> And sysfs_tag_info could become.
>> struct sysfs_tag_info {
>> void *tag[MAX_TAG_TYPES];
>> };
>>
>> During subsystem initialization we could call
>> tag_type = sysfs_allocate_tag_type();
>>
>> Just after the subsystem creates a directory.
>> sysfs_enable_tagging(kobj/sd, tag_type);
>>
>> Then anytime we currently call sb_tag during lookup we can instead
>> just look at sysfs_info(sb)->tag[tag_type] and compare that with
>> sd->s_tag.tag.
>
> What you described is pretty much what I'm talking about. The only
> difference is whether to use caller-provided pointer as tag or an
> ida-allocated integer. The last sentence of the above paragraph is
> basically sys_tag_enabled() function (maybe misnamed).
So some concrete code examples here. In the current code in lookup
what I am doing is:
tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
sd = sysfs_find_dirent(parent_sd, tag, dentry->d_name.name);
With the proposed change of adding tag types sysfs_lookup_tag becomes:
const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd, struct super_block *sb)
{
const void *tag = NULL;
if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
tag = sysfs_info(sb)->tag[dir_sd->tag_type];
return tag;
}
Which means that in practice I can look up the tag that I am displaying
once.
Then in sysfs_find_dirent we do:
for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
(sd->s_tag.tag != tag))
continue;
if (!strcmp(sd->s_name, name))
return sd;
}
That should keep the implementation sufficiently inside of sysfs for there
to be no guessing. In addition as a practical matter we can only allow
one tag to be visible in a directory at once or else we can not check
for duplicate names. That is the problem I see with a bitmap-based test:
too many unnecessary degrees of freedom.
The number of tag types will be low as it is the number of subsystems
that use the feature. Simple enough that I expect statically allocating
the tag types in an enumeration is a safe and sane way to operate.
i.e.
enum sysfs_tag_types {
SYSFS_TAG_NETNS,
SYSFS_TAG_USERNS,
SYSFS_TAG_MAX
};
> The main reason why I'm whining about this so much is because I think
> tag should be something abstracted inside sysfs proper. It's something
> which affects very internal operation of sysfs and I really want to keep
> the implementation details inside sysfs. Spreading implementation over
> kobject and sysfs didn't turn out too pretty after all.
I agree. Most of the implementation is in sysfs already. We just have
a few corner cases.
Fundamentally it is the subsystems responsibility that creates the
kobjects and the sysfs entries. The only case where I can see an
ida generated number being a help is if we start having lifetime
issues. Further, the extra work to allocate and free ida-based
tags seems unnecessary.
I don't doubt that there is a lot we can do better. My current goal
is for something that is clean enough it won't get us into trouble
later, and then merging the code. In tree where people can see
the code and the interactions I expect it will be easier to talk
about.
Currently the interface with the users is very small. Adding the
tag_type enumeration should make it smaller and make things more
obviously static.
Guys can we please make something useful happen?
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-06-30 18:56 ` Eric W. Biederman
@ 2008-06-30 21:44 ` Serge E. Hallyn
2008-07-01 7:50 ` Eric W. Biederman
2008-07-01 6:47 ` Tejun Heo
1 sibling, 1 reply; 75+ messages in thread
From: Serge E. Hallyn @ 2008-06-30 21:44 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Tejun Heo, Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Quoting Eric W. Biederman (ebiederm@xmission.com):
> Tejun Heo <htejun@gmail.com> writes:
>
> > Hello, Eric.
> >
> > Eric W. Biederman wrote:
> >> Tejun thank you for the review, and my apologies for the delayed
> >> reply.
> >
> > Me being the king of delays, no need for apologies. :-)
> >
> >>> As before, I can't bring myself to like this interface. Is computing
> >>> tags dynamically really necessary? Can't we do the followings?
> >>
> >> It isn't so much computing tags dynamically but rather it is reading them
> >> from where they are stored.
> >
> > It's still dynamic from sysfs's POV and I think that will make
> > maintenance more difficult.
>
> Potentially. I have no problem make it clear that things are more static.
>
> >> There is also a second dimension here we multiplex different
> >> directories based on different sets of tags. One directory based
> >> on user namespaces another on the network namespaces.
> >
> > No matter which criteria is used to select ns, it should end up being
> > mapped to a set of tags (here, ida allocated numbers). Unless tags can
> > change dynamically, there shouldn't be functional difference.
> >
> >> The tags in practice are just pointers to the namespace pointers.
> >>
> >> So while we could use the ida technique to specify which set of tags
> >> we are talking about for a directory it isn't sufficient.
> >
> > I failed to follow here. Can you please elaborate a bit? If you can
> > describe a simple example to me, it would be much appreciated.
>
> See below.
>
> >> The question sysfs_tag_enabled(sb, tag) makes no sense to me.
> >> Especially in the context of needed a sysfs_sb_show_tag(sb, tag);
> >>
> >> The current structure is because of all of the darn fool races and
> >> magic that sysfs does. We have to say for a given directory: Your
> >> contents will always be tagged, and only those that one tag that
> >> matches what was captured by the superblock when sysfs is mounted
> >> will be shown.
> >
> > sysfs_tag_enabled() was meant to test whether a directory which is
> > tagged should be shown under the current sb.
>
> Ah. When we are doing readdir or lookup. Yes that makes sense.
>
> See below. I honestly think sysfs_tab_enabled is the wrong question.
>
> >>> Tags which can change dynamically seems too confusing to me and it
> >>> makes things difficult to verify as it's unclear how those tags are
> >>> gonna to change.
> >>
> >> We have a fundamental issue that we have to handle, and it sounds like
> >> you are proposing something that will not handle it.
> >>
> >> - network devices can move between namespaces.
> >> - network devices have driver specific sysfs attributes hanging off of them.
> >>
> >> So we have to move the network devices and their sysfs attributes
> >> between namespaces, and I implemented that in kobject_rename,
> >> sysfs_rename path.
> >>
> >> The tags on a kobject can only change during a rename operation.
> >> So when the change happens is well defined. Further there is a
> >> set of functions: sysfs_creation_tag, sysfs_removal_tag,
> >> sysfs_lookup_tag, sysfs_dirent_tag which makes it clear what we
> >> are doing.
> >>
> >> If you really don't like how the tags are managed we need to talk
> >> about how we store the tags on kobjects and on the super block.
> >>
> >> Registering a set of tags could easily make the sb_tag function
> >> obsolete, and that is one small piece of code so it is no big deal.
> >>
> >> struct sysfs_tag_type_operations {
> >> const void *(*mount_tag)(void);
> >> const void *(*kobject_tag)(struct kobject *kobj);
> >> };
> >>
> >> Then we could do:
> >> struct sysfs_sbtag_operations *tag_type_ops[MAX_TAG_TYPES];
> >>
> >> And sysfs_tag_info could become.
> >> struct sysfs_tag_info {
> >> void *tag[MAX_TAG_TYPES];
> >> };
> >>
> >> During subsystem initialization we could call
> >> tag_type = sysfs_allocate_tag_type();
> >>
> >> Just after the subsystem creates a directory.
> >> sysfs_enable_tagging(kobj/sd, tag_type);
> >>
> >> Then anytime we currently call sb_tag during lookup we can instead
> >> just look at sysfs_info(sb)->tag[tag_type] and compare that with
> >> sd->s_tag.tag.
> >
> > What you described is pretty much what I'm talking about. The only
> > difference is whether to use caller-provided pointer as tag or an
> > ida-allocated integer. The last sentence of the above paragraph is
> > basically sys_tag_enabled() function (maybe misnamed).
>
> So some concrete code examples here. In the current code in lookup
> what I am doing is:
>
> tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
> sd = sysfs_find_dirent(parent_sd, tag, dentry->d_name.name);
>
> With the proposed change of adding tag types sysfs_lookup_tag becomes:
>
> const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd, struct super_block *sb)
> {
> const void *tag = NULL;
>
> if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
> tag = sysfs_info(sb)->tag[dir_sd->tag_type];
>
> return tag;
> }
>
> Which means that in practice I can lookup that tag that I am displaying
> once.
>
> Then in sysfs_find_dirent we do:
>
> for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
> if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
> (sd->s_tag.tag != tag))
> continue;
> if (!strcmp(sd->s_name, name))
> return sd;
> }
>
> That should keep the implementation sufficiently inside of sysfs for there
> to be no guessing. In addition as a practical matter we can only allow
> one tag to be visible in a directory at once or else we can not check
> for duplicate names. Which is the problem I see with a bitmap based test
> too unnecessary many degrees of freedom.
>
> The number of tag types will be low as it is the number of subsystems
> that use the feature. Simple enough that I expect statically allocating
> the tag types in an enumeration is a safe and sane way to operate.
> i.e.
>
> enum sysfs_tag_types {
> SYSFS_TAG_NETNS,
> SYSFS_TAG_USERNS,
> SYSFS_TAG_MAX
> };
>
> > The main reason why I'm whining about this so much is because I think
> > tag should be something abstracted inside sysfs proper. It's something
> > which affects very internal operation of sysfs and I really want to keep
> > the implementation details inside sysfs. Spreading implementation over
> > kobject and sysfs didn't turn out too pretty after all.
>
> I agree. Most of the implementation is in sysfs already. We just have
> a few corner cases.
>
> Fundamentally it is the subsystems responsibility that creates the
> kobjects and the sysfs entries. The only case where I can see an
> ida generated number being a help is if we start having lifetime
> issues. Further the extra work to allocate and free tags ida based
> tags seems unnecessary.
>
> I don't doubt that there is a lot we can do better. My current goal
> is for something that is clean enough it won't get us into trouble
> later, and then merging the code. In tree where people can see
> the code and the interactions I expect it will be easier to talk
> about.
>
> Currently the interface with the users is very small. Adding the
> tag_type enumeration should make it smaller and make things more
> obviously static.
>
> Guys can we please make something useful happen?
>
> Eric
Now that the iproute2 patch is upstream, this patchset really is the
only thing keeping us from using network namespaces. Given that the
details of the tagging are trivially changeable with no abi changes, I'd
personally much rather see the patches go in as is, with whatever new
tagging patches Benjamin whips up, using ida or some new idea, being
applied later if we feel the need.
-serge
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-06-30 21:44 ` Serge E. Hallyn
@ 2008-07-01 7:50 ` Eric W. Biederman
0 siblings, 0 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-07-01 7:50 UTC (permalink / raw)
To: Serge E. Hallyn
Cc: Tejun Heo, Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, linux-kernel, Al Viro, Linux Containers
"Serge E. Hallyn" <serue@us.ibm.com> writes:
> Now that the iproute2 patch is upstream, this patchset really is the
> only thing keeping us from using network namespaces. Given that the
> details of the tagging are trivially changeable with no abi changes, I'd
> personally much rather see the patches go in as is, with whatever new
> tagging patches Benjamin whips up, using ida or some new idea, being
> applied later if we feel the need.
My point exactly. No one seems to contest the userspace semantics so
as long as we don't put ourselves into a real mess we should be fine.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-06-30 18:56 ` Eric W. Biederman
2008-06-30 21:44 ` Serge E. Hallyn
@ 2008-07-01 6:47 ` Tejun Heo
2008-07-01 9:20 ` Eric W. Biederman
1 sibling, 1 reply; 75+ messages in thread
From: Tejun Heo @ 2008-07-01 6:47 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Hello, Eric.
Eric W. Biederman wrote:
>> It's still dynamic from sysfs's POV and I think that will make
>> maintenance more difficult.
>
> Potentially. I have no problem making it clear that things are more static.
Great. :-)
>> What you described is pretty much what I'm talking about. The only
>> difference is whether to use caller-provided pointer as tag or an
>> ida-allocated integer. The last sentence of the above paragraph is
>> basically sys_tag_enabled() function (maybe misnamed).
>
> So some concrete code examples here. In the current code in lookup
> what I am doing is:
>
> tag = sysfs_lookup_tag(parent_sd, parent->d_sb);
> sd = sysfs_find_dirent(parent_sd, tag, dentry->d_name.name);
>
> With the proposed change of adding tag types sysfs_lookup_tag becomes:
>
> const void *sysfs_lookup_tag(struct sysfs_dirent *dir_sd, struct super_block *sb)
> {
> const void *tag = NULL;
>
> if (dir_sd->s_flags & SYSFS_FLAG_TAGGED)
> tag = sysfs_info(sb)->tag[dir_sd->tag_type];
>
> return tag;
> }
>
> Which means that in practice I can look up the tag that I am displaying
> once.
>
> Then in sysfs_find_dirent we do:
>
> for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
> if ((parent_sd->s_flags & SYSFS_FLAG_TAGGED) &&
> (sd->s_tag.tag != tag))
> continue;
> if (!strcmp(sd->s_name, name))
> return sd;
> }
>
> That should keep the implementation sufficiently inside of sysfs for there
> to be no guessing. In addition as a practical matter we can only allow
> one tag to be visible in a directory at once or else we can not check
> for duplicate names. Which is the problem I see with a bitmap based test:
> too many unnecessary degrees of freedom.
Having enumerated tag types limits a sb to mapping to only one tag,
but it doesn't really prevent multiple possibly visible entries, which are
the real unnecessary degrees of freedom. That said, I don't really
think it's an issue.
> The number of tag types will be low as it is the number of subsystems
> that use the feature. Simple enough that I expect statically allocating
> the tag types in an enumeration is a safe and sane way to operate.
> i.e.
>
> enum sysfs_tag_types {
> SYSFS_TAG_NETNS,
> SYSFS_TAG_USERNS,
> SYSFS_TAG_MAX
> };
I still would prefer something which is more generic. The abstraction
is clearer too. A sb shows untagged and a set of tags. A sd can either
be untagged or tagged (a single tag).
>> The main reason why I'm whining about this so much is because I think
>> tag should be something abstracted inside sysfs proper. It's something
>> which affects very internal operation of sysfs and I really want to keep
>> the implementation details inside sysfs. Spreading implementation over
>> kobject and sysfs didn't turn out too pretty after all.
>
> I agree. Most of the implementation is in sysfs already. We just have
> a few corner cases.
>
> Fundamentally it is the subsystems responsibility that creates the
> kobjects and the sysfs entries. The only case where I can see an
> ida generated number being a help is if we start having lifetime
> issues. Further the extra work to allocate and free tags ida based
> tags seems unnecessary.
>
> I don't doubt that there is a lot we can do better. My current goal
> is for something that is clean enough it won't get us into trouble
> later, and then merging the code. In tree where people can see
> the code and the interactions I expect it will be easier to talk
> about.
>
> Currently the interface with the users is very small. Adding the
> tag_type enumeration should make it smaller and make things more
> obviously static.
Using ida (or idr if a pointer for private data is necessary) is really
easy. It'll probably take a few tens of lines of code. That said, I
don't think I have enough rationale to nack what you described. So, as
long as the tags are made static, I won't object.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-01 6:47 ` Tejun Heo
@ 2008-07-01 9:20 ` Eric W. Biederman
2008-07-01 10:30 ` Tejun Heo
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-07-01 9:20 UTC (permalink / raw)
To: Tejun Heo
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Tejun Heo <htejun@gmail.com> writes:
> Hello, Eric.
>
> Eric W. Biederman wrote:
>>> It's still dynamic from sysfs's POV and I think that will make
>>> maintenance more difficult.
>>
>> Potentially. I have no problem make it clear that things are more static.
>
> Great. :-)
>
> Having enumed tag types limits that a sb can have map to only one tag
> but it doesn't really prevent multiple possibly visible entries which is
> the real unnecessary degrees of freedom. That said, I don't really
> think it's an issue.
Having a single tag type per directory and thus a single tag visible per
directory does prevent multiple possible visible entries.
That is we can check when we add the sd if there will be a conflict in
the directory.
>> The number of tag types will be low as it is the number of subsystems
>> that use the feature. Simple enough that I expect statically allocating
>> the tag types in an enumeration is a safe and sane way to operate.
>> i.e.
>>
>> enum sysfs_tag_types {
>> SYSFS_TAG_NETNS,
>> SYSFS_TAG_USERNS,
>> SYSFS_TAG_MAX
>> };
>
> I still would prefer something which is more generic. The abstraction
> is clearer too. A sb shows untagged and a set of tags. A sd can either
> be untagged or tagged (a single tag).
That is the abstraction now.
The only difference is how we represent the set of tags.
I use an array of the valid tags.
You use a bitmap.
An array allows the lookup of the tag I am looking for before
I search for the sd. A bitmap requires me to compare each entry.
For me that is a deal breaker. Currently in certain pathological
cases we have scaling issues with sysctl and sysfs that we can
have enormous directories that start running slowly. To fix
lookup performance requires that we know the full name before
we do the directory search which is the name string and the
tag.
So I see having a type of tag as being of fundamental importance in
the interface now so we don't need to refactor all of the users
later. In addition to the fact that we need the type to know
how to set the tags when mounting a superblock and when
given a new kobject to create an sd for.
We could make the types dynamic rather than a static enumeration but
that seems needless complexity for now.
> Using ida (or idr if a pointer for private data is necessary) is really
> easy. It'll probably take a few tens of lines of code. That said, I
> don't think I have enough rationale to nack what you described. So, as
> long as the tags are made static, I won't object.
Sounds good. The only justification I can think of for ida tags is that
they are smaller, and so can keep the sysfs_dirents smaller. Which
occasionally is a significant concern. Still that should be an optimization
that we can apply later, as it is not a structural difference in the code.
Just to confirm. Do the two operations:
mount_tag - called only when the sb is mounted
kobject_tag - called when we create a new sd or rename an sd
cause you to view the tags as dynamic?
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-01 9:20 ` Eric W. Biederman
@ 2008-07-01 10:30 ` Tejun Heo
2008-07-01 12:30 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Tejun Heo @ 2008-07-01 10:30 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Hello,
Eric W. Biederman wrote:
>> Having enumed tag types limits that a sb can have map to only one tag
>> but it doesn't really prevent multiple possibly visible entries which is
>> the real unnecessary degrees of freedom. That said, I don't really
>> think it's an issue.
>
> Having a single tag type per directory and thus a single tag visible per
> directory does prevent multiple possible visible entries.
>
> That is we can check when we add the sd if there will be a conflict in
> the directory.
Yeap, that we can do.
>> I still would prefer something which is more generic. The abstraction
>> is clearer too. A sb shows untagged and a set of tags. A sd can either
>> be untagged or tagged (a single tag).
>
> That is the abstraction now.
>
> The only difference is how we represent the set of tags.
> I use and array of the valid tags.
> You use a bitmap.
>
> And array allows the lookup of the tag I am looking for before
> I search for the sd. An bitmap requires me to compare each entry.
How so? sysfs_sb->bitmap would contain enough bits for all the defined
tags, and determining whether a sd should be shown or not is as simple as
a single test_bit.
> For me that is a deal breaker. Currently in certain pathological
> cases we have scaling issues with sysctl and sysfs that we can
> have enormous directories that start running slowly. To fix
> lookup performance requires that we know the full name before
> we do the directory search which is the name string and the
> tag.
>
> So I having a type of tag as being of fundamental importance in
> the interface now so we don't need to refactor all of the users
> later. In addition to the fact that we need the type to know
> how to set the tags when mounting a superblock and when
> given a new kobject to create an sd for.
>
> We could make the types dynamic rather then a static enumeration but
> that seems needless complexity for now.
What I'm feeling uneasy about is the extra level of abstraction added by
tag types. A sd is given a tag. A sb shows a set of tags. The most
straightforward way to implement that is to give sd a tag and test the tag
against sb's set of tags. The type is added because pointer tag
requires sequential matching which is usually best to avoid. It's
nothing fundamental. It's extra baggage.
>> Using ida (or idr if a pointer for private data is necessary) is really
>> easy. It'll probably take a few tens of lines of code. That said, I
>> don't think I have enough rationale to nack what you described. So, as
>> long as the tags are made static, I won't object.
>
> Sounds good. The only justification I can think of for ida tags is that
> they are smaller, and so can keep the sysfs_dirents smaller. Which
> occasionally is a significant concern. Still that should be an optimization
> that we can apply later, as it is not a structural difference in the code.
>
> Just to confirm. Do you the two operations:
> mount_tag - called only when the sb is mounted
> kobject_tag - called when we create new sd or rename an sd
>
> Cause you to view an the tags as dynamic?
The thing is that I don't really see why there's tagged_dir_ops at all.
What's needed is tagged sd's and sb's which can show subset of those
tags, so adding callback ops for tags just doesn't make much sense to
me. The interface should ideally be...
1. alloc/release tag
2. set / change / remove tag on sd
3. enable / disable tag on a sb
This has been my opinion from the beginning. Unless the tags need to be
changed dynamically on demand (which I hope is not the case), there just
is plainly no reason to have callbacks for tags.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-01 10:30 ` Tejun Heo
@ 2008-07-01 12:30 ` Eric W. Biederman
2008-07-02 3:24 ` Tejun Heo
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-07-01 12:30 UTC (permalink / raw)
To: Tejun Heo
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Tejun Heo <htejun@gmail.com> writes:
> Hello,
>
> Eric W. Biederman wrote:
>>> Having enumed tag types limits that a sb can have map to only one tag
>>> but it doesn't really prevent multiple possibly visible entries which is
>>> the real unnecessary degrees of freedom. That said, I don't really
>>> think it's an issue.
>>
>> Having a single tag type per directory and thus a single tag visible per
>> directory does prevent multiple possible visible entries.
>>
>> That is we can check when we add the sd if there will be a conflict in
>> the directory.
>
> Yeap, that we can do.
What we are implementing is not a sb with a set of tags that are displayed,
but directories with a single tag that is displayed. The sb just happens
to hold the state for the directories.
A directory displaying only a single tag is a necessary constraint for
a large number of reasons.
>> And array allows the lookup of the tag I am looking for before
>> I search for the sd. An bitmap requires me to compare each entry.
>
> How so? sysfs_sb->bitmap which contains enough bits for all the defined
> tags and determining whether a sd should be shown or not is as simple as
> single test_bit.
Yes. The compare happens to be test_bit.
With a bitmap you must visit each dirent with a given name and see if
it has a tag that is displayed.
With an array you can look up the tag a priori and can potentially do a
hash table lookup or a tree lookup and are not required to visit each
entry.
> What I'm feeling unease about is the extra level of abstraction added by
> tag types. A sd is given a tag. A sb shows a set of tags. The most
> straight forward to implement that is to give sd a tag and test the tag
> against sb's set of tags. The type is added because pointer tag
> requires sequential matching which is usually best to avoid. It's
> nothing fundamental. It's an extra baggage.
That is just one important aspect of it. We need a way to describe
which tag a sb,directory pair displays. It is a fundamental concept.
>>> Using ida (or idr if a pointer for private data is necessary) is really
>>> easy. It'll probably take a few tens of lines of code. That said, I
>>> don't think I have enough rationale to nack what you described. So, as
>>> long as the tags are made static, I won't object.
>>
>> Sounds good. The only justification I can think of for ida tags is that
>> they are smaller, and so can keep the sysfs_dirents smaller. Which
>> occasionally is a significant concern. Still that should be an optimization
>> that we can apply later, as it is not a structural difference in the code.
>>
>> Just to confirm. Do you the two operations:
>> mount_tag - called only when the sb is mounted
>> kobject_tag - called when we create new sd or rename an sd
>>
>> Cause you to view an the tags as dynamic?
>
> The thing is that I don't really see why there's tagged_dir_ops at all.
We need callbacks for interfacing with the kobject layer, and for
selecting our set of tags at mount time. Not tagged_dir_ops so much
as tagged_type_ops.
> What's needed is tagged sd's and sb's which can show subset of those
> tags, so adding callback ops for tags just doesn't make much sense to
> me. The interface should ideally be...
> 1. alloc/release tag
Agreed.
> 2. set / change / remove tag on sd
Essentially agreed.
Create an sd with a tag, change the tag on a sd.
Having an untagged sd in a directory that requires tags should
not be allowed.
> 3. enable / disable tag on a sb
Disagree that is too flexible. Tags on a sb need to be
unchanging or else we get vfs layer issues.
Further the abstraction is logically exactly one tag on a
(sb,directory) pair.
The operations needed are.
- Select the set of tags on a sb (at mount time)
This requires we call a set of callbacks. [ My mount_sb callback ]
- release a tag (which implies removing all tagged entries and
removing the sb reference)
4. Interface with the kobject layer.
kobject_add calls sysfs_create_dir
kobject_rename calls sysfs_rename_dir
kobject_del calls sysfs_remove_dir
For the first two operations we need a helper function to go from a
kobject to a tag.
For the second two operations we need to go from a kobject to a sd.
> This has been my opinion from the beginning. Unless the tags need to be
> changed dynamically on demand (which I hope is not the case), there just
> is plainly no reason to have callbacks for tags.
We don't need callbacks to poll to see if the tags on a sd have
changed.
We need helper functions for interfacing with the rest of the kernel.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-01 12:30 ` Eric W. Biederman
@ 2008-07-02 3:24 ` Tejun Heo
2008-07-02 3:53 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Tejun Heo @ 2008-07-02 3:24 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Hello,
Eric W. Biederman wrote:
> What we are implementing is not, a sb with a set of tags that are displayed,
> but directories with a single tag that is displayed. The sb just happens
> to hold the state for the directories.
>
> A directory displaying only a single tag is an necessary constraint for
> a large number of reasons.
Okay, that isn't exactly the impression I get but... well. Let's see.
>>> And array allows the lookup of the tag I am looking for before
>>> I search for the sd. An bitmap requires me to compare each entry.
>> How so? sysfs_sb->bitmap which contains enough bits for all the defined
>> tags and determining whether a sd should be shown or not is as simple as
>> single test_bit.
>
> Yes. The compare happens to be test_bit.
>
> With a bitmap you must visit each dirent with a given name and see if
> it has a tag that is displayed.
>
> With an array you can lookup the tag aprori and can potentially do a
> hash table lookup or a tree lookup and are not required to visit each
> entry.
A few things...
1. The lookup is currently done linearly and is fast enough for now.
Also, most lookup ops are cached by vfs layer. I'm not sure how
probable it is that we're gonna need hash or tree based sd lookup.
2. I don't think it's gonna be too difficult to speed up bitmap based
lookup. It would require a bit more intelligence but there's no
fundamental restriction. Just organizing the tree by tag first would
give us the same order of magnitude lookup given that the tags are used
the same way.
>> What I'm feeling unease about is the extra level of abstraction added by
>> tag types. A sd is given a tag. A sb shows a set of tags. The most
>> straight forward to implement that is to give sd a tag and test the tag
>> against sb's set of tags. The type is added because pointer tag
>> requires sequential matching which is usually best to avoid. It's
>> nothing fundamental. It's an extra baggage.
>
> That is just one important aspect of it. We need a way to describe
> which tag a sb,directory pair displays. It is a fundamental concept.
For netns, yes. I just think it would be better if the sysfs mechanism
to support that concept is more generic especially because it doesn't
seem too difficult to make it that way.
>>> Cause you to view an the tags as dynamic?
>> The thing is that I don't really see why there's tagged_dir_ops at all.
>
> We need callbacks for interfacing with the kobject layer, and for
> selecting our set of tags at mount time. Not tagged_dir_ops so much
> as tagged_type_ops.
The kobject op seems a bit strange way to interface to me. For mount,
yeah, we'll need a hook somewhere or pass it via mount option maybe.
>> What's needed is tagged sd's and sb's which can show subset of those
>> tags, so adding callback ops for tags just doesn't make much sense to
>> me. The interface should ideally be...
>
>> 1. alloc/release tag
> Agreed.
>
>> 2. set / change / remove tag on sd
> Essentially agreed.
>
> Create an sd with a tag, change the tag on a sd.
> Having an untagged sd in a directory that requires tags should
> not be allowed.
>
>> 3. enable / disable tag on a sb
> Disagree that is too flexible. Tags on a sb need to be
> unchanging or else we get vfs layer issues.
Yeah, this really should be something which can't change once it's mounted.
> Further the abstraction is logically exactly one tag on a
> (sb,directory) pair.
I'm not so sure here. As a policy, maybe but I don't really see a
fundamental reason that the mechanism should enforce this.
> The operations needed are.
> - Select the set of tags on a sb (at mount time)
> This requires we call a set of callbacks. [ My mount_sb callback ]
>
> - release a tag (which implies removing all tagged entries and
> removing the sb reference)
>
> 4. Interface with the kobject layer.
> kobject_add calls sysfs_create_dir
> kobject_rename calls sysfs_rename_dir
> kobject_del calls sysfs_remove_dir
>
> For the first two operations we need a helper function to go from a
> kobject to a tag.
Why not just add a parameter to sysfs_create_dir()? It's just twisted.
> For the second two operations we need to go from a kobject to a sd.
>
>> This has been my opinion from the beginning. Unless the tags need to be
>> changed dynamically on demand (which I hope is not the case), there just
>> is plainly no reason to have callbacks for tags.
>
> We don't need callbacks to poll to see if the tags on a sd have
> changed.
>
> We need helper functions for interfacing with the rest of the kernel.
Yes, that's why I view it as strange. These can be done in forward way
(by passing in mount options and/or arguments) but it's done by first
going into the sysfs and then calling back out to outer layer.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-02 3:24 ` Tejun Heo
@ 2008-07-02 3:53 ` Eric W. Biederman
2008-07-02 4:37 ` Tejun Heo
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-07-02 3:53 UTC (permalink / raw)
To: Tejun Heo
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Tejun Heo <htejun@gmail.com> writes:
> Hello,
>
> Eric W. Biederman wrote:
>> What we are implementing is not, a sb with a set of tags that are displayed,
>> but directories with a single tag that is displayed. The sb just happens
>> to hold the state for the directories.
>>
>> A directory displaying only a single tag is an necessary constraint for
>> a large number of reasons.
>
> Okay, that isn't exactly the impression I get but... well. Let's see.
Well one of those reasons is not having duplicate entries in your directory listing.
That is much harder otherwise.
> A few things...
>
> 1. The lookup is currently done linearly and is fast enough for now.
> Also, most lookup ops are cached by vfs layer. I'm not sure how
> probable it is that we're gonna need hash or tree based sd lookup.
I don't know how bad sysfs is. On the sysctl side I have people complaining
because I am doing a lookup during insert and that lookup is linear. Sysfs
appears to have the same complexity as sysctl but just smaller constants.
>> That is just one important aspect of it. We need a way to describe
>> which tag a sb,directory pair displays. It is a fundamental concept.
>
> For netns, yes. I just think it would be better if the sysfs mechanism
> to support that concept is more generic especially because it doesn't
> seem too difficult to make it that way.
Well the envisioned use is for other namespaces and they all are similar
to the network namespace in that way.
>>>> Cause you to view an the tags as dynamic?
>>> The thing is that I don't really see why there's tagged_dir_ops at all.
>>
>> We need callbacks for interfacing with the kobject layer, and for
>> selecting our set of tags at mount time. Not tagged_dir_ops so much
>> as tagged_type_ops.
>
> The kobject op seems a bit strange way to interface to me. For mount,
> yeah, we'll need a hook somewhere or pass it via mount option maybe.
I will look to see if there is a place in the kobject layer to put it. With
a second but noticeably different user I can compare and see how hard that will be.
>>> 3. enable / disable tag on a sb
>> Disagree that is too flexible. Tags on a sb need to be
>> unchanging or else we get vfs layer issues.
>
> Yeah, this really should be something which can't change once it's mounted.
The VFS chokes otherwise because it can't cache things properly.
>> Further the abstraction is logically exactly one tag on a
>> (sb,directory) pair.
>
> I'm not so sure here. As a policy, maybe but I don't really see a
> fundamental reason that the mechanism should enforce this.
Well in the first implementation.
>> 4. Interface with the kobject layer.
>> kobject_add calls sysfs_create_dir
>> kobject_rename calls sysfs_rename_dir
>> kobject_del calls sysfs_remove_dir
>>
>> For the first two operations we need a helper function to go from a
>> kobject to a tag.
>
> Why not just add a parameter to sysfs_create_dir()? It's just twisted.
I added it where it was easiest. Adding a parameter to sysfs_create_dir
simply means I have to add the function to the kobject layer. It is certainly
worth a second look though.
>> We need helper functions for interfacing with the rest of the kernel.
>
> Yes, that's why I view it as strange. These can be done in forward way
> (by passing in mount options and/or arguments) but it's done by first
> going into the sysfs and then calling back out to outer layer.
Well in the case of mount the default parameter at least is current, and
there are good reasons for that.
On the other side I can't pass a tag through from the device layer to
the kobject layer. It isn't a concept the kobject layer supports.
At least though the conversation is in relative agreement. I will refresh
the patches shortly and see where we are at.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-02 3:53 ` Eric W. Biederman
@ 2008-07-02 4:37 ` Tejun Heo
2008-07-02 16:49 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Tejun Heo @ 2008-07-02 4:37 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Hello,
Eric W. Biederman wrote:
>>> A directory displaying only a single tag is an necessary constraint for
>>> a large number of reasons.
>> Okay, that isn't exactly the impression I get but... well. Let's see.
>
> Well one of those reasons is not having duplicate entries in your directory listing.
> That is much harder otherwise.
Agreed.
>> For netns, yes. I just think it would be better if the sysfs mechanism
>> to support that concept is more generic especially because it doesn't
>> seem too difficult to make it that way.
>
> Well the envisioned use is for other namespaces and they all are similar
> to the network namespace in that way.
Something I've been curious about is a directory which contains both the
untagged entries and tagged ones. I can definitely imagine something
like that to be useful for block device namespace.
>>>>> Cause you to view an the tags as dynamic?
>>>> The thing is that I don't really see why there's tagged_dir_ops at all.
>>> We need callbacks for interfacing with the kobject layer, and for
>>> selecting our set of tags at mount time. Not tagged_dir_ops so much
>>> as tagged_type_ops.
>> The kobject op seems a bit strange way to interface to me. For mount,
>> yeah, we'll need a hook somewhere or pass it via mount option maybe.
>
> I will look how if there is a place in the kobject layer to put it. With
> a second but noticeably different user I can compare and see how hard that will be.
Great, thanks.
>>> Further the abstraction is logically exactly one tag on a
>>> (sb,directory) pair.
>> I'm not so sure here. As a policy, maybe but I don't really see a
>> fundamental reason that the mechanism should enforce this.
>
> Well in the first implementation.
This pretty much defines the interface and is likely to force future
users to fit themselves into it.
>>> 4. Interface with the kobject layer.
>>> kobject_add calls sysfs_create_dir
>>> kobject_rename calls sysfs_rename_dir
>>> kobject_del calls sysfs_remove_dir
>>>
>>> For the first two operations we need a helper function to go from a
>>> kobject to a tag.
>> Why not just add a parameter to sysfs_create_dir()? It's just twisted.
>
> I added it where it was easiest. Adding a parameter to sysfs_create_dir
> simply means I have to add the function to the kobject layer. It is certainly
> worth a second look though.
Is it difficult to just export it via kobject and device layer? If
changing the default function is too much of a hassle (and I'm sure it
would be), just add an extended version which takes @tag. The current
implementation feels like it tried too hard to not add intermediate
interfaces and ended up shooting outside from the innermost layer.
>>> We need helper functions for interfacing with the rest of the kernel.
>> Yes, that's why I view it as strange. These can be done in forward way
>> (by passing in mount options and/or arguments) but it's done by first
>> going into the sysfs and then calling back out to outer layer.
>
> Well in the case of mount the default parameter at least is current, and
> there are good reasons for that.
I was imagining something like...
mount -t sysfs -o ns=0,4,5 /my/sys
And let the userland control which ns's are visible in the particular
mount. I'm not sure how useful that will be tho.
> On the other side I can't pass a tag through from the device layer to
> the kobject layer. It isn't a concept the kobject layer supports.
I think it's best to make kobject layer support it.
> At least though the conversation is in relative agreement. I will refresh
> the patches shortly and see where we are at.
Thanks a lot for the patience. :-)
--
tejun
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-02 4:37 ` Tejun Heo
@ 2008-07-02 16:49 ` Eric W. Biederman
2008-07-03 0:15 ` Greg KH
2008-07-03 3:18 ` Tejun Heo
0 siblings, 2 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-07-02 16:49 UTC (permalink / raw)
To: Tejun Heo
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Tejun Heo <htejun@gmail.com> writes:
> Is it difficult to just export it via kobject and device layer?
Well gregkh thought it wasn't a good idea last time I tried exploring
that.
> If
> changing the default function is too much of a hassle (and I'm sure it
> would be), just add an extended version which takes @tag. The current
> implementation feels like it tried too hard to not add intermediate
> interfaces and ended up shooting outside from the innermost layer.
It tried for something that was simple to use and that worked.
Also, the way things work, I have to use all of the intermediate layers
and their calls to various functions. So just passing a parameter through
doesn't work too well.
It looks to me like the clean solution is to move kobject_tag into
kobj_type, and have it call some higher level function.
We also need to remove the maintenance disaster that is
kobject_set_name from sysfs_rename_dir. And push it into
kobject_rename instead. The error handling is harder in
that case but otherwise we should be in good shape.
>> On the other side I can't pass a tag through from the device layer to
>> the kobject layer. It isn't a concept the kobject layer supports.
>
> I think it's best to make kobject layer support it.
Assuming Greg will accept it when he sees reasonable patches.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-02 16:49 ` Eric W. Biederman
@ 2008-07-03 0:15 ` Greg KH
2008-07-03 3:18 ` Tejun Heo
1 sibling, 0 replies; 75+ messages in thread
From: Greg KH @ 2008-07-03 0:15 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Tejun Heo, Benjamin Thery, Andrew Morton, Daniel Lezcano,
Serge Hallyn, linux-kernel, Al Viro, Linux Containers
On Wed, Jul 02, 2008 at 09:49:33AM -0700, Eric W. Biederman wrote:
> Assuming Greg will accept it when he sees reasonable patches.
I always accept "reasonable patches" :)
thanks,
greg k-h
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-02 16:49 ` Eric W. Biederman
2008-07-03 0:15 ` Greg KH
@ 2008-07-03 3:18 ` Tejun Heo
2008-07-03 5:11 ` Eric W. Biederman
1 sibling, 1 reply; 75+ messages in thread
From: Tejun Heo @ 2008-07-03 3:18 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Hello, Eric.
Eric W. Biederman wrote:
>> If
>> changing the default function is too much of a hassle (and I'm sure it
>> would be), just add an extended version which takes @tag. The current
>> implementation feels like it tried too hard to not add intermediate
>> interfaces and ended up shooting outside from the innermost layer.
>
> It tried for something that was simple to use and that worked.
>
> Also the way things work. I have to use all of the intermediate layers
> and their calls to various functions. So just passing a parameter through
> doesn't work to well.
There is a rather large possibility that I'm just being dumb here
especially because I haven't reviewed the users of this facility, so all
the comments I'm making are from the POV of interfaces of sysfs and the
related layers. I think I've made my concerns clear by now. If you
still think the callbacks are the best way to go, please try to
enlighten me. I really don't wanna be stopping something which is
better from ignorance. Just give me some concrete examples or point me
to codes which show how and why the current interface is the best for
the users and switching isn't a good idea.
> It looks to me like the clean solution is move kobject_tag into
> kobj_type, and have it call some higher level function.
>
> We also need to remove the maintenance disaster that is
> kobject_set_name from sysfs_rename_dir. And push it into
> kobject_rename instead. The error handling is harder in
> that case but otherwise we should be in good shape.
Heh... I personally think kobject layer as a whole should just be hidden
under the cabinet of device driver model but I'm having a difficult time
convincing other people of it. Anyways, fully agree the interaction
between kobject and sysfs is ugly at a lot of places.
>>> On the other side I can't pass a tag through from the device layer to
>>> the kobject layer. It isn't a concept the kobject layer supports.
>> I think it's best to make kobject layer support it.
>
> Assuming Greg will accept it when he sees reasonable patches.
Greg says he would. :-)
Thanks a lot for your patience.
--
tejun
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-03 3:18 ` Tejun Heo
@ 2008-07-03 5:11 ` Eric W. Biederman
2008-07-03 10:56 ` Daniel Lezcano
0 siblings, 1 reply; 75+ messages in thread
From: Eric W. Biederman @ 2008-07-03 5:11 UTC (permalink / raw)
To: Tejun Heo
Cc: Benjamin Thery, Greg Kroah-Hartman, Andrew Morton,
Daniel Lezcano, Serge Hallyn, linux-kernel, Al Viro,
Linux Containers
Tejun Heo <htejun@gmail.com> writes:
> There is rather large possibility that I'm just being dumb here
> especially because I haven't reviewed the users of this facility, so all
> the comments I'm making are from the POV of interfaces of sysfs and the
> related layers. I think I've made my concerns clear by now. If you
> still think the callbacks are the best way to go, please try to
> enlighten me. I really don't wanna be stopping something which is
> better from ignorance. Just give me some concrete examples or point me
> to codes which show how and why the current interface is the best for
> the users and switching isn't a good idea.
Currently I think a callback to get the tag from a kobject is the
best way to go. That way we don't need to add a field to struct
kobject (and don't need the associated redundancy), and we can look
up the tag when we need it.
I have been playing with the code and just about have it ready
to go. I just need to refactor all of my changes into clean
patches at this point, plus a bit of review and test. Ben & Daniel
have given me a version of the previous patchset rebased onto the
latest -mm so that should help for the unchanged parts.
Introducing the sysfs_tag_type thing and pushing the functions to
the edges helps. It especially cleans up the ugly mount/umount
situation allowing us to handle that with generic code.
Moving the kobject_tag into struct ktype works and looks roughly
as clean as what happens with attributes. So that seems reasonable,
and doesn't result in a significant change in the users.
The result of which means that I only have the helper function sysfs_creation_tag
left in sysfs/dir.c. Left in there are some of the nasties in dealing with symlinks.
At this point I believe I have achieved a nice degree of simplifying the sysfs
code in the current patches without really changing the users or
making it more complex for them.
I have not implemented ida tags, and I don't plan to. That is just
unnecessary work right now. The users are simple and the meat of the
logic would not change so it should be simple to add.
>> It looks to me like the clean solution is move kobject_tag into
>> kobj_type, and have it call some higher level function.
>>
>> We also need to remove the maintenance disaster that is
>> kobject_set_name from sysfs_rename_dir. And push it into
>> kobject_rename instead. The error handling is harder in
>> that case but otherwise we should be in good shape.
>
> Heh... I personally think kobject layer as a whole should just be hidden
> under the cabinet of device driver model but I'm having difficult time
> convincing other people of it. Anyways, fully agree the interaction
> between kobject and sysfs is ugly at a lot of places.
I would be happy if we could remove all nonsense kobjects that are there just
for structural purposes but have no purpose otherwise. Things like kobjects
for symlinks. The kobject layer doesn't seem to have a clear identity
and purpose that I can see right now.
> Thanks a lot for your patience.
Welcome. The code reached a point a while ago where it didn't make sense
to change it without review feedback.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-03 5:11 ` Eric W. Biederman
@ 2008-07-03 10:56 ` Daniel Lezcano
2008-07-03 12:27 ` Eric W. Biederman
0 siblings, 1 reply; 75+ messages in thread
From: Daniel Lezcano @ 2008-07-03 10:56 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, linux-kernel, Al Viro,
Linux Containers, Andrew Morton, Benjamin Thery
Eric W. Biederman wrote:
> Tejun Heo <htejun@gmail.com> writes:
>
>> There is rather large possibility that I'm just being dumb here
>> especially because I haven't reviewed the users of this facility, so all
>> the comments I'm making are from the POV of interfaces of sysfs and the
>> related layers. I think I've made my concerns clear by now. If you
>> still think the callbacks are the best way to go, please try to
>> enlighten me. I really don't wanna be stopping something which is
>> better from ignorance. Just give me some concrete examples or point me
>> to codes which show how and why the current interface is the best for
>> the users and switching isn't a good idea.
>
> Currently I think a callback on to get the tag from a kobject is the
> best way to go. That way we don't need to add a field to struct
> kobject (and don't need the associated redundancy), and we can lookup
> up the tag when we need it.
The kobject events are sent through a netlink message which is not
currently per network namespace. Wouldn't it be useful to have a way to
retrieve from the kobject the network namespace or the uevent socket
associated with it ? IMHO having idr in the kobject + netns pointer
associated may help to handle the sysfs isolation and makes the uevent
per namespace trivial, no ?
> I have been playing with the code and just about have it ready
> to go. I just need to refactor all of my changes into clean
> patches at this point, plus a bit of review and test. Ben & Daniel
> have given me a version of the previous patchset rebased unto the
> latest -mm so that should help for the unchanged parts.
>
> Introducing the sysfs_tag_type thing and pushing the functions to
> the edges helps. It especially cleans up the ugly mount/umount
> situation allowing us to handle that with generic code.
>
> Moving the kobject_tag into struct ktype works and looks roughly
> as clean as what happens with attributes. So I that seems reasonable,
> and doesn't result in a significant change in the users.
>
> The result of which means that I only have the helper function sysfs_creation_tag
> left in sysfs/dir.c Left in there are some of the nasties in dealing with symlinks.
>
> At this point I believe I have achieved a nice degree of simplifying the sysfs
> code in the current patches without really changing the users or
> making it more complex for them.
>
> I have not implemented ida tags, and I don't plan to. That is just
> unnecessary work right now. The users are simple and the meat of the
> logic would not change so it should be simple to add.
>
>>> It looks to me like the clean solution is move kobject_tag into
>>> kobj_type, and have it call some higher level function.
>>>
>>> We also need to remove the maintenance disaster that is
>>> kobject_set_name from sysfs_rename_dir. And push it into
>>> kobject_rename instead. The error handling is harder in
>>> that case but otherwise we should be in good shape.
>> Heh... I personally think kobject layer as a whole should just be hidden
>> under the cabinet of device driver model but I'm having difficult time
>> convincing other people of it. Anyways, fully agree the interaction
>> between kobject and sysfs is ugly at a lot of places.
>
> I would be happy if we could remove all nonsense kobject that are there just
> for structural purposes but have no purpose otherwise. Things like kobjects
> for symlinks. The kobject layer doesn't seem to have a clear identity
> and purpose that I can see right now.
>
>> Thanks a lot for your patience.
>
> Welcome. The code reached a point a while ago where it didn't make sense
> to change it without review feedback.
>
> Eric
>
> _______________________________________________
> Containers mailing list
> Containers@lists.linux-foundation.org
> https://lists.linux-foundation.org/mailman/listinfo/containers
>
--
Sauf indication contraire ci-dessus:
Compagnie IBM France
Siège Social : Tour Descartes, 2, avenue Gambetta, La Défense 5, 92400
Courbevoie
RCS Nanterre 552 118 465
Forme Sociale : S.A.S.
Capital Social : 542.737.118 €
SIREN/SIRET : 552 118 465 02430
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-03 10:56 ` Daniel Lezcano
@ 2008-07-03 12:27 ` Eric W. Biederman
2008-07-03 12:37 ` Benjamin Thery
` (2 more replies)
0 siblings, 3 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-07-03 12:27 UTC (permalink / raw)
To: Daniel Lezcano
Cc: Tejun Heo, Greg Kroah-Hartman, linux-kernel, Al Viro,
Linux Containers, Andrew Morton, Benjamin Thery
Daniel Lezcano <dlezcano@fr.ibm.com> writes:
> The kobject events are sent through a netlink message which is not currently per
> network namespace. Shouldn't be useful to have a way to retrieve from the
> kobject the network namespace or the uevent socket associated with it ? IMHO
> having idr in the kobject + netns pointer associated may help to handle the
> sysfs isolation and makes the uevent per namespace trivial, no ?
Grumble. I have conveniently been forgetting about that socket.
Similarly we have the user mode helpers to deal with.
For this conversation there is a simple answer. All of that is in the
kobject layer, and works even when you compile sysfs out of your kernel.
Therefore it is a separate problem. And sysfs idr tags have nothing
to do with it.
It is most definitely something we need to come back to. I bet there
are some interesting interactions when you have multiple network devices
with the same name generating events.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-03 12:27 ` Eric W. Biederman
@ 2008-07-03 12:37 ` Benjamin Thery
2008-07-03 19:57 ` Eric W. Biederman
2008-07-03 12:55 ` Daniel Lezcano
2008-07-03 15:58 ` Tejun Heo
2 siblings, 1 reply; 75+ messages in thread
From: Benjamin Thery @ 2008-07-03 12:37 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Daniel Lezcano, Tejun Heo, Greg Kroah-Hartman, linux-kernel,
Al Viro, Linux Containers, Andrew Morton
Eric W. Biederman wrote:
> Daniel Lezcano <dlezcano@fr.ibm.com> writes:
>
>> The kobject events are sent through a netlink message which is not currently per
>> network namespace. Shouldn't be useful to have a way to retrieve from the
>> kobject the network namespace or the uevent socket associated with it ? IMHO
>> having idr in the kobject + netns pointer associated may help to handle the
>> sysfs isolation and makes the uevent per namespace trivial, no ?
>
> Grumble. I have been conveniently been forgetting about that socket.
> Similarly we have the user mode helpers to deal with.
>
> For this conversation there is a simple answer. All of that is in the
> kobject layer, and works even when you compile sysfs out of your kernel.
> Therefore it is a separate problem. And sysfs idr tags have nothing
> to do with it.
> It is most definitely something we need to come back to. I bet there
> are some interesting interactions when you have multiple network devices
> with the same name generating events.
Indeed, we observed some fun things with one distro (which defines some
particular udev rules) when a device called eth0 in a namespace comes
back to init net :)
Benjamin
>
> Eric
>
>
>
>
>
>
>
>
>
>
--
B e n j a m i n T h e r y - BULL/DT/Open Software R&D
http://www.bull.com
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-03 12:37 ` Benjamin Thery
@ 2008-07-03 19:57 ` Eric W. Biederman
0 siblings, 0 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-07-03 19:57 UTC (permalink / raw)
To: Benjamin Thery
Cc: Daniel Lezcano, Tejun Heo, Greg Kroah-Hartman, linux-kernel,
Al Viro, Linux Containers, Andrew Morton
Benjamin Thery <benjamin.thery@bull.net> writes:
> Indeed, we observed some fun things with one distro (which defines some
> particular udev rules) when a device called eth0 in a namespace comes back to
> init net :)
Speaking of. Don't let me forget but I have a patch I need to send out
that deletes pseudo devices instead of sending them back to eth0. We can't
do that for real hardware obviously but for things like veth and macvlan
devices it greatly simplifies the cleanup.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-03 12:27 ` Eric W. Biederman
2008-07-03 12:37 ` Benjamin Thery
@ 2008-07-03 12:55 ` Daniel Lezcano
2008-07-03 15:58 ` Tejun Heo
2 siblings, 0 replies; 75+ messages in thread
From: Daniel Lezcano @ 2008-07-03 12:55 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Tejun Heo, Greg Kroah-Hartman, linux-kernel, Al Viro,
Linux Containers, Andrew Morton, Benjamin Thery
Eric W. Biederman wrote:
> Daniel Lezcano <dlezcano@fr.ibm.com> writes:
>
>> The kobject events are sent through a netlink message which is not currently per
>> network namespace. Shouldn't be useful to have a way to retrieve from the
>> kobject the network namespace or the uevent socket associated with it ? IMHO
>> having idr in the kobject + netns pointer associated may help to handle the
>> sysfs isolation and makes the uevent per namespace trivial, no ?
>
> Grumble. I have been conveniently been forgetting about that socket.
> Similarly we have the user mode helpers to deal with.
>
> For this conversation there is a simple answer. All of that is in the
> kobject layer, and works even when you compile sysfs out of your kernel.
> Therefore it is a separate problem. And sysfs idr tags have nothing
> to do with it.
Ah Ok, I am not really familiar with kobject/sysfs so I thought there
was a proposition to store the id in the kobject instead of using the
tag callbacks, so I figured, perhaps, the idr could have been used in
the kobject layer and the sysfs being built upon that.
> It is most definitely something we need to come back to. I bet there
> are some interesting interactions when you have multiple network devices
> with the same name generating events.
Yes, as Benjamin mentioned, we have the eth0 in the init_net which is
shut down when a network namespace with a netdev with the same name
exits. There is a udev rule which ifdown eth0 :)
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-03 12:27 ` Eric W. Biederman
2008-07-03 12:37 ` Benjamin Thery
2008-07-03 12:55 ` Daniel Lezcano
@ 2008-07-03 15:58 ` Tejun Heo
2008-07-03 18:29 ` Daniel Lezcano
2008-07-03 20:08 ` Eric W. Biederman
2 siblings, 2 replies; 75+ messages in thread
From: Tejun Heo @ 2008-07-03 15:58 UTC (permalink / raw)
To: Eric W. Biederman
Cc: Daniel Lezcano, Greg Kroah-Hartman, linux-kernel, Al Viro,
Linux Containers, Andrew Morton, Benjamin Thery
Hello,
Eric W. Biederman wrote:
> Daniel Lezcano <dlezcano@fr.ibm.com> writes:
>
>> The kobject events are sent through a netlink message which is not currently per
>> network namespace. Shouldn't be useful to have a way to retrieve from the
>> kobject the network namespace or the uevent socket associated with it ? IMHO
>> having idr in the kobject + netns pointer associated may help to handle the
>> sysfs isolation and makes the uevent per namespace trivial, no ?
>
> Grumble. I have been conveniently been forgetting about that socket.
> Similarly we have the user mode helpers to deal with.
>
> For this conversation there is a simple answer. All of that is in the
> kobject layer, and works even when you compile sysfs out of your kernel.
> Therefore it is a separate problem. And sysfs idr tags have nothing
> to do with it.
>
> It is most definitely something we need to come back to. I bet there
> are some interesting interactions when you have multiple network devices
> with the same name generating events.
Related delta: I've been thinking that uevents should be part of sysfs
not kobject as that's what the userland is gonna associate the event
with. Would that solve the problem you're thinking about?
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-03 15:58 ` Tejun Heo
@ 2008-07-03 18:29 ` Daniel Lezcano
2008-07-03 20:08 ` Eric W. Biederman
1 sibling, 0 replies; 75+ messages in thread
From: Daniel Lezcano @ 2008-07-03 18:29 UTC (permalink / raw)
To: Tejun Heo
Cc: Eric W. Biederman, Greg Kroah-Hartman, linux-kernel, Al Viro,
Linux Containers, Andrew Morton, Benjamin Thery
Tejun Heo wrote:
> Hello,
>
> Eric W. Biederman wrote:
>> Daniel Lezcano <dlezcano@fr.ibm.com> writes:
>>
>>> The kobject events are sent through a netlink message which is not currently per
>>> network namespace. Shouldn't be useful to have a way to retrieve from the
>>> kobject the network namespace or the uevent socket associated with it ? IMHO
>>> having idr in the kobject + netns pointer associated may help to handle the
>>> sysfs isolation and makes the uevent per namespace trivial, no ?
>> Grumble. I have been conveniently been forgetting about that socket.
>> Similarly we have the user mode helpers to deal with.
>>
>> For this conversation there is a simple answer. All of that is in the
>> kobject layer, and works even when you compile sysfs out of your kernel.
>> Therefore it is a separate problem. And sysfs idr tags have nothing
>> to do with it.
>>
>> It is most definitely something we need to come back to. I bet there
>> are some interesting interactions when you have multiple network devices
>> with the same name generating events.
>
> Related delta: I've been thinking that uevents should be part of sysfs
> not kobject as that's what the userland is gonna associate the event
> with. Would that solve the problem you're thinking about?
uevents can work with the network namespaces being compiled in and the
sysfs compiled out. AFAICS, uevents will be unable to handle multiple
network namespaces if it is tied with sysfs, no ?
^ permalink raw reply [flat|nested] 75+ messages in thread
* Re: [PATCH 06/11] sysfs: Implement sysfs tagged directory support.
2008-07-03 15:58 ` Tejun Heo
2008-07-03 18:29 ` Daniel Lezcano
@ 2008-07-03 20:08 ` Eric W. Biederman
1 sibling, 0 replies; 75+ messages in thread
From: Eric W. Biederman @ 2008-07-03 20:08 UTC (permalink / raw)
To: Tejun Heo
Cc: Daniel Lezcano, Greg Kroah-Hartman, linux-kernel, Al Viro,
Linux Containers, Andrew Morton, Benjamin Thery
Tejun Heo <htejun@gmail.com> writes:
> Related delta: I've been thinking that uevents should be part of sysfs
> not kobject as that's what the userland is gonna associate the event
> with. Would that solve the problem you're thinking about?
The good news is that uevent_sock is currently restricted to just
the initial network namespace (so the functionality completely
disappears in the other namespaces), and that it is broadcast only.
So it should be possible to look at who the client is and by some
magic criterion decide if it should receive the broadcast message.
The call to the user mode helper is trickier. How do we set up the
proper user space context?
None of this is fundamentally hard just different work, for a different
day.
Eric
^ permalink raw reply [flat|nested] 75+ messages in thread