All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface
@ 2014-01-07  2:58 Darrick J. Wong
  2014-01-07 12:48 ` Jan Kara
  2014-01-07 22:04 ` [RFC PATCH v2] fs: new FS_IOC_[GS]ETFLAGS2 interface Darrick J. Wong
  0 siblings, 2 replies; 10+ messages in thread
From: Darrick J. Wong @ 2014-01-07  2:58 UTC (permalink / raw)
  To: Aurelien Jarno, Theodore Ts'o, Alexander Viro, Rob Browning,
	Dave Chinner, Miklos Szeredi
  Cc: linux-fsdevel, linux-kernel, linux-ext4

This is a proof of concept interface for replacing the contentious
FS_IOC_[GS]ETFLAGS interface with one that presents itself as the
xattr 'system.iflags'.  Instead of using integer inode flags, this
interface uses a comma-separated string of words, such as
"extents,immutable" to describe the inode flags.  This ought to enable
all filesystems to introduce arbitrary flags, with any sort of backend
they want, while also taking advantage of generic fs flags.

The initial conversion is for ext4, though given the similarities of
most filesystems' implementations, converting the rest shouldn't be
difficult.  How we get everyone to agree on common flag names is
anyone's guess.

Includes some helper methods to handle the string<->int conversion,
and a tweak to the generic xattr code to allow setting iflags on an
immutable file.

Usage:
# setfattr -n system.iflags -v 'extents,append' /path/file
# getfattr -n system.iflags /path/file
system.iflags="secrm,unrm,sync,immutable,nodump,noatime,journal_data,extents"

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/ext4/Makefile           |    2 
 fs/ext4/ext4.h             |    3 +
 fs/ext4/ioctl.c            |  198 ++++++++++++++++++++++++--------------------
 fs/ext4/xattr.c            |    1 
 fs/ext4/xattr.h            |    1 
 fs/ext4/xattr_iflags.c     |   90 ++++++++++++++++++++
 fs/xattr.c                 |    9 ++
 include/linux/strflags.h   |   27 ++++++
 include/uapi/linux/xattr.h |    2 
 lib/Makefile               |    2 
 lib/strflags.c             |  117 ++++++++++++++++++++++++++
 11 files changed, 358 insertions(+), 94 deletions(-)
 create mode 100644 fs/ext4/xattr_iflags.c
 create mode 100644 include/linux/strflags.h
 create mode 100644 lib/strflags.c

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 0310fec..7126958 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -8,7 +8,7 @@ ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
 		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
 		mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
-		xattr_trusted.o inline.o
+		xattr_trusted.o inline.o xattr_iflags.o
 
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
 ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf6c5cd..24837ab 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2660,6 +2660,9 @@ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
 
 extern int ext4_convert_inline_data(struct inode *inode);
 
+/* ioctl.c */
+extern int ext4_set_iflags(struct inode *inode, int flags);
+
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
 extern const struct inode_operations ext4_special_inode_operations;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 60589b6..c6f21c9 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -213,12 +213,115 @@ swap_boot_out:
 	return err;
 }
 
+/*
+ * Set inode flags.  Assume we already have mnt_want_write_file and i_mutex.
+ */
+int ext4_set_iflags(struct inode *inode, int flags)
+{
+	handle_t *handle = NULL;
+	int err, migrate = 0;
+	struct ext4_iloc iloc;
+	unsigned int oldflags, mask, i;
+	unsigned int jflag;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+
+	if (!inode_owner_or_capable(inode))
+		return -EACCES;
+
+	flags = ext4_mask_flags(inode->i_mode, flags);
+
+	err = -EPERM;
+	/* Is it quota file? Do not allow user to mess with it */
+	if (IS_NOQUOTA(inode))
+		goto flags_out;
+
+	oldflags = ei->i_flags;
+
+	/* The JOURNAL_DATA flag is modifiable only by root */
+	jflag = flags & EXT4_JOURNAL_DATA_FL;
+
+	/*
+	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+	 * the relevant capability.
+	 *
+	 * This test looks nicer. Thanks to Pauline Middelink
+	 */
+	if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+		if (!capable(CAP_LINUX_IMMUTABLE))
+			goto flags_out;
+	}
+
+	/*
+	 * The JOURNAL_DATA flag can only be changed by
+	 * the relevant capability.
+	 */
+	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+		if (!capable(CAP_SYS_RESOURCE))
+			goto flags_out;
+	}
+	if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+		migrate = 1;
+
+	if (flags & EXT4_EOFBLOCKS_FL) {
+		/* we don't support adding EOFBLOCKS flag */
+		if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+			err = -EOPNOTSUPP;
+			goto flags_out;
+		}
+	} else if (oldflags & EXT4_EOFBLOCKS_FL)
+		ext4_truncate(inode);
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto flags_out;
+	}
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		goto flags_err;
+
+	for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+		if (!(mask & EXT4_FL_USER_MODIFIABLE))
+			continue;
+		if (mask & flags)
+			ext4_set_inode_flag(inode, i);
+		else
+			ext4_clear_inode_flag(inode, i);
+	}
+
+	ext4_set_inode_flags(inode);
+	inode->i_ctime = ext4_current_time(inode);
+
+	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+	ext4_journal_stop(handle);
+	if (err)
+		goto flags_out;
+
+	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+		err = ext4_change_inode_journal_flag(inode, jflag);
+	if (err)
+		goto flags_out;
+	if (migrate) {
+		if (flags & EXT4_EXTENTS_FL)
+			err = ext4_ext_migrate(inode);
+		else
+			err = ext4_ind_migrate(inode);
+	}
+
+flags_out:
+	return err;
+}
+
 long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
 	struct super_block *sb = inode->i_sb;
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	unsigned int flags;
+	int err;
 
 	ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
 
@@ -227,13 +330,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		ext4_get_inode_flags(ei);
 		flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
 		return put_user(flags, (int __user *) arg);
-	case EXT4_IOC_SETFLAGS: {
-		handle_t *handle = NULL;
-		int err, migrate = 0;
-		struct ext4_iloc iloc;
-		unsigned int oldflags, mask, i;
-		unsigned int jflag;
-
+	case EXT4_IOC_SETFLAGS:
 		if (!inode_owner_or_capable(inode))
 			return -EACCES;
 
@@ -244,95 +341,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (err)
 			return err;
 
-		flags = ext4_mask_flags(inode->i_mode, flags);
-
-		err = -EPERM;
 		mutex_lock(&inode->i_mutex);
-		/* Is it quota file? Do not allow user to mess with it */
-		if (IS_NOQUOTA(inode))
-			goto flags_out;
-
-		oldflags = ei->i_flags;
-
-		/* The JOURNAL_DATA flag is modifiable only by root */
-		jflag = flags & EXT4_JOURNAL_DATA_FL;
-
-		/*
-		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
-		 * the relevant capability.
-		 *
-		 * This test looks nicer. Thanks to Pauline Middelink
-		 */
-		if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
-			if (!capable(CAP_LINUX_IMMUTABLE))
-				goto flags_out;
-		}
-
-		/*
-		 * The JOURNAL_DATA flag can only be changed by
-		 * the relevant capability.
-		 */
-		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
-			if (!capable(CAP_SYS_RESOURCE))
-				goto flags_out;
-		}
-		if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
-			migrate = 1;
-
-		if (flags & EXT4_EOFBLOCKS_FL) {
-			/* we don't support adding EOFBLOCKS flag */
-			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
-				err = -EOPNOTSUPP;
-				goto flags_out;
-			}
-		} else if (oldflags & EXT4_EOFBLOCKS_FL)
-			ext4_truncate(inode);
-
-		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
-		if (IS_ERR(handle)) {
-			err = PTR_ERR(handle);
-			goto flags_out;
-		}
-		if (IS_SYNC(inode))
-			ext4_handle_sync(handle);
-		err = ext4_reserve_inode_write(handle, inode, &iloc);
-		if (err)
-			goto flags_err;
-
-		for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
-			if (!(mask & EXT4_FL_USER_MODIFIABLE))
-				continue;
-			if (mask & flags)
-				ext4_set_inode_flag(inode, i);
-			else
-				ext4_clear_inode_flag(inode, i);
-		}
-
-		ext4_set_inode_flags(inode);
-		inode->i_ctime = ext4_current_time(inode);
-
-		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
-		ext4_journal_stop(handle);
-		if (err)
-			goto flags_out;
-
-		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
-			err = ext4_change_inode_journal_flag(inode, jflag);
-		if (err)
-			goto flags_out;
-		if (migrate) {
-			if (flags & EXT4_EXTENTS_FL)
-				err = ext4_ext_migrate(inode);
-			else
-				err = ext4_ind_migrate(inode);
-		}
-
-flags_out:
+		err = ext4_set_iflags(inode, flags);
 		mutex_unlock(&inode->i_mutex);
+
 		mnt_drop_write_file(filp);
 		return err;
-	}
 	case EXT4_IOC_GETVERSION:
 	case EXT4_IOC_GETVERSION_OLD:
 		return put_user(inode->i_generation, (int __user *) arg);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1423c48..64963e4 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -105,6 +105,7 @@ static const struct xattr_handler *ext4_xattr_handler_map[] = {
 };
 
 const struct xattr_handler *ext4_xattr_handlers[] = {
+	&ext4_xattr_iflags_handler,
 	&ext4_xattr_user_handler,
 	&ext4_xattr_trusted_handler,
 #ifdef CONFIG_EXT4_FS_POSIX_ACL
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index c767dbd..2559583 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -99,6 +99,7 @@ extern const struct xattr_handler ext4_xattr_trusted_handler;
 extern const struct xattr_handler ext4_xattr_acl_access_handler;
 extern const struct xattr_handler ext4_xattr_acl_default_handler;
 extern const struct xattr_handler ext4_xattr_security_handler;
+extern const struct xattr_handler ext4_xattr_iflags_handler;
 
 extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
 
diff --git a/fs/ext4/xattr_iflags.c b/fs/ext4/xattr_iflags.c
new file mode 100644
index 0000000..e3c5472
--- /dev/null
+++ b/fs/ext4/xattr_iflags.c
@@ -0,0 +1,90 @@
+/*
+ * linux/fs/ext4/xattr_iflags.c
+ * Handler for using the extended attribute interface to store inode flags.
+ *
+ * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/strflags.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
+#include "xattr.h"
+
+static struct flag_db ext4_iflags = {
+	.flag_sz = sizeof(int),
+	.maps = {
+		{"secrm", EXT4_SECRM_FL},
+		{"unrm", EXT4_UNRM_FL},
+		{"compr", EXT4_COMPR_FL},
+		{"sync", EXT4_SYNC_FL},
+		{"immutable", EXT4_IMMUTABLE_FL},
+		{"append", EXT4_APPEND_FL},
+		{"nodump", EXT4_NODUMP_FL},
+		{"noatime", EXT4_NOATIME_FL},
+		{"htree_dir", EXT4_INDEX_FL},
+		{"journal_data", EXT4_JOURNAL_DATA_FL},
+		{"notail", EXT4_NOTAIL_FL},
+		{"dirsync", EXT4_DIRSYNC_FL},
+		{"topdir", EXT4_TOPDIR_FL},
+		{"extents", EXT4_EXTENTS_FL},
+		{NULL, 0},
+	}
+};
+
+static size_t
+ext4_xattr_iflags_list(struct dentry *dentry, char *list, size_t list_size,
+		       const char *name, size_t name_len, int type)
+{
+	const size_t prefix_len = sizeof(XATTR_NAME_IFLAGS)-1;
+	const size_t total_len = prefix_len + name_len + 1;
+
+	if (list && total_len <= list_size) {
+		memcpy(list, XATTR_NAME_IFLAGS, prefix_len);
+		memcpy(list+prefix_len, name, name_len);
+		list[prefix_len + name_len] = '\0';
+	}
+	return total_len;
+}
+
+static int
+ext4_xattr_iflags_get(struct dentry *dentry, const char *name,
+		       void *buffer, size_t size, int type)
+{
+	struct ext4_inode_info *ei;
+
+	ei = EXT4_I(dentry->d_inode);
+	return flags_to_string(&ext4_iflags, &ei->i_flags, buffer, size);
+}
+
+static int
+ext4_xattr_iflags_set(struct dentry *dentry, const char *name,
+		const void *value, size_t size, int flags, int type)
+{
+	char *string = (char *)value;
+	int ret, moo;
+
+	string = kmalloc(size + 1, GFP_KERNEL);
+	if (!string)
+		return -ENOMEM;
+	memcpy(string, value, size);
+	string[size] = 0;
+
+	ret = string_to_flags(&ext4_iflags, string, size, &moo);
+	if (ret)
+		goto out;
+
+	ret = ext4_set_iflags(dentry->d_inode, moo);
+out:
+	kfree(string);
+	return ret;
+}
+
+const struct xattr_handler ext4_xattr_iflags_handler = {
+	.prefix	= XATTR_NAME_IFLAGS,
+	.list	= ext4_xattr_iflags_list,
+	.get	= ext4_xattr_iflags_get,
+	.set	= ext4_xattr_iflags_set,
+};
diff --git a/fs/xattr.c b/fs/xattr.c
index 3377dff..bf0fe89 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -32,6 +32,15 @@ static int
 xattr_permission(struct inode *inode, const char *name, int mask)
 {
 	/*
+	 * Always allow read/write of inode flag attribute.
+	 */
+	if (!strcmp(name, XATTR_NAME_IFLAGS)) {
+		if (mask & MAY_WRITE)
+			return inode_owner_or_capable(inode) ? 0 : -EACCES;
+		return 0;
+	}
+
+	/*
 	 * We can never set or remove an extended attribute on a read-only
 	 * filesystem  or on an immutable / append-only inode.
 	 */
diff --git a/include/linux/strflags.h b/include/linux/strflags.h
new file mode 100644
index 0000000..6c8067c
--- /dev/null
+++ b/include/linux/strflags.h
@@ -0,0 +1,27 @@
+/*
+ * File: linux/strflags.h
+ * Convert flags to pretty strings.
+ *
+ * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef _LINUX_STRFLAGS_H_
+#define _LINUX_STRFLAGS_H_
+
+#include <linux/types.h>
+
+struct flag_map {
+	const char *string;
+	unsigned long flag;
+};
+
+struct flag_db {
+	size_t flag_sz;
+	struct flag_map maps[];
+};
+
+int flags_to_string(struct flag_db *flagdb, void *flags, char *buf,
+		    size_t buf_len);
+int string_to_flags(struct flag_db *flagdb, char *buf, size_t buf_len,
+		    void *flags);
+
+#endif /* _LINUX_STRFLAGS_H_ */
diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index e4629b9..8cd78ae 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -63,5 +63,7 @@
 #define XATTR_POSIX_ACL_DEFAULT  "posix_acl_default"
 #define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT
 
+#define XATTR_IFLAGS "iflags"
+#define XATTR_NAME_IFLAGS XATTR_SYSTEM_PREFIX XATTR_IFLAGS
 
 #endif /* _UAPI_LINUX_XATTR_H */
diff --git a/lib/Makefile b/lib/Makefile
index a459c31..f3a517d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
-	 percpu-refcount.o percpu_ida.o
+	 percpu-refcount.o percpu_ida.o strflags.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
diff --git a/lib/strflags.c b/lib/strflags.c
new file mode 100644
index 0000000..d1a34e7
--- /dev/null
+++ b/lib/strflags.c
@@ -0,0 +1,117 @@
+/*
+ * File: lib/strflags.c
+ * Convert flags to pretty strings.
+ *
+ * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/strflags.h>
+#include <linux/errno.h>
+
+int flags_to_string(struct flag_db *flagdb, void *flags, char *buf,
+		    size_t buf_len)
+{
+	struct flag_map *mapping;
+	char *b;
+	unsigned long fl;
+	int used, first;
+	size_t sz;
+
+	switch (flagdb->flag_sz) {
+	case 1:
+		fl = *(u8 *)flags;
+		break;
+	case 2:
+		fl = *(u16 *)flags;
+		break;
+	case 4:
+		fl = *(u32 *)flags;
+		break;
+	case 8:
+		fl = *(u64 *)flags;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Calculating buffer size */
+	sz = 0;
+	for (mapping = flagdb->maps; mapping->string; mapping++) {
+		if (mapping->flag & fl) {
+			sz += strlen(mapping->string);
+			sz++;
+		}
+	}
+
+	if (buf == NULL)
+		return sz;
+	if (buf_len < sz)
+		return -ERANGE;
+
+	/* Stringify the flags */
+	b = buf;
+	first = 1;
+	for (mapping = flagdb->maps; mapping->string; mapping++) {
+		if (mapping->flag & fl) {
+			used = snprintf(b, buf_len, "%s%s",
+					(first ? "" : ","),
+					mapping->string);
+			first = 0;
+			buf_len -= used;
+			b += used;
+		}
+	}
+
+	return b - buf;
+}
+EXPORT_SYMBOL_GPL(flags_to_string);
+
+int string_to_flags(struct flag_db *flagdb, char *buf, size_t buflen,
+		    void *flags)
+{
+	struct flag_map *mapping;
+	unsigned long fl = 0;
+	char *p, *q;
+	char c;
+	int found;
+
+	for (p = buf; p < buf + buflen; p = q + 1) {
+		for (q = p; *q != 0 && *q != ','; q++)
+			; /* empty */
+		if (p == q)
+			continue;
+
+		c = *q; *q = 0;
+		found = 0;
+		for (mapping = flagdb->maps; mapping->string; mapping++)
+			if (strcmp(p, mapping->string) == 0) {
+				fl |= mapping->flag;
+				found = 1;
+			}
+		if (!found)
+			return -EINVAL;
+		*q = c;
+		p = q + 1;
+	}
+
+	switch (flagdb->flag_sz) {
+	case 1:
+		*(u8 *)flags = fl;
+		break;
+	case 2:
+		*(u16 *)flags = fl;
+		break;
+	case 4:
+		*(u32 *)flags = fl;
+		break;
+	case 8:
+		*(u64 *)flags = fl;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(string_to_flags);

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface
  2014-01-07  2:58 [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface Darrick J. Wong
@ 2014-01-07 12:48 ` Jan Kara
  2014-01-07 15:49   ` Christoph Hellwig
  2014-01-07 22:04 ` [RFC PATCH v2] fs: new FS_IOC_[GS]ETFLAGS2 interface Darrick J. Wong
  1 sibling, 1 reply; 10+ messages in thread
From: Jan Kara @ 2014-01-07 12:48 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Aurelien Jarno, Theodore Ts'o, Alexander Viro, Rob Browning,
	Dave Chinner, Miklos Szeredi, linux-fsdevel, linux-kernel,
	linux-ext4

On Mon 06-01-14 18:58:55, Darrick J. Wong wrote:
> This is a proof of concept interface for replacing the contentious
> FS_IOC_[GS]ETFLAGS interface with one that presents itself as the
> xattr 'system.iflags'.  Instead of using integer inode flags, this
> interface uses a comma-separated string of words, such as
> "extents,immutable" to describe the inode flags.  This ought to enable
> all filesystems to introduce arbitrary flags, with any sort of backend
> they want, while also taking advantage of generic fs flags.
> 
> The initial conversion is for ext4, though given the similarities of
> most filesystems' implementations, converting the rest shouldn't be
> difficult.  How we get everyone to agree on common flag names is
> anyone's guess.
> 
> Includes some helper methods to handle the string<->int conversion,
> and a tweak to the generic xattr code to allow setting iflags on an
> immutable file.
  I have to say I'm not thrilled by the idea of juggling strings in
userspace and in kernel to set a flag for an inode...

								Honza

> Usage:
> # setfattr -n system.iflags -v 'extents,append' /path/file
> # getfattr -n system.iflags /path/file
> system.iflags="secrm,unrm,sync,immutable,nodump,noatime,journal_data,extents"
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
> ---
>  fs/ext4/Makefile           |    2 
>  fs/ext4/ext4.h             |    3 +
>  fs/ext4/ioctl.c            |  198 ++++++++++++++++++++++++--------------------
>  fs/ext4/xattr.c            |    1 
>  fs/ext4/xattr.h            |    1 
>  fs/ext4/xattr_iflags.c     |   90 ++++++++++++++++++++
>  fs/xattr.c                 |    9 ++
>  include/linux/strflags.h   |   27 ++++++
>  include/uapi/linux/xattr.h |    2 
>  lib/Makefile               |    2 
>  lib/strflags.c             |  117 ++++++++++++++++++++++++++
>  11 files changed, 358 insertions(+), 94 deletions(-)
>  create mode 100644 fs/ext4/xattr_iflags.c
>  create mode 100644 include/linux/strflags.h
>  create mode 100644 lib/strflags.c
> 
> diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> index 0310fec..7126958 100644
> --- a/fs/ext4/Makefile
> +++ b/fs/ext4/Makefile
> @@ -8,7 +8,7 @@ ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
>  		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
>  		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
>  		mmp.o indirect.o extents_status.o xattr.o xattr_user.o \
> -		xattr_trusted.o inline.o
> +		xattr_trusted.o inline.o xattr_iflags.o
>  
>  ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o
>  ext4-$(CONFIG_EXT4_FS_SECURITY)		+= xattr_security.o
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index bf6c5cd..24837ab 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -2660,6 +2660,9 @@ extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
>  
>  extern int ext4_convert_inline_data(struct inode *inode);
>  
> +/* ioctl.c */
> +extern int ext4_set_iflags(struct inode *inode, int flags);
> +
>  /* namei.c */
>  extern const struct inode_operations ext4_dir_inode_operations;
>  extern const struct inode_operations ext4_special_inode_operations;
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 60589b6..c6f21c9 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -213,12 +213,115 @@ swap_boot_out:
>  	return err;
>  }
>  
> +/*
> + * Set inode flags.  Assume we already have mnt_want_write_file and i_mutex.
> + */
> +int ext4_set_iflags(struct inode *inode, int flags)
> +{
> +	handle_t *handle = NULL;
> +	int err, migrate = 0;
> +	struct ext4_iloc iloc;
> +	unsigned int oldflags, mask, i;
> +	unsigned int jflag;
> +	struct ext4_inode_info *ei = EXT4_I(inode);
> +
> +	if (!inode_owner_or_capable(inode))
> +		return -EACCES;
> +
> +	flags = ext4_mask_flags(inode->i_mode, flags);
> +
> +	err = -EPERM;
> +	/* Is it quota file? Do not allow user to mess with it */
> +	if (IS_NOQUOTA(inode))
> +		goto flags_out;
> +
> +	oldflags = ei->i_flags;
> +
> +	/* The JOURNAL_DATA flag is modifiable only by root */
> +	jflag = flags & EXT4_JOURNAL_DATA_FL;
> +
> +	/*
> +	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
> +	 * the relevant capability.
> +	 *
> +	 * This test looks nicer. Thanks to Pauline Middelink
> +	 */
> +	if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
> +		if (!capable(CAP_LINUX_IMMUTABLE))
> +			goto flags_out;
> +	}
> +
> +	/*
> +	 * The JOURNAL_DATA flag can only be changed by
> +	 * the relevant capability.
> +	 */
> +	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
> +		if (!capable(CAP_SYS_RESOURCE))
> +			goto flags_out;
> +	}
> +	if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
> +		migrate = 1;
> +
> +	if (flags & EXT4_EOFBLOCKS_FL) {
> +		/* we don't support adding EOFBLOCKS flag */
> +		if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
> +			err = -EOPNOTSUPP;
> +			goto flags_out;
> +		}
> +	} else if (oldflags & EXT4_EOFBLOCKS_FL)
> +		ext4_truncate(inode);
> +
> +	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
> +	if (IS_ERR(handle)) {
> +		err = PTR_ERR(handle);
> +		goto flags_out;
> +	}
> +	if (IS_SYNC(inode))
> +		ext4_handle_sync(handle);
> +	err = ext4_reserve_inode_write(handle, inode, &iloc);
> +	if (err)
> +		goto flags_err;
> +
> +	for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
> +		if (!(mask & EXT4_FL_USER_MODIFIABLE))
> +			continue;
> +		if (mask & flags)
> +			ext4_set_inode_flag(inode, i);
> +		else
> +			ext4_clear_inode_flag(inode, i);
> +	}
> +
> +	ext4_set_inode_flags(inode);
> +	inode->i_ctime = ext4_current_time(inode);
> +
> +	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
> +flags_err:
> +	ext4_journal_stop(handle);
> +	if (err)
> +		goto flags_out;
> +
> +	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
> +		err = ext4_change_inode_journal_flag(inode, jflag);
> +	if (err)
> +		goto flags_out;
> +	if (migrate) {
> +		if (flags & EXT4_EXTENTS_FL)
> +			err = ext4_ext_migrate(inode);
> +		else
> +			err = ext4_ind_migrate(inode);
> +	}
> +
> +flags_out:
> +	return err;
> +}
> +
>  long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  {
>  	struct inode *inode = file_inode(filp);
>  	struct super_block *sb = inode->i_sb;
>  	struct ext4_inode_info *ei = EXT4_I(inode);
>  	unsigned int flags;
> +	int err;
>  
>  	ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
>  
> @@ -227,13 +330,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  		ext4_get_inode_flags(ei);
>  		flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
>  		return put_user(flags, (int __user *) arg);
> -	case EXT4_IOC_SETFLAGS: {
> -		handle_t *handle = NULL;
> -		int err, migrate = 0;
> -		struct ext4_iloc iloc;
> -		unsigned int oldflags, mask, i;
> -		unsigned int jflag;
> -
> +	case EXT4_IOC_SETFLAGS:
>  		if (!inode_owner_or_capable(inode))
>  			return -EACCES;
>  
> @@ -244,95 +341,12 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  		if (err)
>  			return err;
>  
> -		flags = ext4_mask_flags(inode->i_mode, flags);
> -
> -		err = -EPERM;
>  		mutex_lock(&inode->i_mutex);
> -		/* Is it quota file? Do not allow user to mess with it */
> -		if (IS_NOQUOTA(inode))
> -			goto flags_out;
> -
> -		oldflags = ei->i_flags;
> -
> -		/* The JOURNAL_DATA flag is modifiable only by root */
> -		jflag = flags & EXT4_JOURNAL_DATA_FL;
> -
> -		/*
> -		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
> -		 * the relevant capability.
> -		 *
> -		 * This test looks nicer. Thanks to Pauline Middelink
> -		 */
> -		if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
> -			if (!capable(CAP_LINUX_IMMUTABLE))
> -				goto flags_out;
> -		}
> -
> -		/*
> -		 * The JOURNAL_DATA flag can only be changed by
> -		 * the relevant capability.
> -		 */
> -		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
> -			if (!capable(CAP_SYS_RESOURCE))
> -				goto flags_out;
> -		}
> -		if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
> -			migrate = 1;
> -
> -		if (flags & EXT4_EOFBLOCKS_FL) {
> -			/* we don't support adding EOFBLOCKS flag */
> -			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
> -				err = -EOPNOTSUPP;
> -				goto flags_out;
> -			}
> -		} else if (oldflags & EXT4_EOFBLOCKS_FL)
> -			ext4_truncate(inode);
> -
> -		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
> -		if (IS_ERR(handle)) {
> -			err = PTR_ERR(handle);
> -			goto flags_out;
> -		}
> -		if (IS_SYNC(inode))
> -			ext4_handle_sync(handle);
> -		err = ext4_reserve_inode_write(handle, inode, &iloc);
> -		if (err)
> -			goto flags_err;
> -
> -		for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
> -			if (!(mask & EXT4_FL_USER_MODIFIABLE))
> -				continue;
> -			if (mask & flags)
> -				ext4_set_inode_flag(inode, i);
> -			else
> -				ext4_clear_inode_flag(inode, i);
> -		}
> -
> -		ext4_set_inode_flags(inode);
> -		inode->i_ctime = ext4_current_time(inode);
> -
> -		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
> -flags_err:
> -		ext4_journal_stop(handle);
> -		if (err)
> -			goto flags_out;
> -
> -		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
> -			err = ext4_change_inode_journal_flag(inode, jflag);
> -		if (err)
> -			goto flags_out;
> -		if (migrate) {
> -			if (flags & EXT4_EXTENTS_FL)
> -				err = ext4_ext_migrate(inode);
> -			else
> -				err = ext4_ind_migrate(inode);
> -		}
> -
> -flags_out:
> +		err = ext4_set_iflags(inode, flags);
>  		mutex_unlock(&inode->i_mutex);
> +
>  		mnt_drop_write_file(filp);
>  		return err;
> -	}
>  	case EXT4_IOC_GETVERSION:
>  	case EXT4_IOC_GETVERSION_OLD:
>  		return put_user(inode->i_generation, (int __user *) arg);
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 1423c48..64963e4 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -105,6 +105,7 @@ static const struct xattr_handler *ext4_xattr_handler_map[] = {
>  };
>  
>  const struct xattr_handler *ext4_xattr_handlers[] = {
> +	&ext4_xattr_iflags_handler,
>  	&ext4_xattr_user_handler,
>  	&ext4_xattr_trusted_handler,
>  #ifdef CONFIG_EXT4_FS_POSIX_ACL
> diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
> index c767dbd..2559583 100644
> --- a/fs/ext4/xattr.h
> +++ b/fs/ext4/xattr.h
> @@ -99,6 +99,7 @@ extern const struct xattr_handler ext4_xattr_trusted_handler;
>  extern const struct xattr_handler ext4_xattr_acl_access_handler;
>  extern const struct xattr_handler ext4_xattr_acl_default_handler;
>  extern const struct xattr_handler ext4_xattr_security_handler;
> +extern const struct xattr_handler ext4_xattr_iflags_handler;
>  
>  extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
>  
> diff --git a/fs/ext4/xattr_iflags.c b/fs/ext4/xattr_iflags.c
> new file mode 100644
> index 0000000..e3c5472
> --- /dev/null
> +++ b/fs/ext4/xattr_iflags.c
> @@ -0,0 +1,90 @@
> +/*
> + * linux/fs/ext4/xattr_iflags.c
> + * Handler for using the extended attribute interface to store inode flags.
> + *
> + * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
> + */
> +#include <linux/string.h>
> +#include <linux/fs.h>
> +#include <linux/security.h>
> +#include <linux/slab.h>
> +#include <linux/strflags.h>
> +#include "ext4_jbd2.h"
> +#include "ext4.h"
> +#include "xattr.h"
> +
> +static struct flag_db ext4_iflags = {
> +	.flag_sz = sizeof(int),
> +	.maps = {
> +		{"secrm", EXT4_SECRM_FL},
> +		{"unrm", EXT4_UNRM_FL},
> +		{"compr", EXT4_COMPR_FL},
> +		{"sync", EXT4_SYNC_FL},
> +		{"immutable", EXT4_IMMUTABLE_FL},
> +		{"append", EXT4_APPEND_FL},
> +		{"nodump", EXT4_NODUMP_FL},
> +		{"noatime", EXT4_NOATIME_FL},
> +		{"htree_dir", EXT4_INDEX_FL},
> +		{"journal_data", EXT4_JOURNAL_DATA_FL},
> +		{"notail", EXT4_NOTAIL_FL},
> +		{"dirsync", EXT4_DIRSYNC_FL},
> +		{"topdir", EXT4_TOPDIR_FL},
> +		{"extents", EXT4_EXTENTS_FL},
> +		{NULL, 0},
> +	}
> +};
> +
> +static size_t
> +ext4_xattr_iflags_list(struct dentry *dentry, char *list, size_t list_size,
> +		       const char *name, size_t name_len, int type)
> +{
> +	const size_t prefix_len = sizeof(XATTR_NAME_IFLAGS)-1;
> +	const size_t total_len = prefix_len + name_len + 1;
> +
> +	if (list && total_len <= list_size) {
> +		memcpy(list, XATTR_NAME_IFLAGS, prefix_len);
> +		memcpy(list+prefix_len, name, name_len);
> +		list[prefix_len + name_len] = '\0';
> +	}
> +	return total_len;
> +}
> +
> +static int
> +ext4_xattr_iflags_get(struct dentry *dentry, const char *name,
> +		       void *buffer, size_t size, int type)
> +{
> +	struct ext4_inode_info *ei;
> +
> +	ei = EXT4_I(dentry->d_inode);
> +	return flags_to_string(&ext4_iflags, &ei->i_flags, buffer, size);
> +}
> +
> +static int
> +ext4_xattr_iflags_set(struct dentry *dentry, const char *name,
> +		const void *value, size_t size, int flags, int type)
> +{
> +	char *string = (char *)value;
> +	int ret, moo;
> +
> +	string = kmalloc(size + 1, GFP_KERNEL);
> +	if (!string)
> +		return -ENOMEM;
> +	memcpy(string, value, size);
> +	string[size] = 0;
> +
> +	ret = string_to_flags(&ext4_iflags, string, size, &moo);
> +	if (ret)
> +		goto out;
> +
> +	ret = ext4_set_iflags(dentry->d_inode, moo);
> +out:
> +	kfree(string);
> +	return ret;
> +}
> +
> +const struct xattr_handler ext4_xattr_iflags_handler = {
> +	.prefix	= XATTR_NAME_IFLAGS,
> +	.list	= ext4_xattr_iflags_list,
> +	.get	= ext4_xattr_iflags_get,
> +	.set	= ext4_xattr_iflags_set,
> +};
> diff --git a/fs/xattr.c b/fs/xattr.c
> index 3377dff..bf0fe89 100644
> --- a/fs/xattr.c
> +++ b/fs/xattr.c
> @@ -32,6 +32,15 @@ static int
>  xattr_permission(struct inode *inode, const char *name, int mask)
>  {
>  	/*
> +	 * Always allow read/write of inode flag attribute.
> +	 */
> +	if (!strcmp(name, XATTR_NAME_IFLAGS)) {
> +		if (mask & MAY_WRITE)
> +			return inode_owner_or_capable(inode) ? 0 : -EACCES;
> +		return 0;
> +	}
> +
> +	/*
>  	 * We can never set or remove an extended attribute on a read-only
>  	 * filesystem  or on an immutable / append-only inode.
>  	 */
> diff --git a/include/linux/strflags.h b/include/linux/strflags.h
> new file mode 100644
> index 0000000..6c8067c
> --- /dev/null
> +++ b/include/linux/strflags.h
> @@ -0,0 +1,27 @@
> +/*
> + * File: linux/strflags.h
> + * Convert flags to pretty strings.
> + *
> + * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
> + */
> +#ifndef _LINUX_STRFLAGS_H_
> +#define _LINUX_STRFLAGS_H_
> +
> +#include <linux/types.h>
> +
> +struct flag_map {
> +	const char *string;
> +	unsigned long flag;
> +};
> +
> +struct flag_db {
> +	size_t flag_sz;
> +	struct flag_map maps[];
> +};
> +
> +int flags_to_string(struct flag_db *flagdb, void *flags, char *buf,
> +		    size_t buf_len);
> +int string_to_flags(struct flag_db *flagdb, char *buf, size_t buf_len,
> +		    void *flags);
> +
> +#endif /* _LINUX_STRFLAGS_H_ */
> diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
> index e4629b9..8cd78ae 100644
> --- a/include/uapi/linux/xattr.h
> +++ b/include/uapi/linux/xattr.h
> @@ -63,5 +63,7 @@
>  #define XATTR_POSIX_ACL_DEFAULT  "posix_acl_default"
>  #define XATTR_NAME_POSIX_ACL_DEFAULT XATTR_SYSTEM_PREFIX XATTR_POSIX_ACL_DEFAULT
>  
> +#define XATTR_IFLAGS "iflags"
> +#define XATTR_NAME_IFLAGS XATTR_SYSTEM_PREFIX XATTR_IFLAGS
>  
>  #endif /* _UAPI_LINUX_XATTR_H */
> diff --git a/lib/Makefile b/lib/Makefile
> index a459c31..f3a517d 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
>  	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
>  	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
>  	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
> -	 percpu-refcount.o percpu_ida.o
> +	 percpu-refcount.o percpu_ida.o strflags.o
>  obj-y += string_helpers.o
>  obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
>  obj-y += kstrtox.o
> diff --git a/lib/strflags.c b/lib/strflags.c
> new file mode 100644
> index 0000000..d1a34e7
> --- /dev/null
> +++ b/lib/strflags.c
> @@ -0,0 +1,117 @@
> +/*
> + * File: lib/strflags.c
> + * Convert flags to pretty strings.
> + *
> + * Copyright (C) 2014 Oracle, Darrick J. Wong <darrick.wong@oracle.com>
> + */
> +#include <linux/kernel.h>
> +#include <linux/string.h>
> +#include <linux/strflags.h>
> +#include <linux/errno.h>
> +
> +int flags_to_string(struct flag_db *flagdb, void *flags, char *buf,
> +		    size_t buf_len)
> +{
> +	struct flag_map *mapping;
> +	char *b;
> +	unsigned long fl;
> +	int used, first;
> +	size_t sz;
> +
> +	switch (flagdb->flag_sz) {
> +	case 1:
> +		fl = *(u8 *)flags;
> +		break;
> +	case 2:
> +		fl = *(u16 *)flags;
> +		break;
> +	case 4:
> +		fl = *(u32 *)flags;
> +		break;
> +	case 8:
> +		fl = *(u64 *)flags;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	/* Calculating buffer size */
> +	sz = 0;
> +	for (mapping = flagdb->maps; mapping->string; mapping++) {
> +		if (mapping->flag & fl) {
> +			sz += strlen(mapping->string);
> +			sz++;
> +		}
> +	}
> +
> +	if (buf == NULL)
> +		return sz;
> +	if (buf_len < sz)
> +		return -ERANGE;
> +
> +	/* Stringify the flags */
> +	b = buf;
> +	first = 1;
> +	for (mapping = flagdb->maps; mapping->string; mapping++) {
> +		if (mapping->flag & fl) {
> +			used = snprintf(b, buf_len, "%s%s",
> +					(first ? "" : ","),
> +					mapping->string);
> +			first = 0;
> +			buf_len -= used;
> +			b += used;
> +		}
> +	}
> +
> +	return b - buf;
> +}
> +EXPORT_SYMBOL_GPL(flags_to_string);
> +
> +int string_to_flags(struct flag_db *flagdb, char *buf, size_t buflen,
> +		    void *flags)
> +{
> +	struct flag_map *mapping;
> +	unsigned long fl = 0;
> +	char *p, *q;
> +	char c;
> +	int found;
> +
> +	for (p = buf; p < buf + buflen; p = q + 1) {
> +		for (q = p; *q != 0 && *q != ','; q++)
> +			; /* empty */
> +		if (p == q)
> +			continue;
> +
> +		c = *q; *q = 0;
> +		found = 0;
> +		for (mapping = flagdb->maps; mapping->string; mapping++)
> +			if (strcmp(p, mapping->string) == 0) {
> +				fl |= mapping->flag;
> +				found = 1;
> +			}
> +		if (!found)
> +			return -EINVAL;
> +		*q = c;
> +		p = q + 1;
> +	}
> +
> +	switch (flagdb->flag_sz) {
> +	case 1:
> +		*(u8 *)flags = fl;
> +		break;
> +	case 2:
> +		*(u16 *)flags = fl;
> +		break;
> +	case 4:
> +		*(u32 *)flags = fl;
> +		break;
> +	case 8:
> +		*(u64 *)flags = fl;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(string_to_flags);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
-- 
Jan Kara <jack@suse.cz>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface
  2014-01-07 12:48 ` Jan Kara
@ 2014-01-07 15:49   ` Christoph Hellwig
  2014-01-07 17:04     ` Theodore Ts'o
  0 siblings, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2014-01-07 15:49 UTC (permalink / raw)
  To: Jan Kara
  Cc: Darrick J. Wong, Aurelien Jarno, Theodore Ts'o,
	Alexander Viro, Rob Browning, Dave Chinner, Miklos Szeredi,
	linux-fsdevel, linux-kernel, linux-ext4

On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
>   I have to say I'm not thrilled by the idea of juggling strings in
> userspace and in kernel to set a flag for an inode...

Nevermind the massive amounts of code that sit in the filesystem.

Although my recent ACL patches are the first step towards handling the
existing semantically overloaded xattrs in common code.  Unless Al has
a valid reason to disagree I'd like to put a big NAK on adding any new
xattrs that aren't stored on disk as-is but provide magic functionality,
as they are pain to implement, maintain, and audit.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface
  2014-01-07 15:49   ` Christoph Hellwig
@ 2014-01-07 17:04     ` Theodore Ts'o
  2014-01-07 19:43       ` Darrick J. Wong
  0 siblings, 1 reply; 10+ messages in thread
From: Theodore Ts'o @ 2014-01-07 17:04 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jan Kara, Darrick J. Wong, Aurelien Jarno, Alexander Viro,
	Rob Browning, Dave Chinner, Miklos Szeredi, linux-fsdevel,
	linux-kernel, linux-ext4

On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> >   I have to say I'm not thrilled by the idea of juggling strings in
> > userspace and in kernel to set a flag for an inode...
> 
> Nevermind the massive amounts of code that sit in the filesystem.

The reason for this patch was to address what Dave Chinner has called
"a shitty interface"[1].  Using bitfields that need to be coordinated
across file systems, when sometimes a bit assignment is validly a fs
specific thing, and then later becomes something that gets shared
across file systems.

[1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396

If we don't go about it this way, there are alternatives: we could
create new ioctls (or a new syscall) as we start running out of bits
used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
are intended for fs-specific flags, which then later get promoted to
the new syscall when some functionality starts to get shared accross
other file systems (probably with a different bit assignment).  This
is certainly less code, but it does mean more complexity outside of
the code when we try to coordinate new functionality across file
systems.

Personally, I don't mind dealing with codepoint assignments, but my
impression is that this is a minority viewpoint.  Al and Linus have
historically hated bitfields, and Al in the past has spoken favorably
of Plan 9's approach of using strings for the system interface.

So while I have a preference towards using bitfields, as opposed to
using the xattr approach, what I'd really like is that we make a
decision, one way or another, about what's the best way to move
forward.

   	    	     	      	      	  - Ted

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface
  2014-01-07 17:04     ` Theodore Ts'o
@ 2014-01-07 19:43       ` Darrick J. Wong
  2014-01-07 19:59         ` Chris Mason
  2014-01-07 22:27         ` Theodore Ts'o
  0 siblings, 2 replies; 10+ messages in thread
From: Darrick J. Wong @ 2014-01-07 19:43 UTC (permalink / raw)
  To: Theodore Ts'o, Christoph Hellwig, Jan Kara, Aurelien Jarno,
	Alexander Viro, Rob Browning, Dave Chinner, Miklos Szeredi,
	linux-fsdevel, linux-kernel, linux-ext4

On Tue, Jan 07, 2014 at 12:04:30PM -0500, Theodore Ts'o wrote:
> On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> > On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> > >   I have to say I'm not thrilled by the idea of juggling strings in
> > > userspace and in kernel to set a flag for an inode...
> > 
> > Nevermind the massive amounts of code that sit in the filesystem.
> 
> The reason for this patch was to address what Dave Chinner has called
> "a shitty interface"[1].  Using bitfields that need to be coordinated
> across file systems, when sometimes a bit assignment is validly a fs
> specific thing, and then later becomes something that gets shared
> across file systems.
> 
> [1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396
> 
> If we don't go about it this way, there are alternatives: we could
> create new ioctls (or a new syscall) as we start running out of bits
> used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
> are intended for fs-specific flags, which then later get promoted to
> the new syscall when some functionality starts to get shared accross
> other file systems (probably with a different bit assignment).  This
> is certainly less code, but it does mean more complexity outside of
> the code when we try to coordinate new functionality across file
> systems.

I had thought of indexed inode flags as an alternative to the xattr/string
parsing thing.  Feature flags make their first appearance as part of a per-FS
flag-space and are migrated to the common flag-space when there is demand.
It would also avoid the need for each fs to create its own flag ioctl.

On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
so off I went. :)

#define FS_IOC_FLAGS_COMMON	0
#define FS_IOC_FLAGS_COMMON2	1
#define FS_IOC_FLAGS_EXT4	0xEF53

struct inode_flag_ioctl {
	u32 flag;
	u32 value;	/* or u64? */
};
#define FS_IOC_GETFLAGS2 _IOR('f', 12, struct inode_flag_ioctl);
#define FS_IOC_SETFLAGS2 _IOW('f', 13, struct inode_flag_ioctl);

foo() {
	struct inode_flag_ioctl if;

	if.flag = FS_IOC_FLAGS_COMMON;
	ioctl(fd, FS_IOC_GETFLAGS2, &if);
	printf("%d\n", if.value);

	if.flag = FS_IOC_FLAGS_EXT4;
	if.value = EXT5_BONGHITS_FL | EXT4_EA_INODE_FL;
	ioctl(fd, FS_IOC_SETFLAGS2, &if);
}

> Personally, I don't mind dealing with codepoint assignments, but my
> impression is that this is a minority viewpoint.  Al and Linus have
> historically hated bitfields, and Al in the past has spoken favorably
> of Plan 9's approach of using strings for the system interface.

I prefer strings too, but I suppose one pays for the complexity.  Given that
all the flags so far seem to have been booleans, this could be good enough.

> So while I have a preference towards using bitfields, as opposed to
> using the xattr approach, what I'd really like is that we make a
> decision, one way or another, about what's the best way to move
> forward.

Agreed.

--D
> 
>    	    	     	      	      	  - Ted
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface
  2014-01-07 19:43       ` Darrick J. Wong
@ 2014-01-07 19:59         ` Chris Mason
  2014-01-07 22:02           ` Darrick J. Wong
  2014-01-07 22:27         ` Theodore Ts'o
  1 sibling, 1 reply; 10+ messages in thread
From: Chris Mason @ 2014-01-07 19:59 UTC (permalink / raw)
  To: darrick.wong
  Cc: linux-kernel, hch, miklos, viro, rlb, linux-fsdevel, aurelien,
	tytso, linux-ext4, david, jack

On Tue, 2014-01-07 at 11:43 -0800, Darrick J. Wong wrote:
> On Tue, Jan 07, 2014 at 12:04:30PM -0500, Theodore Ts'o wrote:
> > On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> > > On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> > > >   I have to say I'm not thrilled by the idea of juggling strings in
> > > > userspace and in kernel to set a flag for an inode...
> > > 
> > > Nevermind the massive amounts of code that sit in the filesystem.
> > 
> > The reason for this patch was to address what Dave Chinner has called
> > "a shitty interface"[1].  Using bitfields that need to be coordinated
> > across file systems, when sometimes a bit assignment is validly a fs
> > specific thing, and then later becomes something that gets shared
> > across file systems.
> > 
> > [1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396
> > 
> > If we don't go about it this way, there are alternatives: we could
> > create new ioctls (or a new syscall) as we start running out of bits
> > used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
> > are intended for fs-specific flags, which then later get promoted to
> > the new syscall when some functionality starts to get shared accross
> > other file systems (probably with a different bit assignment).  This
> > is certainly less code, but it does mean more complexity outside of
> > the code when we try to coordinate new functionality across file
> > systems.
> 
> I had thought of indexed inode flags as an alternative to the xattr/string
> parsing thing.  Feature flags make their first appearance as part of a per-FS
> flag-space and are migrated to the common flag-space when there is demand.
> It would also avoid the need for each fs to create its own flag ioctl.
> 
> On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
> so off I went. :)
> 

At least in btrfs xattrs are more expensive than something right in the
inode.  We can cache it when we load the inode (it'll be right next to
the inode most of the time) but for performance critical things I do
like the good old fashioned flags.

It's also possible to turn xattrs off, so we have to deal with
filesystems that are mounted with them off and then back on again.  I
can't think of huge problems from that right now, just something to be
aware of.

-chris


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface
  2014-01-07 19:59         ` Chris Mason
@ 2014-01-07 22:02           ` Darrick J. Wong
  2014-01-07 22:08             ` Chris Mason
  0 siblings, 1 reply; 10+ messages in thread
From: Darrick J. Wong @ 2014-01-07 22:02 UTC (permalink / raw)
  To: Chris Mason
  Cc: linux-kernel, hch, miklos, viro, rlb, linux-fsdevel, aurelien,
	tytso, linux-ext4, david, jack

On Tue, Jan 07, 2014 at 07:59:15PM +0000, Chris Mason wrote:
> On Tue, 2014-01-07 at 11:43 -0800, Darrick J. Wong wrote:
> > On Tue, Jan 07, 2014 at 12:04:30PM -0500, Theodore Ts'o wrote:
> > > On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> > > > On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> > > > >   I have to say I'm not thrilled by the idea of juggling strings in
> > > > > userspace and in kernel to set a flag for an inode...
> > > > 
> > > > Nevermind the massive amounts of code that sit in the filesystem.
> > > 
> > > The reason for this patch was to address what Dave Chinner has called
> > > "a shitty interface"[1].  Using bitfields that need to be coordinated
> > > across file systems, when sometimes a bit assignment is validly a fs
> > > specific thing, and then later becomes something that gets shared
> > > across file systems.
> > > 
> > > [1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396
> > > 
> > > If we don't go about it this way, there are alternatives: we could
> > > create new ioctls (or a new syscall) as we start running out of bits
> > > used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
> > > are intended for fs-specific flags, which then later get promoted to
> > > the new syscall when some functionality starts to get shared accross
> > > other file systems (probably with a different bit assignment).  This
> > > is certainly less code, but it does mean more complexity outside of
> > > the code when we try to coordinate new functionality across file
> > > systems.
> > 
> > I had thought of indexed inode flags as an alternative to the xattr/string
> > parsing thing.  Feature flags make their first appearance as part of a per-FS
> > flag-space and are migrated to the common flag-space when there is demand.
> > It would also avoid the need for each fs to create its own flag ioctl.
> > 
> > On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
> > so off I went. :)
> > 
> 
> At least in btrfs xattrs are more expensive than something right in the
> inode.  We can cache it when we load the inode (it'll be right next to
> the inode most of the time) but for performance critical things I do
> like the good old fashioned flags.

Just to clarify -- I wasn't proposing any on-disk changes for any filesystems,
merely creating virtual xattrs that wrap the inode flags.

> It's also possible to turn xattrs off, so we have to deal with
> filesystems that are mounted with them off and then back on again.  I
> can't think of huge problems from that right now, just something to be
> aware of.

Just to satisfy my curiosity: are xattrs always separate objects in btrfs?

--D
> 
> -chris
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [RFC PATCH v2] fs: new FS_IOC_[GS]ETFLAGS2 interface
  2014-01-07  2:58 [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface Darrick J. Wong
  2014-01-07 12:48 ` Jan Kara
@ 2014-01-07 22:04 ` Darrick J. Wong
  1 sibling, 0 replies; 10+ messages in thread
From: Darrick J. Wong @ 2014-01-07 22:04 UTC (permalink / raw)
  To: Aurelien Jarno, Theodore Ts'o, Alexander Viro, Rob Browning,
	Dave Chinner, Miklos Szeredi
  Cc: linux-fsdevel, linux-kernel, linux-ext4

This is a(nother) proof of concept interface for replacing the
contentious FS_IOC_[GS]ETFLAGS interface with a new ioctl that allows
for a dramatic increase in the size of the flag-space.

In this new interface, the new [GS]ETFLAGS functions take a flag
number and either return or set the flag value.  This will let us
put all the 'common' flags into a common flag-space and create
specific flag-spaces for each filesystem's unique features.

The initial conversion is for ext4, though given the similarities of
most filesystems' implementations, converting the rest shouldn't be
difficult.  It also pretends to service GETVERSION as an example of an
ext4-specific "flag".

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 Documentation/filesystems/inode-flags-ioctl.txt |   89 ++++++++
 fs/ext4/ioctl.c                                 |  257 ++++++++++++++---------
 include/uapi/linux/fs.h                         |    9 +
 3 files changed, 256 insertions(+), 99 deletions(-)
 create mode 100644 Documentation/filesystems/inode-flags-ioctl.txt

diff --git a/Documentation/filesystems/inode-flags-ioctl.txt b/Documentation/filesystems/inode-flags-ioctl.txt
new file mode 100644
index 0000000..7244c37
--- /dev/null
+++ b/Documentation/filesystems/inode-flags-ioctl.txt
@@ -0,0 +1,89 @@
+This is the new inode flags ioctl interface.  Whereas before we could only work
+with a single value whose size changed depending on the platform, this new
+interface establishes the capability to store 2^32 (flagspace, value) tuples
+per inode.  Both key and value are defined to be an unsigned 32-bit quantity.
+
+To use this interface, we define struct inode_flag_ioctl.  The 'flag' field
+contains an identifier for the flag space that you want.  This is
+FS_IOC_FLAGS_COMMON for the old [GS]ETFLAGS functionality, or a FS-specific
+value.  The 'value' field contains the value that you wish to get or set.
+
+Here is an example program:
+/*
+ * Get/set flags via FS_IOC_[GS]ETFLAGS2 interface
+ * Copyright (C) 2014 Oracle, Darrick J. Wong
+ * Licensed under the GPLv2.
+ */
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <asm/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifndef FS_IOC_GETFLAGS2
+struct inode_flag_ioctl {
+	__u32 flag;
+	__u32 value;
+};
+#define FS_IOC_GETFLAGS2 		_IOR('f', 12, struct inode_flag_ioctl)
+#define FS_IOC_SETFLAGS2 		_IOW('f', 13, struct inode_flag_ioctl)
+#endif /* FS_IOC_GETFLAGS2 */
+
+int main(int argc, char *argv[])
+{
+	struct inode_flag_ioctl fi;
+	__u32 key, value;
+	int set, ret, opt, i, fd;
+
+	if (argc < 2) {
+		printf("Usage: %s [-g key|-s key] [-v value] files\n",
+		       argv[0]);
+		return 1;
+	}
+
+	set = key = value = 0;
+	while ((opt = getopt(argc, argv, "g:s:v:")) != -1) {
+		switch (opt) {
+		case 'g':
+			set = 0;
+			key = strtol(optarg, NULL, 0);
+			break;
+		case 's':
+			set = 1;
+			key = strtol(optarg, NULL, 0);
+			break;
+		case 'v':
+			value = strtol(optarg, NULL, 0);
+			break;
+		default:
+			printf("Usage: %s [-g key|-s key] [-v value] files\n",
+			       argv[0]);
+			return 1;
+		}
+	}
+
+	fi.flag = key;
+	for (i = optind; i < argc; i++) {
+		fd = open(argv[i], O_RDONLY);
+		if (fd < 0) {
+			perror(argv[i]);
+			continue;
+		}
+		if (set)
+			fi.value = value;
+		ret = ioctl(fd, (set ? FS_IOC_SETFLAGS2 : FS_IOC_GETFLAGS2),
+			    &fi);
+		close(fd);
+		if (ret) {
+			perror(argv[i]);
+			continue;
+		}
+		if (!set)
+			printf("%s: 0x%x = 0x%x\n", argv[i], key, fi.value);
+	}
+
+	return 0;
+}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 60589b6..9de72d5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -213,128 +213,187 @@ swap_boot_out:
 	return err;
 }
 
-long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+static int ext4_ioctl_setflags(struct file *filp, unsigned int flags)
 {
 	struct inode *inode = file_inode(filp);
-	struct super_block *sb = inode->i_sb;
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned int flags;
+	handle_t *handle = NULL;
+	int err, migrate = 0;
+	struct ext4_iloc iloc;
+	unsigned int oldflags, mask, i;
+	unsigned int jflag;
 
-	ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
+	if (!inode_owner_or_capable(inode))
+		return -EACCES;
 
-	switch (cmd) {
-	case EXT4_IOC_GETFLAGS:
-		ext4_get_inode_flags(ei);
-		flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
-		return put_user(flags, (int __user *) arg);
-	case EXT4_IOC_SETFLAGS: {
-		handle_t *handle = NULL;
-		int err, migrate = 0;
-		struct ext4_iloc iloc;
-		unsigned int oldflags, mask, i;
-		unsigned int jflag;
+	err = mnt_want_write_file(filp);
+	if (err)
+		return err;
 
-		if (!inode_owner_or_capable(inode))
-			return -EACCES;
+	flags = ext4_mask_flags(inode->i_mode, flags);
 
-		if (get_user(flags, (int __user *) arg))
-			return -EFAULT;
+	err = -EPERM;
+	mutex_lock(&inode->i_mutex);
+	/* Is it quota file? Do not allow user to mess with it */
+	if (IS_NOQUOTA(inode))
+		goto flags_out;
 
-		err = mnt_want_write_file(filp);
-		if (err)
-			return err;
+	oldflags = ei->i_flags;
 
-		flags = ext4_mask_flags(inode->i_mode, flags);
+	/* The JOURNAL_DATA flag is modifiable only by root */
+	jflag = flags & EXT4_JOURNAL_DATA_FL;
 
-		err = -EPERM;
-		mutex_lock(&inode->i_mutex);
-		/* Is it quota file? Do not allow user to mess with it */
-		if (IS_NOQUOTA(inode))
+	/*
+	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+	 * the relevant capability.
+	 *
+	 * This test looks nicer. Thanks to Pauline Middelink
+	 */
+	if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+		if (!capable(CAP_LINUX_IMMUTABLE))
 			goto flags_out;
+	}
 
-		oldflags = ei->i_flags;
-
-		/* The JOURNAL_DATA flag is modifiable only by root */
-		jflag = flags & EXT4_JOURNAL_DATA_FL;
-
-		/*
-		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
-		 * the relevant capability.
-		 *
-		 * This test looks nicer. Thanks to Pauline Middelink
-		 */
-		if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
-			if (!capable(CAP_LINUX_IMMUTABLE))
-				goto flags_out;
-		}
-
-		/*
-		 * The JOURNAL_DATA flag can only be changed by
-		 * the relevant capability.
-		 */
-		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
-			if (!capable(CAP_SYS_RESOURCE))
-				goto flags_out;
-		}
-		if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
-			migrate = 1;
-
-		if (flags & EXT4_EOFBLOCKS_FL) {
-			/* we don't support adding EOFBLOCKS flag */
-			if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
-				err = -EOPNOTSUPP;
-				goto flags_out;
-			}
-		} else if (oldflags & EXT4_EOFBLOCKS_FL)
-			ext4_truncate(inode);
+	/*
+	 * The JOURNAL_DATA flag can only be changed by
+	 * the relevant capability.
+	 */
+	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+		if (!capable(CAP_SYS_RESOURCE))
+			goto flags_out;
+	}
+	if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+		migrate = 1;
 
-		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
-		if (IS_ERR(handle)) {
-			err = PTR_ERR(handle);
+	if (flags & EXT4_EOFBLOCKS_FL) {
+		/* we don't support adding EOFBLOCKS flag */
+		if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+			err = -EOPNOTSUPP;
 			goto flags_out;
 		}
-		if (IS_SYNC(inode))
-			ext4_handle_sync(handle);
-		err = ext4_reserve_inode_write(handle, inode, &iloc);
-		if (err)
-			goto flags_err;
-
-		for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
-			if (!(mask & EXT4_FL_USER_MODIFIABLE))
-				continue;
-			if (mask & flags)
-				ext4_set_inode_flag(inode, i);
-			else
-				ext4_clear_inode_flag(inode, i);
-		}
+	} else if (oldflags & EXT4_EOFBLOCKS_FL)
+		ext4_truncate(inode);
 
-		ext4_set_inode_flags(inode);
-		inode->i_ctime = ext4_current_time(inode);
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto flags_out;
+	}
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		goto flags_err;
+
+	for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+		if (!(mask & EXT4_FL_USER_MODIFIABLE))
+			continue;
+		if (mask & flags)
+			ext4_set_inode_flag(inode, i);
+		else
+			ext4_clear_inode_flag(inode, i);
+	}
 
-		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
-		ext4_journal_stop(handle);
-		if (err)
-			goto flags_out;
+	ext4_set_inode_flags(inode);
+	inode->i_ctime = ext4_current_time(inode);
 
-		if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
-			err = ext4_change_inode_journal_flag(inode, jflag);
-		if (err)
-			goto flags_out;
-		if (migrate) {
-			if (flags & EXT4_EXTENTS_FL)
-				err = ext4_ext_migrate(inode);
-			else
-				err = ext4_ind_migrate(inode);
-		}
+	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+	ext4_journal_stop(handle);
+	if (err)
+		goto flags_out;
+
+	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+		err = ext4_change_inode_journal_flag(inode, jflag);
+	if (err)
+		goto flags_out;
+	if (migrate) {
+		if (flags & EXT4_EXTENTS_FL)
+			err = ext4_ext_migrate(inode);
+		else
+			err = ext4_ind_migrate(inode);
+	}
 
 flags_out:
-		mutex_unlock(&inode->i_mutex);
-		mnt_drop_write_file(filp);
-		return err;
+	mutex_unlock(&inode->i_mutex);
+	mnt_drop_write_file(filp);
+	return err;
+}
+
+static int ext4_ioctl_getflags2(struct file *filp, void __user *arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct inode_flag_ioctl fi;
+
+	if (copy_from_user(&fi, arg, sizeof(fi)))
+		return -EFAULT;
+
+	switch (fi.flag) {
+	case FS_IOC_FLAGS_COMMON:
+		ext4_get_inode_flags(ei);
+		fi.value = ei->i_flags & EXT4_FL_USER_VISIBLE;
+		break;
+	case FS_IOC_FLAGS_EXT4_IVERSION:
+		fi.value = inode->i_generation;
+		break;
+	default:
+		return -EINVAL;
 	}
+
+	if (copy_to_user(arg, &fi, sizeof(fi)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ext4_ioctl_setflags2(struct file *filp, void __user *arg)
+{
+	struct inode_flag_ioctl fi;
+
+	if (copy_from_user(&fi, arg, sizeof(fi)))
+		return -EFAULT;
+
+	switch (fi.flag) {
+	case FS_IOC_FLAGS_COMMON:
+		return ext4_ioctl_setflags(filp, fi.value);
+	case FS_IOC_FLAGS_EXT4_IVERSION:
+		/* not supported right now */
+		return -ENOTTY;
+	}
+	return -EINVAL;
+}
+
+long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct super_block *sb = inode->i_sb;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned int flags;
+
+	ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
+
+	switch (cmd) {
+	case FS_IOC_GETFLAGS2:
+		return ext4_ioctl_getflags2(filp, (void __user *)arg);
+	case FS_IOC_SETFLAGS2:
+		return ext4_ioctl_setflags2(filp, (void __user *)arg);
+	case EXT4_IOC_GETFLAGS:
+		printk_once(KERN_WARNING "EXT4-fs: FS_IOC_GETFLAGS is "
+			    "deprecated; use FS_IOC_GETFLAGS2.\n");
+		ext4_get_inode_flags(ei);
+		flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
+		return put_user(flags, (int __user *) arg);
+	case EXT4_IOC_SETFLAGS:
+		printk_once(KERN_WARNING "EXT4-fs: FS_IOC_SETFLAGS is "
+			    "deprecated; use FS_IOC_SETFLAGS2.\n");
+		if (get_user(flags, (int __user *) arg))
+			return -EFAULT;
+		return ext4_ioctl_setflags(filp, flags);
 	case EXT4_IOC_GETVERSION:
 	case EXT4_IOC_GETVERSION_OLD:
+		printk_once(KERN_WARNING "EXT4-fs: FS_IOC_GETVERSION is "
+			    "deprecated; use FS_IOC_GETFLAGS2.\n");
 		return put_user(inode->i_generation, (int __user *) arg);
 	case EXT4_IOC_SETVERSION:
 	case EXT4_IOC_SETVERSION_OLD: {
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 6c28b61..806165f 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -196,6 +196,15 @@ struct inodes_stat_t {
 #define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
 #define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
 
+/* New inode flags API */
+#define FS_IOC_FLAGS_COMMON		0x00000000 /* common inode flags */
+#define FS_IOC_FLAGS_EXT4_IVERSION	0x0000EF53 /* ext4 inode version */
+struct inode_flag_ioctl {
+	__u32 flag;
+	__u32 value;
+};
+#define FS_IOC_GETFLAGS2		_IOR('f', 12, struct inode_flag_ioctl)
+#define FS_IOC_SETFLAGS2		_IOW('f', 13, struct inode_flag_ioctl)
 
 #define SYNC_FILE_RANGE_WAIT_BEFORE	1
 #define SYNC_FILE_RANGE_WRITE		2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface
  2014-01-07 22:02           ` Darrick J. Wong
@ 2014-01-07 22:08             ` Chris Mason
  0 siblings, 0 replies; 10+ messages in thread
From: Chris Mason @ 2014-01-07 22:08 UTC (permalink / raw)
  To: darrick.wong
  Cc: linux-kernel, hch, miklos, viro, rlb, linux-fsdevel, aurelien,
	tytso, linux-ext4, david, jack

On Tue, 2014-01-07 at 14:02 -0800, Darrick J. Wong wrote:
> On Tue, Jan 07, 2014 at 07:59:15PM +0000, Chris Mason wrote:
> > On Tue, 2014-01-07 at 11:43 -0800, Darrick J. Wong wrote:
> > > On Tue, Jan 07, 2014 at 12:04:30PM -0500, Theodore Ts'o wrote:
> > > > On Tue, Jan 07, 2014 at 07:49:35AM -0800, Christoph Hellwig wrote:
> > > > > On Tue, Jan 07, 2014 at 01:48:31PM +0100, Jan Kara wrote:
> > > > > >   I have to say I'm not thrilled by the idea of juggling strings in
> > > > > > userspace and in kernel to set a flag for an inode...
> > > > > 
> > > > > Nevermind the massive amounts of code that sit in the filesystem.
> > > > 
> > > > The reason for this patch was to address what Dave Chinner has called
> > > > "a shitty interface"[1].  Using bitfields that need to be coordinated
> > > > across file systems, when sometimes a bit assignment is validly a fs
> > > > specific thing, and then later becomes something that gets shared
> > > > across file systems.
> > > > 
> > > > [1] http://thread.gmane.org/gmane.linux.file-systems/80164/focus=80396
> > > > 
> > > > If we don't go about it this way, there are alternatives: we could
> > > > create new ioctls (or a new syscall) as we start running out of bits
> > > > used by FS_IOC_[GS]ETFLAGS.  We can create new ioctls for bits which
> > > > are intended for fs-specific flags, which then later get promoted to
> > > > the new syscall when some functionality starts to get shared accross
> > > > other file systems (probably with a different bit assignment).  This
> > > > is certainly less code, but it does mean more complexity outside of
> > > > the code when we try to coordinate new functionality across file
> > > > systems.
> > > 
> > > I had thought of indexed inode flags as an alternative to the xattr/string
> > > parsing thing.  Feature flags make their first appearance as part of a per-FS
> > > flag-space and are migrated to the common flag-space when there is demand.
> > > It would also avoid the need for each fs to create its own flag ioctl.
> > > 
> > > On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
> > > so off I went. :)
> > > 
> > 
> > At least in btrfs xattrs are more expensive than something right in the
> > inode.  We can cache it when we load the inode (it'll be right next to
> > the inode most of the time) but for performance critical things I do
> > like the good old fashioned flags.
> 
> Just to clarify -- I wasn't proposing any on-disk changes for any filesystems,
> merely creating virtual xattrs that wrap the inode flags.

Ah, sorry I missed that part.  I was assuming that as we run out of
flags we'll want to use real xattrs instead.

> 
> > It's also possible to turn xattrs off, so we have to deal with
> > filesystems that are mounted with them off and then back on again.  I
> > can't think of huge problems from that right now, just something to be
> > aware of.
> 
> Just to satisfy my curiosity: are xattrs always separate objects in btrfs?

Strictly speaking, yes.  They aren't in the inode struct.  But most of
the time they will be in the same btree block.  For anything performance
critical we can order the keys to push them to the front.

-chris

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface
  2014-01-07 19:43       ` Darrick J. Wong
  2014-01-07 19:59         ` Chris Mason
@ 2014-01-07 22:27         ` Theodore Ts'o
  1 sibling, 0 replies; 10+ messages in thread
From: Theodore Ts'o @ 2014-01-07 22:27 UTC (permalink / raw)
  To: Darrick J. Wong
  Cc: Christoph Hellwig, Jan Kara, Aurelien Jarno, Alexander Viro,
	Rob Browning, Dave Chinner, Miklos Szeredi, linux-fsdevel,
	linux-kernel, linux-ext4

On Tue, Jan 07, 2014 at 11:43:56AM -0800, Darrick J. Wong wrote:
> I had thought of indexed inode flags as an alternative to the xattr/string
> parsing thing.  Feature flags make their first appearance as part of a per-FS
> flag-space and are migrated to the common flag-space when there is demand.
> It would also avoid the need for each fs to create its own flag ioctl.
> 
> On the other hand, someone suggested I try remaking IOC_[GS]ETFLAG as an xattr,
> so off I went. :)
> 
> #define FS_IOC_FLAGS_COMMON	0
> #define FS_IOC_FLAGS_COMMON2	1
> #define FS_IOC_FLAGS_EXT4	0xEF53
> 
> struct inode_flag_ioctl {
> 	u32 flag;
> 	u32 value;	/* or u64? */
> };
> #define FS_IOC_GETFLAGS2 _IOR('f', 12, struct inode_flag_ioctl);
> #define FS_IOC_SETFLAGS2 _IOW('f', 13, struct inode_flag_ioctl);

Is having this structure and demultiplexing based on
inode_flag_ioctl.flag really worth it?

I'd just simply introduce two new ioctl's for generic flags:
FS_IOC_COMMON_[GS]ETFLAGS, and then two new ioctl's for each file
system: FS_IOC_EXT4_[GS]ETFLAGS, FS_IOC_BTRFS_[GS]ETFLAGS, etc.

Is this uglier or pretier than using strings?  Eh.... six of one, half
dozen of the other.  I think it's mostly a matter of personal taste.

      	     	       	     	  	 - Ted

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2014-01-07 22:28 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-01-07  2:58 [RFC PATCH] fs: xattr-based FS_IOC_[GS]ETFLAGS interface Darrick J. Wong
2014-01-07 12:48 ` Jan Kara
2014-01-07 15:49   ` Christoph Hellwig
2014-01-07 17:04     ` Theodore Ts'o
2014-01-07 19:43       ` Darrick J. Wong
2014-01-07 19:59         ` Chris Mason
2014-01-07 22:02           ` Darrick J. Wong
2014-01-07 22:08             ` Chris Mason
2014-01-07 22:27         ` Theodore Ts'o
2014-01-07 22:04 ` [RFC PATCH v2] fs: new FS_IOC_[GS]ETFLAGS2 interface Darrick J. Wong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.