All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/12] e2fsck: add support for large xattrs in external inodes
@ 2017-06-26 13:43 Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 02/12] tune2fs: do not allow disabling ea_inode feature Tahsin Erdogan
                   ` (11 more replies)
  0 siblings, 12 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Andreas Dilger, Kalpak Shah, Tahsin Erdogan

From: Andreas Dilger <andreas.dilger@intel.com>

Add support for the INCOMPAT_EA_INODE feature, which stores large
extended attributes into an external inode instead of data blocks.
The inode is referenced by the e_value_inum field (formerly the
unused e_value_block field) from the extent header, and stores the
xattr data starting at byte offset 0 in the inode data block.

The xattr inode stores the referring inode number in its i_mtime,
and the parent i_generation in its own i_generation, so that there
is a solid linkage between the two that e2fsck can verify.  The
xattr inode is itself marked with EXT4_EA_INODE_FL as well.

Signed-off-by: Kalpak Shah <kalpak.shah@sun.com>
Signed-off-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 e2fsck/e2fsck.h            |   1 +
 e2fsck/pass1.c             | 128 ++++++++++++++++++++++++++++++++++-----------
 e2fsck/pass4.c             |  18 +++++++
 e2fsck/problem.c           |  22 ++++++++
 e2fsck/problem.h           |  14 +++++
 lib/ext2fs/ext2_ext_attr.h |   2 +-
 lib/ext2fs/ext2_fs.h       |   3 +-
 lib/ext2fs/ext2fs.h        |   1 +
 lib/ext2fs/ext_attr.c      | 116 +++++++++++++++++++++++++---------------
 lib/ext2fs/swapfs.c        |   2 +-
 misc/mke2fs.c              |   1 +
 misc/tune2fs.c             |   2 +
 12 files changed, 235 insertions(+), 75 deletions(-)

diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index fd12024c2cd7..c19cdfdd733e 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -254,6 +254,7 @@ struct e2fsck_struct {
 	ext2fs_inode_bitmap inode_bb_map; /* Inodes which are in bad blocks */
 	ext2fs_inode_bitmap inode_imagic_map; /* AFS inodes */
 	ext2fs_inode_bitmap inode_reg_map; /* Inodes which are regular files*/
+	ext2fs_inode_bitmap inode_ea_map; /* EA inodes which are non-orphan */
 
 	ext2fs_block_bitmap block_found_map; /* Blocks which are in use */
 	ext2fs_block_bitmap block_dup_map; /* Blks referenced more than once */
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 422a3d699111..1c68af8990b4 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -26,6 +26,7 @@
  * 	- A bitmap of which blocks are in use.		(block_found_map)
  * 	- A bitmap of which blocks are in use by two inodes	(block_dup_map)
  * 	- The data blocks of the directory inodes.	(dir_map)
+ *	- A bitmap of EA inodes.			(inode_ea_map)
  *
  * Pass 1 is designed to stash away enough information so that the
  * other passes should not need to read in the inode information
@@ -333,6 +334,56 @@ static void check_size(e2fsck_t ctx, struct problem_context *pctx)
 	e2fsck_write_inode(ctx, pctx->ino, pctx->inode, "pass1");
 }
 
+static void mark_inode_ea_map(e2fsck_t ctx, struct problem_context *pctx,
+			      ext2_ino_t ino)
+{
+	if (!ctx->inode_ea_map) {
+		pctx->errcode = ext2fs_allocate_inode_bitmap(ctx->fs,
+					 _("EA inode map"),
+					 &ctx->inode_ea_map);
+		if (pctx->errcode) {
+			fix_problem(ctx, PR_1_ALLOCATE_IBITMAP_ERROR,
+				    pctx);
+			exit(1);
+		}
+	}
+
+	ext2fs_mark_inode_bitmap2(ctx->inode_ea_map, ino);
+}
+
+/*
+ * Check validity of EA inode. Return 0 if EA inode is valid, otherwise return
+ * the problem code.
+ */
+static problem_t check_large_ea_inode(e2fsck_t ctx,
+				      struct ext2_ext_attr_entry *entry,
+				      struct problem_context *pctx)
+{
+	struct ext2_inode inode;
+
+	/* Check if inode is within valid range */
+	if ((entry->e_value_inum < EXT2_FIRST_INODE(ctx->fs->super)) ||
+	    (entry->e_value_inum > ctx->fs->super->s_inodes_count))
+		return PR_1_ATTR_VALUE_EA_INODE;
+
+	e2fsck_read_inode(ctx, entry->e_value_inum, &inode, "pass1");
+	if (!(inode.i_flags & EXT4_EA_INODE_FL)) {
+		/* If EXT4_EA_INODE_FL flag is not present but back-pointer
+		 * matches then we should set this flag */
+		if (inode.i_mtime == pctx->ino &&
+		    inode.i_generation == pctx->inode->i_generation &&
+		    fix_problem(ctx, PR_1_ATTR_SET_EA_INODE_FL, pctx)) {
+			inode.i_flags |= EXT4_EA_INODE_FL;
+			ext2fs_write_inode(ctx->fs, entry->e_value_inum,&inode);
+		} else
+			return PR_1_ATTR_NO_EA_INODE_FL;
+	} else if (inode.i_mtime != pctx->ino ||
+		   inode.i_generation != pctx->inode->i_generation)
+		return PR_1_ATTR_INVAL_EA_INODE;
+
+	return 0;
+}
+
 static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
 {
 	struct ext2_super_block *sb = ctx->fs->super;
@@ -391,25 +442,28 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
 		/* attribute len eats this space */
 		remain -= EXT2_EXT_ATTR_SIZE(entry->e_name_len);
 
-		/* check value size */
-		if (entry->e_value_size > remain) {
-			pctx->num = entry->e_value_size;
-			problem = PR_1_ATTR_VALUE_SIZE;
-			goto fix;
-		}
+		if (entry->e_value_inum == 0) {
+			/* check value size */
+			if (entry->e_value_size > remain) {
+				pctx->num = entry->e_value_size;
+				problem = PR_1_ATTR_VALUE_SIZE;
+				goto fix;
+			}
 
-		/* e_value_block must be 0 in inode's ea */
-		if (entry->e_value_block != 0) {
-			pctx->num = entry->e_value_block;
-			problem = PR_1_ATTR_VALUE_BLOCK;
-			goto fix;
-		}
+			if (entry->e_value_size &&
+			    region_allocate(region,
+					    sizeof(__u32) + entry->e_value_offs,
+					    EXT2_EXT_ATTR_SIZE(
+						entry->e_value_size))) {
+				problem = PR_1_INODE_EA_ALLOC_COLLISION;
+				goto fix;
+			}
+		} else {
+			problem = check_large_ea_inode(ctx, entry, pctx);
+			if (problem != 0)
+				goto fix;
 
-		if (entry->e_value_size &&
-		    region_allocate(region, sizeof(__u32) + entry->e_value_offs,
-				    EXT2_EXT_ATTR_SIZE(entry->e_value_size))) {
-			problem = PR_1_INODE_EA_ALLOC_COLLISION;
-			goto fix;
+			mark_inode_ea_map(ctx, pctx, entry->e_value_inum);
 		}
 
 		hash = ext2fs_ext_attr_hash_entry(entry,
@@ -422,7 +476,10 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
 			goto fix;
 		}
 
-		remain -= entry->e_value_size;
+		/* If EA value is stored in external inode then it does not
+		 * consume space here */
+		if (entry->e_value_inum == 0)
+			remain -= entry->e_value_size;
 
 		entry = EXT2_EXT_ATTR_NEXT(entry);
 	}
@@ -2368,19 +2425,28 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 				goto clear_extattr;
 			break;
 		}
-		if (entry->e_value_block != 0) {
-			if (fix_problem(ctx, PR_1_EA_BAD_VALUE, pctx))
-				goto clear_extattr;
-		}
-		if (entry->e_value_offs + entry->e_value_size > fs->blocksize) {
-			if (fix_problem(ctx, PR_1_EA_BAD_VALUE, pctx))
-				goto clear_extattr;
-			break;
-		}
-		if (entry->e_value_size &&
-		    region_allocate(region, entry->e_value_offs,
-				    EXT2_EXT_ATTR_SIZE(entry->e_value_size))) {
-			if (fix_problem(ctx, PR_1_EA_ALLOC_COLLISION, pctx))
+		if (entry->e_value_inum == 0) {
+			if (entry->e_value_offs + entry->e_value_size >
+			    fs->blocksize) {
+				if (fix_problem(ctx, PR_1_EA_BAD_VALUE, pctx))
+					goto clear_extattr;
+				break;
+			}
+			if (entry->e_value_size &&
+			    region_allocate(region, entry->e_value_offs,
+					    EXT2_EXT_ATTR_SIZE(entry->e_value_size))) {
+				if (fix_problem(ctx, PR_1_EA_ALLOC_COLLISION,
+						pctx))
+					goto clear_extattr;
+			}
+		} else {
+			problem_t problem;
+
+			problem = check_large_ea_inode(ctx, entry, pctx);
+			if (problem == 0)
+				mark_inode_ea_map(ctx, pctx,
+						  entry->e_value_inum);
+			else if (fix_problem(ctx, problem, pctx))
 				goto clear_extattr;
 		}
 
diff --git a/e2fsck/pass4.c b/e2fsck/pass4.c
index 8c101fd2805c..e44fc69c1613 100644
--- a/e2fsck/pass4.c
+++ b/e2fsck/pass4.c
@@ -11,6 +11,7 @@
  * Pass 4 frees the following data structures:
  * 	- A bitmap of which inodes are in bad blocks.	(inode_bb_map)
  * 	- A bitmap of which inodes are imagic inodes.	(inode_imagic_map)
+ *	- A bitmap of EA inodes.			(inode_ea_map)
  */
 
 #include "config.h"
@@ -38,6 +39,21 @@ static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
 			       "pass4: disconnect_inode");
 	if (EXT2_INODE_SIZE(fs->super) > EXT2_GOOD_OLD_INODE_SIZE)
 		extra_size = inode->i_extra_isize;
+
+	if (inode->i_flags & EXT4_EA_INODE_FL) {
+		if (ext2fs_test_inode_bitmap2(ctx->inode_ea_map, i)) {
+			ext2fs_icount_store(ctx->inode_count, i, 1);
+			return 0;
+		} else {
+			/* Zero the link count so that when inode is linked to
+			 * lost+found it has correct link count */
+			inode->i_links_count = 0;
+			e2fsck_write_inode(ctx, i, EXT2_INODE(inode),
+					   "disconnect_inode");
+			ext2fs_icount_store(ctx->inode_link_info, i, 0);
+		}
+	}
+
 	clear_problem_context(&pctx);
 	pctx.ino = i;
 	pctx.inode = EXT2_INODE(inode);
@@ -195,6 +211,8 @@ void e2fsck_pass4(e2fsck_t ctx)
 	}
 	ext2fs_free_icount(ctx->inode_link_info); ctx->inode_link_info = 0;
 	ext2fs_free_icount(ctx->inode_count); ctx->inode_count = 0;
+	ext2fs_free_inode_bitmap(ctx->inode_ea_map);
+	ctx->inode_ea_map = 0;
 	ext2fs_free_inode_bitmap(ctx->inode_bb_map);
 	ctx->inode_bb_map = 0;
 	ext2fs_free_inode_bitmap(ctx->inode_imagic_map);
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index 206a8a0b0639..dcdf9abe1abd 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -1145,6 +1145,28 @@ static struct e2fsck_problem problem_table[] = {
 	  N_("Timestamp(s) on @i %i beyond 2310-04-04 are likely pre-1970.\n"),
 	  PROMPT_FIX, PR_PREEN_OK | PR_NO_OK },
 
+	/* Inode has illegal extended attribute value inode */
+	{ PR_1_ATTR_VALUE_EA_INODE,
+	  N_("@i %i has @I @a value @i %N.\n"),
+	  PROMPT_CLEAR, PR_PREEN_OK },
+
+	/* Invalid backpointer from extended attribute inode to parent inode */
+	{ PR_1_ATTR_INVAL_EA_INODE,
+	  N_("@n backpointer from @a @i %N to parent @i %i.\n"),
+	  PROMPT_CLEAR, PR_PREEN_OK },
+
+	/* Inode has invalid extended attribute. EA inode missing
+	 * EA_INODE flag. */
+	{ PR_1_ATTR_NO_EA_INODE_FL,
+	  N_("@i %i has @n @a. EA @i %N missing EA_INODE flag.\n"),
+	  PROMPT_CLEAR, PR_PREEN_OK },
+
+	/* EA inode for parent inode missing EA_INODE flag. */
+	{ PR_1_ATTR_SET_EA_INODE_FL,
+	  N_("EA @i %N for parent @i %i missing EA_INODE flag.\n "),
+	  PROMPT_FIX, PR_PREEN_OK },
+
+
 	/* Pass 1b errors */
 
 	/* Pass 1B: Rescan for duplicate/bad blocks */
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index f8650b941b9a..dad608c94fd0 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -675,6 +675,20 @@ struct problem_context {
 /* Timestamp(s) on inode beyond 2310-04-04 are likely pre-1970. */
 #define PR_1_EA_TIME_OUT_OF_RANGE		0x010082
 
+/* Inode has illegal EA value inode */
+#define PR_1_ATTR_VALUE_EA_INODE		0x010083
+
+/* Invalid backpointer from EA inode to parent inode */
+#define PR_1_ATTR_INVAL_EA_INODE		0x010084
+
+/* Parent inode has invalid EA entry. EA inode does not have
+ * EXT4_EA_INODE_FL flag. Delete EA entry? */
+#define PR_1_ATTR_NO_EA_INODE_FL		0x010085
+
+/* EA inode for parent inode does not have EXT4_EA_INODE_FL flag */
+#define PR_1_ATTR_SET_EA_INODE_FL		0x010086
+
+
 /*
  * Pass 1b errors
  */
diff --git a/lib/ext2fs/ext2_ext_attr.h b/lib/ext2fs/ext2_ext_attr.h
index bbb0aaa97bca..f2042ed56cf9 100644
--- a/lib/ext2fs/ext2_ext_attr.h
+++ b/lib/ext2fs/ext2_ext_attr.h
@@ -29,7 +29,7 @@ struct ext2_ext_attr_entry {
 	__u8	e_name_len;	/* length of name */
 	__u8	e_name_index;	/* attribute name index */
 	__u16	e_value_offs;	/* offset in disk block of value */
-	__u32	e_value_block;	/* disk block attribute is stored on (n/i) */
+	__u32	e_value_inum;	/* inode in which the value is stored */
 	__u32	e_value_size;	/* size of attribute value */
 	__u32	e_hash;		/* hash value of name and value */
 #if 0
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 66b7058c3993..3b55000e3a90 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -922,7 +922,8 @@ EXT4_FEATURE_INCOMPAT_FUNCS(encrypt,		4, ENCRYPT)
 #define EXT2_FEATURE_COMPAT_SUPP	0
 #define EXT2_FEATURE_INCOMPAT_SUPP    (EXT2_FEATURE_INCOMPAT_FILETYPE| \
 				       EXT4_FEATURE_INCOMPAT_MMP| \
-				       EXT4_FEATURE_INCOMPAT_LARGEDIR)
+				       EXT4_FEATURE_INCOMPAT_LARGEDIR| \
+				       EXT4_FEATURE_INCOMPAT_EA_INODE)
 #define EXT2_FEATURE_RO_COMPAT_SUPP	(EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index c18ea5f827ad..f4131a640f9c 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -598,6 +598,7 @@ typedef struct ext2_icount *ext2_icount_t;
 					 EXT3_FEATURE_INCOMPAT_RECOVER|\
 					 EXT3_FEATURE_INCOMPAT_EXTENTS|\
 					 EXT4_FEATURE_INCOMPAT_FLEX_BG|\
+					 EXT4_FEATURE_INCOMPAT_EA_INODE|\
 					 EXT4_LIB_INCOMPAT_MMP|\
 					 EXT4_FEATURE_INCOMPAT_64BIT|\
 					 EXT4_FEATURE_INCOMPAT_INLINE_DATA|\
diff --git a/lib/ext2fs/ext_attr.c b/lib/ext2fs/ext_attr.c
index 7a9a2d5a39bc..14d05c7e57fb 100644
--- a/lib/ext2fs/ext_attr.c
+++ b/lib/ext2fs/ext_attr.c
@@ -46,7 +46,7 @@ __u32 ext2fs_ext_attr_hash_entry(struct ext2_ext_attr_entry *entry, void *data)
 	}
 
 	/* The hash needs to be calculated on the data in little-endian. */
-	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
+	if (entry->e_value_inum == 0 && entry->e_value_size != 0) {
 		__u32 *value = (__u32 *)data;
 		for (n = (entry->e_value_size + EXT2_EXT_ATTR_ROUND) >>
 			 EXT2_EXT_ATTR_PAD_BITS; n; n--) {
@@ -626,7 +626,7 @@ static errcode_t write_xattrs_to_buffer(struct ext2_xattr_handle *handle,
 		e->e_name_index = (ret ? idx : 0);
 		e->e_value_offs = end - value_size - (char *)entries_start +
 				value_offset_correction;
-		e->e_value_block = 0;
+		e->e_value_inum = 0;
 		e->e_value_size = x->value_len;
 
 		/* Store name and value */
@@ -824,38 +824,6 @@ static errcode_t read_xattrs_from_buffer(struct ext2_xattr_handle *handle,
 	remain = storage_size;
 	while (remain >= sizeof(struct ext2_ext_attr_entry) &&
 	       !EXT2_EXT_IS_LAST_ENTRY(entry)) {
-		__u32 hash;
-
-		/* header eats this space */
-		remain -= sizeof(struct ext2_ext_attr_entry);
-
-		/* attribute len eats this space */
-		remain -= EXT2_EXT_ATTR_SIZE(entry->e_name_len);
-
-		/* check value size */
-		if (entry->e_value_size > remain)
-			return EXT2_ET_EA_BAD_VALUE_SIZE;
-
-		if (entry->e_value_offs + entry->e_value_size > values_size)
-			return EXT2_ET_EA_BAD_VALUE_OFFSET;
-
-		if (entry->e_value_size > 0 &&
-		    value_start + entry->e_value_offs <
-		    (char *)end + sizeof(__u32))
-			return EXT2_ET_EA_BAD_VALUE_OFFSET;
-
-		/* e_value_block must be 0 in inode's ea */
-		if (entry->e_value_block != 0)
-			return EXT2_ET_BAD_EA_BLOCK_NUM;
-
-		hash = ext2fs_ext_attr_hash_entry(entry, value_start +
-							 entry->e_value_offs);
-
-		/* e_hash may be 0 in older inode's ea */
-		if (entry->e_hash != 0 && entry->e_hash != hash)
-			return EXT2_ET_BAD_EA_HASH;
-
-		remain -= entry->e_value_size;
 
 		/* Allocate space for more attrs? */
 		if (x == handle->attrs + handle->length) {
@@ -865,7 +833,13 @@ static errcode_t read_xattrs_from_buffer(struct ext2_xattr_handle *handle,
 			x = handle->attrs + handle->length - 4;
 		}
 
-		/* Extract name/value */
+		/* header eats this space */
+		remain -= sizeof(struct ext2_ext_attr_entry);
+
+		/* attribute len eats this space */
+		remain -= EXT2_EXT_ATTR_SIZE(entry->e_name_len);
+
+		/* Extract name */
 		prefix = find_ea_prefix(entry->e_name_index);
 		prefix_len = (prefix ? strlen(prefix) : 0);
 		err = ext2fs_get_memzero(entry->e_name_len + prefix_len + 1,
@@ -879,12 +853,72 @@ static errcode_t read_xattrs_from_buffer(struct ext2_xattr_handle *handle,
 			       (char *)entry + sizeof(*entry),
 			       entry->e_name_len);
 
-		err = ext2fs_get_mem(entry->e_value_size, &x->value);
-		if (err)
-			return err;
+		/* Check & copy value */
+		if (!ext2fs_has_feature_ea_inode(handle->fs->super) &&
+		    entry->e_value_inum != 0)
+			return EXT2_ET_BAD_EA_BLOCK_NUM;
+
+		if (entry->e_value_inum == 0) {
+			if (entry->e_value_size > remain)
+				return EXT2_ET_EA_BAD_VALUE_SIZE;
+
+			if (entry->e_value_offs + entry->e_value_size > values_size)
+				return EXT2_ET_EA_BAD_VALUE_OFFSET;
+
+			if (entry->e_value_size > 0 &&
+			    value_start + entry->e_value_offs <
+			    (char *)end + sizeof(__u32))
+				return EXT2_ET_EA_BAD_VALUE_OFFSET;
+
+			remain -= entry->e_value_size;
+
+			err = ext2fs_get_mem(entry->e_value_size, &x->value);
+			if (err)
+				return err;
+			memcpy(x->value, value_start + entry->e_value_offs,
+			       entry->e_value_size);
+		} else {
+			ext2_file_t ea_file;
+
+			if (entry->e_value_offs != 0)
+				return EXT2_ET_EA_BAD_VALUE_OFFSET;
+
+			if (entry->e_value_size > (64 * 1024))
+				return EXT2_ET_EA_BAD_VALUE_SIZE;
+
+			err = ext2fs_get_mem(entry->e_value_size, &x->value);
+			if (err)
+				return err;
+
+			err = ext2fs_file_open(handle->fs, entry->e_value_inum,
+					       0, &ea_file);
+			if (err)
+				return err;
+
+			if (ext2fs_file_get_size(ea_file) !=
+			    entry->e_value_size)
+				err = EXT2_ET_EA_BAD_VALUE_SIZE;
+			else
+				err = ext2fs_file_read(ea_file, x->value,
+						       entry->e_value_size, 0);
+			ext2fs_file_close(ea_file);
+			if (err)
+				return err;
+		}
+
 		x->value_len = entry->e_value_size;
-		memcpy(x->value, value_start + entry->e_value_offs,
-		       entry->e_value_size);
+
+		/* e_hash may be 0 in older inode's ea */
+		if (entry->e_hash != 0) {
+			__u32 hash;
+			void *data = (entry->e_value_inum != 0) ?
+					0 : value_start + entry->e_value_offs;
+
+			hash = ext2fs_ext_attr_hash_entry(entry, data);
+			if (entry->e_hash != hash)
+				return EXT2_ET_BAD_EA_HASH;
+		}
+
 		x++;
 		(*nr_read)++;
 		entry = EXT2_EXT_ATTR_NEXT(entry);
@@ -1107,7 +1141,7 @@ errcode_t ext2fs_xattr_inode_max_size(ext2_filsys fs, ext2_ino_t ino,
 			inode->i_extra_isize + sizeof(__u32);
 		entry = (struct ext2_ext_attr_entry *) start;
 		while (!EXT2_EXT_IS_LAST_ENTRY(entry)) {
-			if (!entry->e_value_block && entry->e_value_size) {
+			if (!entry->e_value_inum && entry->e_value_size) {
 				unsigned int offs = entry->e_value_offs;
 				if (offs < minoff)
 					minoff = offs;
diff --git a/lib/ext2fs/swapfs.c b/lib/ext2fs/swapfs.c
index 2d05ee7b995b..23a8570c2a3a 100644
--- a/lib/ext2fs/swapfs.c
+++ b/lib/ext2fs/swapfs.c
@@ -170,7 +170,7 @@ void ext2fs_swap_ext_attr_entry(struct ext2_ext_attr_entry *to_entry,
 				struct ext2_ext_attr_entry *from_entry)
 {
 	to_entry->e_value_offs  = ext2fs_swab16(from_entry->e_value_offs);
-	to_entry->e_value_block = ext2fs_swab32(from_entry->e_value_block);
+	to_entry->e_value_inum  = ext2fs_swab32(from_entry->e_value_inum);
 	to_entry->e_value_size  = ext2fs_swab32(from_entry->e_value_size);
 	to_entry->e_hash	= ext2fs_swab32(from_entry->e_hash);
 }
diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 6cbba33b31d8..1ef46f44f35d 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -1082,6 +1082,7 @@ static __u32 ok_features[3] = {
 		EXT3_FEATURE_INCOMPAT_JOURNAL_DEV|
 		EXT2_FEATURE_INCOMPAT_META_BG|
 		EXT4_FEATURE_INCOMPAT_FLEX_BG|
+		EXT4_FEATURE_INCOMPAT_EA_INODE|
 		EXT4_FEATURE_INCOMPAT_MMP |
 		EXT4_FEATURE_INCOMPAT_64BIT|
 		EXT4_FEATURE_INCOMPAT_INLINE_DATA|
diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index dc5872934427..90746707064b 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -153,6 +153,7 @@ static __u32 ok_features[3] = {
 	EXT2_FEATURE_INCOMPAT_FILETYPE |
 		EXT3_FEATURE_INCOMPAT_EXTENTS |
 		EXT4_FEATURE_INCOMPAT_FLEX_BG |
+		EXT4_FEATURE_INCOMPAT_EA_INODE|
 		EXT4_FEATURE_INCOMPAT_MMP |
 		EXT4_FEATURE_INCOMPAT_64BIT |
 		EXT4_FEATURE_INCOMPAT_ENCRYPT |
@@ -179,6 +180,7 @@ static __u32 clear_ok_features[3] = {
 	/* Incompat */
 	EXT2_FEATURE_INCOMPAT_FILETYPE |
 		EXT4_FEATURE_INCOMPAT_FLEX_BG |
+		EXT4_FEATURE_INCOMPAT_EA_INODE|
 		EXT4_FEATURE_INCOMPAT_MMP |
 		EXT4_FEATURE_INCOMPAT_64BIT |
 		EXT4_FEATURE_INCOMPAT_CSUM_SEED,
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 02/12] tune2fs: do not allow disabling ea_inode feature
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 03/12] e2fsck: ea_inode hash validation Tahsin Erdogan
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

Disabling ea_inode feature would require inlining all the existing
xattr values that are currently stored in external inodes. This is
not always possible. Just disallow it.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 misc/tune2fs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index 90746707064b..2a437b9fc7b7 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -180,7 +180,6 @@ static __u32 clear_ok_features[3] = {
 	/* Incompat */
 	EXT2_FEATURE_INCOMPAT_FILETYPE |
 		EXT4_FEATURE_INCOMPAT_FLEX_BG |
-		EXT4_FEATURE_INCOMPAT_EA_INODE|
 		EXT4_FEATURE_INCOMPAT_MMP |
 		EXT4_FEATURE_INCOMPAT_64BIT |
 		EXT4_FEATURE_INCOMPAT_CSUM_SEED,
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 03/12] e2fsck: ea_inode hash validation
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 02/12] tune2fs: do not allow disabling ea_inode feature Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 21:21   ` Andreas Dilger
  2017-06-26 13:43 ` [PATCH 04/12] e2fsck: do not early terminate extra space check Tahsin Erdogan
                   ` (9 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

In original implementation of ea_inode feature, each xattr inode had
a single parent. Child inode tracked the parent by storing its inode
number in i_mtime field. Also, child's i_generation matched parent's
i_generation.

With deduplication support, xattr inodes can now be shared so a single
backpointer is not sufficient to achieve strong binding. This is now
replaced by hash validation.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 e2fsck/pass1.c        | 91 +++++++++++++++++++++++++++++++++------------------
 e2fsck/problem.c      |  5 ---
 e2fsck/problem.h      |  7 ++--
 lib/ext2fs/ext2fs.h   |  3 ++
 lib/ext2fs/ext_attr.c | 67 +++++++++++++++++++++++++++++++++++--
 5 files changed, 128 insertions(+), 45 deletions(-)

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 1c68af8990b4..32152f3ec926 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -360,27 +360,54 @@ static problem_t check_large_ea_inode(e2fsck_t ctx,
 				      struct problem_context *pctx)
 {
 	struct ext2_inode inode;
+	__u32 hash;
+	errcode_t retval;
 
 	/* Check if inode is within valid range */
 	if ((entry->e_value_inum < EXT2_FIRST_INODE(ctx->fs->super)) ||
-	    (entry->e_value_inum > ctx->fs->super->s_inodes_count))
+	    (entry->e_value_inum > ctx->fs->super->s_inodes_count)) {
+		pctx->num = entry->e_value_inum;
 		return PR_1_ATTR_VALUE_EA_INODE;
+	}
 
 	e2fsck_read_inode(ctx, entry->e_value_inum, &inode, "pass1");
+
+	retval = ext2fs_ext_attr_hash_entry2(ctx->fs, entry, NULL, &hash);
+	if (retval) {
+		com_err("check_large_ea_inode", retval,
+			_("while hashing entry with e_value_inum = %u"),
+			entry->e_value_inum);
+		fatal_error(ctx, 0);
+	}
+
+	if (hash != entry->e_hash) {
+		/* This might be an old Lustre-style ea_inode reference. */
+		if (inode.i_mtime != pctx->ino ||
+		    inode.i_generation != pctx->inode->i_generation) {
+
+			/* If target inode is also missing EA_INODE flag,
+			 * this is likely to be a bad reference.
+			 */
+			if (!(inode.i_flags & EXT4_EA_INODE_FL)) {
+				pctx->num = entry->e_value_inum;
+				return PR_1_ATTR_VALUE_EA_INODE;
+			} else {
+				pctx->num = entry->e_hash;
+				return PR_1_ATTR_HASH;
+			}
+		}
+	}
+
 	if (!(inode.i_flags & EXT4_EA_INODE_FL)) {
-		/* If EXT4_EA_INODE_FL flag is not present but back-pointer
-		 * matches then we should set this flag */
-		if (inode.i_mtime == pctx->ino &&
-		    inode.i_generation == pctx->inode->i_generation &&
-		    fix_problem(ctx, PR_1_ATTR_SET_EA_INODE_FL, pctx)) {
+		pctx->num = entry->e_value_inum;
+		if (fix_problem(ctx, PR_1_ATTR_SET_EA_INODE_FL, pctx)) {
 			inode.i_flags |= EXT4_EA_INODE_FL;
-			ext2fs_write_inode(ctx->fs, entry->e_value_inum,&inode);
-		} else
+			ext2fs_write_inode(ctx->fs, entry->e_value_inum,
+					   &inode);
+		} else {
 			return PR_1_ATTR_NO_EA_INODE_FL;
-	} else if (inode.i_mtime != pctx->ino ||
-		   inode.i_generation != pctx->inode->i_generation)
-		return PR_1_ATTR_INVAL_EA_INODE;
-
+		}
+	}
 	return 0;
 }
 
@@ -458,6 +485,16 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
 				problem = PR_1_INODE_EA_ALLOC_COLLISION;
 				goto fix;
 			}
+
+			hash = ext2fs_ext_attr_hash_entry(entry,
+							  start + entry->e_value_offs);
+
+			/* e_hash may be 0 in older inode's ea */
+			if (entry->e_hash != 0 && entry->e_hash != hash) {
+				pctx->num = entry->e_hash;
+				problem = PR_1_ATTR_HASH;
+				goto fix;
+			}
 		} else {
 			problem = check_large_ea_inode(ctx, entry, pctx);
 			if (problem != 0)
@@ -466,16 +503,6 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
 			mark_inode_ea_map(ctx, pctx, entry->e_value_inum);
 		}
 
-		hash = ext2fs_ext_attr_hash_entry(entry,
-						  start + entry->e_value_offs);
-
-		/* e_hash may be 0 in older inode's ea */
-		if (entry->e_hash != 0 && entry->e_hash != hash) {
-			pctx->num = entry->e_hash;
-			problem = PR_1_ATTR_HASH;
-			goto fix;
-		}
-
 		/* If EA value is stored in external inode then it does not
 		 * consume space here */
 		if (entry->e_value_inum == 0)
@@ -2439,6 +2466,16 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 						pctx))
 					goto clear_extattr;
 			}
+
+			hash = ext2fs_ext_attr_hash_entry(entry, block_buf +
+							  entry->e_value_offs);
+
+			if (entry->e_hash != hash) {
+				pctx->num = entry->e_hash;
+				if (fix_problem(ctx, PR_1_ATTR_HASH, pctx))
+					goto clear_extattr;
+				entry->e_hash = hash;
+			}
 		} else {
 			problem_t problem;
 
@@ -2450,16 +2487,6 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 				goto clear_extattr;
 		}
 
-		hash = ext2fs_ext_attr_hash_entry(entry, block_buf +
-							 entry->e_value_offs);
-
-		if (entry->e_hash != hash) {
-			pctx->num = entry->e_hash;
-			if (fix_problem(ctx, PR_1_ATTR_HASH, pctx))
-				goto clear_extattr;
-			entry->e_hash = hash;
-		}
-
 		entry = EXT2_EXT_ATTR_NEXT(entry);
 	}
 	if (region_allocate(region, (char *)entry - (char *)header, 4)) {
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index dcdf9abe1abd..deffa4d66a89 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -1150,11 +1150,6 @@ static struct e2fsck_problem problem_table[] = {
 	  N_("@i %i has @I @a value @i %N.\n"),
 	  PROMPT_CLEAR, PR_PREEN_OK },
 
-	/* Invalid backpointer from extended attribute inode to parent inode */
-	{ PR_1_ATTR_INVAL_EA_INODE,
-	  N_("@n backpointer from @a @i %N to parent @i %i.\n"),
-	  PROMPT_CLEAR, PR_PREEN_OK },
-
 	/* Inode has invalid extended attribute. EA inode missing
 	 * EA_INODE flag. */
 	{ PR_1_ATTR_NO_EA_INODE_FL,
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index dad608c94fd0..8e07c10895c1 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -678,15 +678,12 @@ struct problem_context {
 /* Inode has illegal EA value inode */
 #define PR_1_ATTR_VALUE_EA_INODE		0x010083
 
-/* Invalid backpointer from EA inode to parent inode */
-#define PR_1_ATTR_INVAL_EA_INODE		0x010084
-
 /* Parent inode has invalid EA entry. EA inode does not have
  * EXT4_EA_INODE_FL flag. Delete EA entry? */
-#define PR_1_ATTR_NO_EA_INODE_FL		0x010085
+#define PR_1_ATTR_NO_EA_INODE_FL		0x010084
 
 /* EA inode for parent inode does not have EXT4_EA_INODE_FL flag */
-#define PR_1_ATTR_SET_EA_INODE_FL		0x010086
+#define PR_1_ATTR_SET_EA_INODE_FL		0x010085
 
 
 /*
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index f4131a640f9c..f72cbe17a8cd 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1198,6 +1198,9 @@ extern errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir);
 /* ext_attr.c */
 extern __u32 ext2fs_ext_attr_hash_entry(struct ext2_ext_attr_entry *entry,
 					void *data);
+extern errcode_t ext2fs_ext_attr_hash_entry2(ext2_filsys fs,
+					     struct ext2_ext_attr_entry *entry,
+					     void *data, __u32 *hash);
 extern errcode_t ext2fs_read_ext_attr(ext2_filsys fs, blk_t block, void *buf);
 extern errcode_t ext2fs_read_ext_attr2(ext2_filsys fs, blk64_t block,
 				       void *buf);
diff --git a/lib/ext2fs/ext_attr.c b/lib/ext2fs/ext_attr.c
index 14d05c7e57fb..d48ab5c55270 100644
--- a/lib/ext2fs/ext_attr.c
+++ b/lib/ext2fs/ext_attr.c
@@ -25,6 +25,18 @@
 
 #include "ext2fs.h"
 
+static errcode_t read_ea_inode_hash(ext2_filsys fs, ext2_ino_t ino, __u32 *hash)
+{
+	struct ext2_inode inode;
+	errcode_t retval;
+
+	retval = ext2fs_read_inode(fs, ino, &inode);
+	if (retval)
+		return retval;
+	*hash = inode.i_atime;
+	return 0;
+}
+
 #define NAME_HASH_SHIFT 5
 #define VALUE_HASH_SHIFT 16
 
@@ -59,6 +71,35 @@ __u32 ext2fs_ext_attr_hash_entry(struct ext2_ext_attr_entry *entry, void *data)
 	return hash;
 }
 
+/*
+ * ext2fs_ext_attr_hash_entry2()
+ *
+ * Compute the hash of an extended attribute.
+ * This version of the function supports hashing entries that reference
+ * external inodes (ea_inode feature).
+ */
+errcode_t ext2fs_ext_attr_hash_entry2(ext2_filsys fs,
+				      struct ext2_ext_attr_entry *entry,
+				      void *data, __u32 *hash)
+{
+	*hash = ext2fs_ext_attr_hash_entry(entry, data);
+
+	if (entry->e_value_inum) {
+		__u32 ea_inode_hash;
+		errcode_t retval;
+
+		retval = read_ea_inode_hash(fs, entry->e_value_inum,
+					    &ea_inode_hash);
+		if (retval)
+			return retval;
+
+		*hash = (*hash << VALUE_HASH_SHIFT) ^
+			(*hash >> (8*sizeof(*hash) - VALUE_HASH_SHIFT)) ^
+			ea_inode_hash;
+	}
+	return 0;
+}
+
 static errcode_t check_ext_attr_header(struct ext2_ext_attr_header *header)
 {
 	if ((header->h_magic != EXT2_EXT_ATTR_MAGIC_v1 &&
@@ -914,9 +955,29 @@ static errcode_t read_xattrs_from_buffer(struct ext2_xattr_handle *handle,
 			void *data = (entry->e_value_inum != 0) ?
 					0 : value_start + entry->e_value_offs;
 
-			hash = ext2fs_ext_attr_hash_entry(entry, data);
-			if (entry->e_hash != hash)
-				return EXT2_ET_BAD_EA_HASH;
+			err = ext2fs_ext_attr_hash_entry2(handle->fs, entry,
+							  data, &hash);
+			if (err)
+				return err;
+			if (entry->e_hash != hash) {
+				struct ext2_inode parent, child;
+
+				/* Check whether this is an old Lustre-style
+				 * ea_inode reference.
+				 */
+				err = ext2fs_read_inode(handle->fs, handle->ino,
+							&parent);
+				if (err)
+					return err;
+				err = ext2fs_read_inode(handle->fs,
+							entry->e_value_inum,
+							&child);
+				if (err)
+					return err;
+				if (child.i_mtime != handle->ino ||
+				    child.i_generation != parent.i_generation)
+					return EXT2_ET_BAD_EA_HASH;
+			}
 		}
 
 		x++;
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 04/12] e2fsck: do not early terminate extra space check
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 02/12] tune2fs: do not allow disabling ea_inode feature Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 03/12] e2fsck: ea_inode hash validation Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 21:23   ` Andreas Dilger
  2017-06-26 13:43 ` [PATCH 05/12] e2fsck: generalize ea_refcount Tahsin Erdogan
                   ` (8 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

When check_inode_extra_space() detects a problem with the value of
i_extra_isize, it adjusts it and then returns without further validation
of contents in the inode body. Change this so that it will proceed to
check inline extended attributes.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 e2fsck/pass1.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 32152f3ec926..1532fd2067f2 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -582,7 +582,6 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
 			inode->i_extra_isize = (inode->i_extra_isize + 3) & ~3;
 		e2fsck_write_inode_full(ctx, pctx->ino, pctx->inode,
 					EXT2_INODE_SIZE(sb), "pass1");
-		return;
 	}
 
 	/* check if there is no place for an EA header */
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 05/12] e2fsck: generalize ea_refcount
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
                   ` (2 preceding siblings ...)
  2017-06-26 13:43 ` [PATCH 04/12] e2fsck: do not early terminate extra space check Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 06/12] e2fsck: update i_blocks accounting for ea_inode feature Tahsin Erdogan
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

Currently ea_refcount is only used to track ea block refcounts. By
generalizing it, we could use it for ea quota tracking and also
ea_inode refcounts.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 e2fsck/e2fsck.h      |  21 ++++---
 e2fsck/ea_refcount.c | 160 +++++++++++++++++++++++++++------------------------
 e2fsck/pass1.c       |   4 +-
 3 files changed, 99 insertions(+), 86 deletions(-)

diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index c19cdfdd733e..f9285d01978c 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -466,18 +466,23 @@ extern int e2fsck_get_num_dx_dirinfo(e2fsck_t ctx);
 extern struct dx_dir_info *e2fsck_dx_dir_info_iter(e2fsck_t ctx, int *control);
 
 /* ea_refcount.c */
-extern errcode_t ea_refcount_create(int size, ext2_refcount_t *ret);
+typedef __u64 ea_key_t;
+typedef __u64 ea_value_t;
+
+extern errcode_t ea_refcount_create(size_t size, ext2_refcount_t *ret);
 extern void ea_refcount_free(ext2_refcount_t refcount);
-extern errcode_t ea_refcount_fetch(ext2_refcount_t refcount, blk64_t blk, int *ret);
+extern errcode_t ea_refcount_fetch(ext2_refcount_t refcount, ea_key_t ea_key,
+				   ea_value_t *ret);
 extern errcode_t ea_refcount_increment(ext2_refcount_t refcount,
-				       blk64_t blk, int *ret);
+				       ea_key_t ea_key, ea_value_t *ret);
 extern errcode_t ea_refcount_decrement(ext2_refcount_t refcount,
-				       blk64_t blk, int *ret);
-extern errcode_t ea_refcount_store(ext2_refcount_t refcount,
-				   blk64_t blk, int count);
-extern blk_t ext2fs_get_refcount_size(ext2_refcount_t refcount);
+				       ea_key_t ea_key, ea_value_t *ret);
+extern errcode_t ea_refcount_store(ext2_refcount_t refcount, ea_key_t ea_key,
+				   ea_value_t count);
+extern size_t ext2fs_get_refcount_size(ext2_refcount_t refcount);
 extern void ea_refcount_intr_begin(ext2_refcount_t refcount);
-extern blk64_t ea_refcount_intr_next(ext2_refcount_t refcount, int *ret);
+extern ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
+				      ea_value_t *ret);
 
 /* ehandler.c */
 extern const char *ehandler_operation(const char *op);
diff --git a/e2fsck/ea_refcount.c b/e2fsck/ea_refcount.c
index fcfaf4970ece..ecb198640c69 100644
--- a/e2fsck/ea_refcount.c
+++ b/e2fsck/ea_refcount.c
@@ -25,14 +25,15 @@
  * checked, its bit is set in the block_ea_map bitmap.
  */
 struct ea_refcount_el {
-	blk64_t	ea_blk;
-	int	ea_count;
+	/* ea_key could either be an inode number or block number. */
+	ea_key_t	ea_key;
+	ea_value_t	ea_value;
 };
 
 struct ea_refcount {
-	blk_t		count;
-	blk_t		size;
-	blk_t		cursor;
+	size_t		count;
+	size_t		size;
+	size_t		cursor;
 	struct ea_refcount_el	*list;
 };
 
@@ -46,7 +47,7 @@ void ea_refcount_free(ext2_refcount_t refcount)
 	ext2fs_free_mem(&refcount);
 }
 
-errcode_t ea_refcount_create(int size, ext2_refcount_t *ret)
+errcode_t ea_refcount_create(size_t size, ext2_refcount_t *ret)
 {
 	ext2_refcount_t	refcount;
 	errcode_t	retval;
@@ -60,9 +61,9 @@ errcode_t ea_refcount_create(int size, ext2_refcount_t *ret)
 	if (!size)
 		size = 500;
 	refcount->size = size;
-	bytes = (size_t) (size * sizeof(struct ea_refcount_el));
+	bytes = size * sizeof(struct ea_refcount_el);
 #ifdef DEBUG
-	printf("Refcount allocated %d entries, %d bytes.\n",
+	printf("Refcount allocated %zu entries, %zu bytes.\n",
 	       refcount->size, bytes);
 #endif
 	retval = ext2fs_get_mem(bytes, &refcount->list);
@@ -92,14 +93,14 @@ static void refcount_collapse(ext2_refcount_t refcount)
 
 	list = refcount->list;
 	for (i = 0, j = 0; i < refcount->count; i++) {
-		if (list[i].ea_count) {
+		if (list[i].ea_value) {
 			if (i != j)
 				list[j] = list[i];
 			j++;
 		}
 	}
 #if defined(DEBUG) || defined(TEST_PROGRAM)
-	printf("Refcount_collapse: size was %d, now %d\n",
+	printf("Refcount_collapse: size was %zu, now %d\n",
 	       refcount->count, j);
 #endif
 	refcount->count = j;
@@ -111,11 +112,11 @@ static void refcount_collapse(ext2_refcount_t refcount)
  * 	specified position.
  */
 static struct ea_refcount_el *insert_refcount_el(ext2_refcount_t refcount,
-						 blk64_t blk, int pos)
+						 ea_key_t ea_key, int pos)
 {
 	struct ea_refcount_el 	*el;
 	errcode_t		retval;
-	blk_t			new_size = 0;
+	size_t			new_size = 0;
 	int			num;
 
 	if (refcount->count >= refcount->size) {
@@ -141,8 +142,8 @@ static struct ea_refcount_el *insert_refcount_el(ext2_refcount_t refcount,
 	}
 	refcount->count++;
 	el = &refcount->list[pos];
-	el->ea_count = 0;
-	el->ea_blk = blk;
+	el->ea_key = ea_key;
+	el->ea_value = 0;
 	return el;
 }
 
@@ -153,7 +154,7 @@ static struct ea_refcount_el *insert_refcount_el(ext2_refcount_t refcount,
  * 	and we can't find an entry, create one in the sorted list.
  */
 static struct ea_refcount_el *get_refcount_el(ext2_refcount_t refcount,
-					      blk64_t blk, int create)
+					      ea_key_t ea_key, int create)
 {
 	int	low, high, mid;
 
@@ -163,11 +164,11 @@ retry:
 	low = 0;
 	high = (int) refcount->count-1;
 	if (create && ((refcount->count == 0) ||
-		       (blk > refcount->list[high].ea_blk))) {
+		       (ea_key > refcount->list[high].ea_key))) {
 		if (refcount->count >= refcount->size)
 			refcount_collapse(refcount);
 
-		return insert_refcount_el(refcount, blk,
+		return insert_refcount_el(refcount, ea_key,
 					  (unsigned) refcount->count);
 	}
 	if (refcount->count == 0)
@@ -175,18 +176,18 @@ retry:
 
 	if (refcount->cursor >= refcount->count)
 		refcount->cursor = 0;
-	if (blk == refcount->list[refcount->cursor].ea_blk)
+	if (ea_key == refcount->list[refcount->cursor].ea_key)
 		return &refcount->list[refcount->cursor++];
 #ifdef DEBUG
-	printf("Non-cursor get_refcount_el: %u\n", blk);
+	printf("Non-cursor get_refcount_el: %u\n", ea_key);
 #endif
 	while (low <= high) {
 		mid = (low+high)/2;
-		if (blk == refcount->list[mid].ea_blk) {
+		if (ea_key == refcount->list[mid].ea_key) {
 			refcount->cursor = mid+1;
 			return &refcount->list[mid];
 		}
-		if (blk < refcount->list[mid].ea_blk)
+		if (ea_key < refcount->list[mid].ea_key)
 			high = mid-1;
 		else
 			low = mid+1;
@@ -201,69 +202,72 @@ retry:
 			if (refcount->count < refcount->size)
 				goto retry;
 		}
-		return insert_refcount_el(refcount, blk, low);
+		return insert_refcount_el(refcount, ea_key, low);
 	}
 	return 0;
 }
 
-errcode_t ea_refcount_fetch(ext2_refcount_t refcount, blk64_t blk,
-				int *ret)
+errcode_t ea_refcount_fetch(ext2_refcount_t refcount, ea_key_t ea_key,
+			    ea_value_t *ret)
 {
 	struct ea_refcount_el	*el;
 
-	el = get_refcount_el(refcount, blk, 0);
+	el = get_refcount_el(refcount, ea_key, 0);
 	if (!el) {
 		*ret = 0;
 		return 0;
 	}
-	*ret = el->ea_count;
+	*ret = el->ea_value;
 	return 0;
 }
 
-errcode_t ea_refcount_increment(ext2_refcount_t refcount, blk64_t blk, int *ret)
+errcode_t ea_refcount_increment(ext2_refcount_t refcount, ea_key_t ea_key,
+				ea_value_t *ret)
 {
 	struct ea_refcount_el	*el;
 
-	el = get_refcount_el(refcount, blk, 1);
+	el = get_refcount_el(refcount, ea_key, 1);
 	if (!el)
 		return EXT2_ET_NO_MEMORY;
-	el->ea_count++;
+	el->ea_value++;
 
 	if (ret)
-		*ret = el->ea_count;
+		*ret = el->ea_value;
 	return 0;
 }
 
-errcode_t ea_refcount_decrement(ext2_refcount_t refcount, blk64_t blk, int *ret)
+errcode_t ea_refcount_decrement(ext2_refcount_t refcount, ea_key_t ea_key,
+				ea_value_t *ret)
 {
 	struct ea_refcount_el	*el;
 
-	el = get_refcount_el(refcount, blk, 0);
-	if (!el || el->ea_count == 0)
+	el = get_refcount_el(refcount, ea_key, 0);
+	if (!el || el->ea_value == 0)
 		return EXT2_ET_INVALID_ARGUMENT;
 
-	el->ea_count--;
+	el->ea_value--;
 
 	if (ret)
-		*ret = el->ea_count;
+		*ret = el->ea_value;
 	return 0;
 }
 
-errcode_t ea_refcount_store(ext2_refcount_t refcount, blk64_t blk, int count)
+errcode_t ea_refcount_store(ext2_refcount_t refcount, ea_key_t ea_key,
+			    ea_value_t ea_value)
 {
 	struct ea_refcount_el	*el;
 
 	/*
 	 * Get the refcount element
 	 */
-	el = get_refcount_el(refcount, blk, count ? 1 : 0);
+	el = get_refcount_el(refcount, ea_key, ea_value ? 1 : 0);
 	if (!el)
-		return count ? EXT2_ET_NO_MEMORY : 0;
-	el->ea_count = count;
+		return ea_value ? EXT2_ET_NO_MEMORY : 0;
+	el->ea_value = ea_value;
 	return 0;
 }
 
-blk_t ext2fs_get_refcount_size(ext2_refcount_t refcount)
+size_t ext2fs_get_refcount_size(ext2_refcount_t refcount)
 {
 	if (!refcount)
 		return 0;
@@ -276,9 +280,8 @@ void ea_refcount_intr_begin(ext2_refcount_t refcount)
 	refcount->cursor = 0;
 }
 
-
-blk64_t ea_refcount_intr_next(ext2_refcount_t refcount,
-				int *ret)
+ea_key_t ea_refcount_intr_next(ext2_refcount_t refcount,
+				ea_value_t *ret)
 {
 	struct ea_refcount_el	*list;
 
@@ -286,10 +289,10 @@ blk64_t ea_refcount_intr_next(ext2_refcount_t refcount,
 		if (refcount->cursor >= refcount->count)
 			return 0;
 		list = refcount->list;
-		if (list[refcount->cursor].ea_count) {
+		if (list[refcount->cursor].ea_value) {
 			if (ret)
-				*ret = list[refcount->cursor].ea_count;
-			return list[refcount->cursor++].ea_blk;
+				*ret = list[refcount->cursor].ea_value;
+			return list[refcount->cursor++].ea_key;
 		}
 		refcount->cursor++;
 	}
@@ -309,11 +312,11 @@ errcode_t ea_refcount_validate(ext2_refcount_t refcount, FILE *out)
 		return EXT2_ET_INVALID_ARGUMENT;
 	}
 	for (i=1; i < refcount->count; i++) {
-		if (refcount->list[i-1].ea_blk >= refcount->list[i].ea_blk) {
+		if (refcount->list[i-1].ea_key >= refcount->list[i].ea_key) {
 			fprintf(out,
-				"%s: list[%d].blk=%llu, list[%d].blk=%llu\n",
-				bad, i-1, refcount->list[i-1].ea_blk,
-				i, refcount->list[i].ea_blk);
+				"%s: list[%d].ea_key=%llu, list[%d].ea_key=%llu\n",
+				bad, i-1, refcount->list[i-1].ea_key, i,
+				refcount->list[i].ea_key);
 			ret = EXT2_ET_INVALID_ARGUMENT;
 		}
 	}
@@ -370,8 +373,9 @@ int main(int argc, char **argv)
 {
 	int	i = 0;
 	ext2_refcount_t refcount;
-	int		size, arg;
-	blk64_t		blk;
+	size_t		size;
+	ea_key_t	ea_key;
+	ea_value_t	arg;
 	errcode_t	retval;
 
 	while (1) {
@@ -383,10 +387,10 @@ int main(int argc, char **argv)
 			retval = ea_refcount_create(size, &refcount);
 			if (retval) {
 				com_err("ea_refcount_create", retval,
-					"while creating size %d", size);
+					"while creating size %zu", size);
 				exit(1);
 			} else
-				printf("Creating refcount with size %d\n",
+				printf("Creating refcount with size %zu\n",
 				       size);
 			break;
 		case BCODE_FREE:
@@ -395,43 +399,46 @@ int main(int argc, char **argv)
 			printf("Freeing refcount\n");
 			break;
 		case BCODE_STORE:
-			blk = (blk_t) bcode_program[i++];
+			ea_key = (size_t) bcode_program[i++];
 			arg = bcode_program[i++];
-			printf("Storing blk %llu with value %d\n", blk, arg);
-			retval = ea_refcount_store(refcount, blk, arg);
+			printf("Storing ea_key %llu with value %llu\n", ea_key,
+			       arg);
+			retval = ea_refcount_store(refcount, ea_key, arg);
 			if (retval)
 				com_err("ea_refcount_store", retval,
-					"while storing blk %llu", blk);
+					"while storing ea_key %llu", ea_key);
 			break;
 		case BCODE_FETCH:
-			blk = (blk_t) bcode_program[i++];
-			retval = ea_refcount_fetch(refcount, blk, &arg);
+			ea_key = (size_t) bcode_program[i++];
+			retval = ea_refcount_fetch(refcount, ea_key, &arg);
 			if (retval)
 				com_err("ea_refcount_fetch", retval,
-					"while fetching blk %llu", blk);
+					"while fetching ea_key %llu", ea_key);
 			else
-				printf("bcode_fetch(%llu) returns %d\n",
-				       blk, arg);
+				printf("bcode_fetch(%llu) returns %llu\n",
+				       ea_key, arg);
 			break;
 		case BCODE_INCR:
-			blk = (blk_t) bcode_program[i++];
-			retval = ea_refcount_increment(refcount, blk, &arg);
+			ea_key = (size_t) bcode_program[i++];
+			retval = ea_refcount_increment(refcount, ea_key, &arg);
 			if (retval)
 				com_err("ea_refcount_increment", retval,
-					"while incrementing blk %llu", blk);
+					"while incrementing ea_key %llu",
+					ea_key);
 			else
-				printf("bcode_increment(%llu) returns %d\n",
-				       blk, arg);
+				printf("bcode_increment(%llu) returns %llu\n",
+				       ea_key, arg);
 			break;
 		case BCODE_DECR:
-			blk = (blk_t) bcode_program[i++];
-			retval = ea_refcount_decrement(refcount, blk, &arg);
+			ea_key = (size_t) bcode_program[i++];
+			retval = ea_refcount_decrement(refcount, ea_key, &arg);
 			if (retval)
 				com_err("ea_refcount_decrement", retval,
-					"while decrementing blk %llu", blk);
+					"while decrementing ea_key %llu",
+					ea_key);
 			else
-				printf("bcode_decrement(%llu) returns %d\n",
-				       blk, arg);
+				printf("bcode_decrement(%llu) returns %llu\n",
+				       ea_key, arg);
 			break;
 		case BCODE_VALIDATE:
 			retval = ea_refcount_validate(refcount, stderr);
@@ -444,10 +451,11 @@ int main(int argc, char **argv)
 		case BCODE_LIST:
 			ea_refcount_intr_begin(refcount);
 			while (1) {
-				blk = ea_refcount_intr_next(refcount, &arg);
-				if (!blk)
+				ea_key = ea_refcount_intr_next(refcount, &arg);
+				if (!ea_key)
 					break;
-				printf("\tblk=%llu, count=%d\n", blk, arg);
+				printf("\tea_key=%llu, count=%llu\n", ea_key,
+				       arg);
 			}
 			break;
 		case BCODE_COLLAPSE:
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 1532fd2067f2..9ee2c5e89a61 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -2269,7 +2269,7 @@ static void adjust_extattr_refcount(e2fsck_t ctx, ext2_refcount_t refcount,
 	ext2_filsys			fs = ctx->fs;
 	blk64_t				blk;
 	__u32				should_be;
-	int				count;
+	ea_value_t			count;
 
 	clear_problem_context(&pctx);
 
@@ -2286,7 +2286,7 @@ static void adjust_extattr_refcount(e2fsck_t ctx, ext2_refcount_t refcount,
 		}
 		header = (struct ext2_ext_attr_header *) block_buf;
 		pctx.blkcount = header->h_refcount;
-		should_be = header->h_refcount + adjust_sign * count;
+		should_be = header->h_refcount + adjust_sign * (int)count;
 		pctx.num = should_be;
 		if (fix_problem(ctx, PR_1_EXTATTR_REFCOUNT, &pctx)) {
 			header->h_refcount = should_be;
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 06/12] e2fsck: update i_blocks accounting for ea_inode feature
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
                   ` (3 preceding siblings ...)
  2017-06-26 13:43 ` [PATCH 05/12] e2fsck: generalize ea_refcount Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 21:36   ` Andreas Dilger
  2017-06-26 13:43 ` [PATCH 07/12] e2fsck: track ea_inode references Tahsin Erdogan
                   ` (6 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

With ea_inode feature, i_blocks include the disk space used by
referenced xattr inodes. Make e2fsck aware of that.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 e2fsck/e2fsck.c |   4 ++
 e2fsck/e2fsck.h |   5 +++
 e2fsck/pass1.c  | 124 +++++++++++++++++++++++++++++++++++++++++---------------
 3 files changed, 100 insertions(+), 33 deletions(-)

diff --git a/e2fsck/e2fsck.c b/e2fsck/e2fsck.c
index 0f9da46a5a12..e0f449ee2047 100644
--- a/e2fsck/e2fsck.c
+++ b/e2fsck/e2fsck.c
@@ -98,6 +98,10 @@ errcode_t e2fsck_reset_context(e2fsck_t ctx)
 		ea_refcount_free(ctx->refcount_extra);
 		ctx->refcount_extra = 0;
 	}
+	if (ctx->ea_block_quota) {
+		ea_refcount_free(ctx->ea_block_quota);
+		ctx->ea_block_quota = 0;
+	}
 	if (ctx->block_dup_map) {
 		ext2fs_free_block_bitmap(ctx->block_dup_map);
 		ctx->block_dup_map = 0;
diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index f9285d01978c..79eeda9fbafb 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -269,6 +269,11 @@ struct e2fsck_struct {
 	ext2_refcount_t	refcount;
 	ext2_refcount_t refcount_extra;
 
+	/*
+	 * Quota blocks to be charged for each ea block.
+	 */
+	ext2_refcount_t ea_block_quota;
+
 	/*
 	 * Array of flags indicating whether an inode bitmap, block
 	 * bitmap, or inode table is invalid
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 9ee2c5e89a61..cc88a7d05cef 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -66,7 +66,7 @@ static int process_bad_block(ext2_filsys fs, blk64_t *block_nr,
 			     e2_blkcnt_t blockcnt, blk64_t ref_blk,
 			     int ref_offset, void *priv_data);
 static void check_blocks(e2fsck_t ctx, struct problem_context *pctx,
-			 char *block_buf);
+			 char *block_buf, blk64_t ea_ibody_quota_blocks);
 static void mark_table_blocks(e2fsck_t ctx);
 static void alloc_bb_map(e2fsck_t ctx);
 static void alloc_imagic_map(e2fsck_t ctx);
@@ -103,6 +103,7 @@ struct process_block_struct {
 
 struct process_inode_block {
 	ext2_ino_t ino;
+	blk64_t ea_ibody_quota_blocks;
 	struct ext2_inode_large inode;
 };
 
@@ -351,13 +352,25 @@ static void mark_inode_ea_map(e2fsck_t ctx, struct problem_context *pctx,
 	ext2fs_mark_inode_bitmap2(ctx->inode_ea_map, ino);
 }
 
+/*
+ * For a given size, calculate how many blocks would be charged towards quota.
+ */
+static blk64_t size_to_quota_blocks(ext2_filsys fs, size_t size)
+{
+	blk64_t clusters;
+
+	clusters = DIV_ROUND_UP(size, fs->blocksize << fs->cluster_ratio_bits);
+	return EXT2FS_C2B(fs, clusters);
+}
+
 /*
  * Check validity of EA inode. Return 0 if EA inode is valid, otherwise return
  * the problem code.
  */
 static problem_t check_large_ea_inode(e2fsck_t ctx,
 				      struct ext2_ext_attr_entry *entry,
-				      struct problem_context *pctx)
+				      struct problem_context *pctx,
+				      blk64_t *quota_blocks)
 {
 	struct ext2_inode inode;
 	__u32 hash;
@@ -380,11 +393,15 @@ static problem_t check_large_ea_inode(e2fsck_t ctx,
 		fatal_error(ctx, 0);
 	}
 
-	if (hash != entry->e_hash) {
+	if (hash == entry->e_hash) {
+		*quota_blocks = size_to_quota_blocks(ctx->fs,
+						     entry->e_value_size);
+	} else {
 		/* This might be an old Lustre-style ea_inode reference. */
-		if (inode.i_mtime != pctx->ino ||
-		    inode.i_generation != pctx->inode->i_generation) {
-
+		if (inode.i_mtime == pctx->ino &&
+		    inode.i_generation == pctx->inode->i_generation) {
+			*quota_blocks = 0;
+		} else {
 			/* If target inode is also missing EA_INODE flag,
 			 * this is likely to be a bad reference.
 			 */
@@ -411,7 +428,8 @@ static problem_t check_large_ea_inode(e2fsck_t ctx,
 	return 0;
 }
 
-static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
+static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx,
+			      blk64_t *ea_ibody_quota_blocks)
 {
 	struct ext2_super_block *sb = ctx->fs->super;
 	struct ext2_inode_large *inode;
@@ -420,6 +438,9 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
 	unsigned int storage_size, remain;
 	problem_t problem = 0;
 	region_t region = 0;
+	blk64_t quota_blocks = 0;
+
+	*ea_ibody_quota_blocks = 0;
 
 	inode = (struct ext2_inode_large *) pctx->inode;
 	storage_size = EXT2_INODE_SIZE(ctx->fs->super) - EXT2_GOOD_OLD_INODE_SIZE -
@@ -496,11 +517,15 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
 				goto fix;
 			}
 		} else {
-			problem = check_large_ea_inode(ctx, entry, pctx);
+			blk64_t entry_quota_blocks;
+
+			problem = check_large_ea_inode(ctx, entry, pctx,
+						       &entry_quota_blocks);
 			if (problem != 0)
 				goto fix;
 
 			mark_inode_ea_map(ctx, pctx, entry->e_value_inum);
+			quota_blocks += entry_quota_blocks;
 		}
 
 		/* If EA value is stored in external inode then it does not
@@ -523,8 +548,10 @@ fix:
 	 * it seems like a corruption. it's very unlikely we could repair
 	 * EA(s) in automatic fashion -bzzz
 	 */
-	if (problem == 0 || !fix_problem(ctx, problem, pctx))
+	if (problem == 0 || !fix_problem(ctx, problem, pctx)) {
+		*ea_ibody_quota_blocks = quota_blocks;
 		return;
+	}
 
 	/* simply remove all possible EA(s) */
 	*((__u32 *)header) = 0UL;
@@ -547,13 +574,16 @@ static int check_inode_extra_negative_epoch(__u32 xtime, __u32 extra) {
  */
 #define EXT4_EXTRA_NEGATIVE_DATE_CUTOFF 2 * (1LL << 32)
 
-static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
+static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx,
+				    blk64_t *ea_ibody_quota_blocks)
 {
 	struct ext2_super_block *sb = ctx->fs->super;
 	struct ext2_inode_large *inode;
 	__u32 *eamagic;
 	int min, max;
 
+	*ea_ibody_quota_blocks = 0;
+
 	inode = (struct ext2_inode_large *) pctx->inode;
 	if (EXT2_INODE_SIZE(sb) == EXT2_GOOD_OLD_INODE_SIZE) {
 		/* this isn't large inode. so, nothing to check */
@@ -592,7 +622,7 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
 			inode->i_extra_isize);
 	if (*eamagic == EXT2_EXT_ATTR_MAGIC) {
 		/* it seems inode has an extended attribute(s) in body */
-		check_ea_in_inode(ctx, pctx);
+		check_ea_in_inode(ctx, pctx, ea_ibody_quota_blocks);
 	}
 
 	/*
@@ -1150,6 +1180,7 @@ void e2fsck_pass1(e2fsck_t ctx)
 	int		failed_csum = 0;
 	ext2_ino_t	ino_threshold = 0;
 	dgrp_t		ra_group = 0;
+	blk64_t		ea_ibody_quota_blocks;
 
 	init_resource_track(&rtrack, ctx->fs->io);
 	clear_problem_context(&pctx);
@@ -1695,7 +1726,7 @@ void e2fsck_pass1(e2fsck_t ctx)
 							   "pass1");
 					failed_csum = 0;
 				}
-				check_blocks(ctx, &pctx, block_buf);
+				check_blocks(ctx, &pctx, block_buf, 0);
 				FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
 				continue;
 			}
@@ -1722,7 +1753,7 @@ void e2fsck_pass1(e2fsck_t ctx)
 							"pass1");
 					failed_csum = 0;
 				}
-				check_blocks(ctx, &pctx, block_buf);
+				check_blocks(ctx, &pctx, block_buf, 0);
 				FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
 				continue;
 			}
@@ -1760,7 +1791,7 @@ void e2fsck_pass1(e2fsck_t ctx)
 					failed_csum = 0;
 				}
 			}
-			check_blocks(ctx, &pctx, block_buf);
+			check_blocks(ctx, &pctx, block_buf, 0);
 			FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
 			continue;
 		}
@@ -1825,7 +1856,7 @@ void e2fsck_pass1(e2fsck_t ctx)
 			}
 		}
 
-		check_inode_extra_space(ctx, &pctx);
+		check_inode_extra_space(ctx, &pctx, &ea_ibody_quota_blocks);
 		check_is_really_dir(ctx, &pctx, block_buf);
 
 		/*
@@ -1872,7 +1903,8 @@ void e2fsck_pass1(e2fsck_t ctx)
 				continue;
 			} else if (ext2fs_inode_data_blocks(fs, inode) == 0) {
 				ctx->fs_fast_symlinks_count++;
-				check_blocks(ctx, &pctx, block_buf);
+				check_blocks(ctx, &pctx, block_buf,
+					     ea_ibody_quota_blocks);
 				FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
 				continue;
 			}
@@ -1906,17 +1938,19 @@ void e2fsck_pass1(e2fsck_t ctx)
 		     inode->i_block[EXT2_DIND_BLOCK] ||
 		     inode->i_block[EXT2_TIND_BLOCK] ||
 		     ext2fs_file_acl_block(fs, inode))) {
-			struct ext2_inode_large *ip;
+			struct process_inode_block *itp;
 
-			inodes_to_process[process_inode_count].ino = ino;
-			ip = &inodes_to_process[process_inode_count].inode;
+			itp = &inodes_to_process[process_inode_count];
+			itp->ino = ino;
+			itp->ea_ibody_quota_blocks = ea_ibody_quota_blocks;
 			if (inode_size < sizeof(struct ext2_inode_large))
-				memcpy(ip, inode, inode_size);
+				memcpy(&itp->inode, inode, inode_size);
 			else
-				memcpy(ip, inode, sizeof(*ip));
+				memcpy(&itp->inode, inode, sizeof(itp->inode));
 			process_inode_count++;
 		} else
-			check_blocks(ctx, &pctx, block_buf);
+			check_blocks(ctx, &pctx, block_buf,
+				     ea_ibody_quota_blocks);
 
 		FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
 
@@ -2086,7 +2120,8 @@ static void process_inodes(e2fsck_t ctx, char *block_buf)
 		sprintf(buf, _("reading indirect blocks of inode %u"),
 			pctx.ino);
 		ehandler_operation(buf);
-		check_blocks(ctx, &pctx, block_buf);
+		check_blocks(ctx, &pctx, block_buf,
+			     inodes_to_process[i].ea_ibody_quota_blocks);
 		if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
 			break;
 	}
@@ -2306,7 +2341,7 @@ static void adjust_extattr_refcount(e2fsck_t ctx, ext2_refcount_t refcount,
  * Handle processing the extended attribute blocks
  */
 static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
-			   char *block_buf)
+			   char *block_buf, blk64_t *ea_block_quota_blocks)
 {
 	ext2_filsys fs = ctx->fs;
 	ext2_ino_t	ino = pctx->ino;
@@ -2315,7 +2350,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 	char *		end;
 	struct ext2_ext_attr_header *header;
 	struct ext2_ext_attr_entry *entry;
-	int		count;
+	blk64_t		quota_blocks = EXT2FS_C2B(fs, 1);
 	region_t	region = 0;
 	int		failed_csum = 0;
 
@@ -2369,6 +2404,12 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 
 	/* Have we seen this EA block before? */
 	if (ext2fs_fast_test_block_bitmap2(ctx->block_ea_map, blk)) {
+		if (ctx->ea_block_quota)
+			ea_refcount_fetch(ctx->ea_block_quota, blk,
+					  ea_block_quota_blocks);
+		else
+			*ea_block_quota_blocks = 0;
+
 		if (ea_refcount_decrement(ctx->refcount, blk, 0) == 0)
 			return 1;
 		/* Ooops, this EA was referenced more than it stated */
@@ -2477,13 +2518,17 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 			}
 		} else {
 			problem_t problem;
+			blk64_t entry_quota_blocks;
 
-			problem = check_large_ea_inode(ctx, entry, pctx);
+			problem = check_large_ea_inode(ctx, entry, pctx,
+						       &entry_quota_blocks);
 			if (problem == 0)
 				mark_inode_ea_map(ctx, pctx,
 						  entry->e_value_inum);
 			else if (fix_problem(ctx, problem, pctx))
 				goto clear_extattr;
+
+			quota_blocks += entry_quota_blocks;
 		}
 
 		entry = EXT2_EXT_ATTR_NEXT(entry);
@@ -2506,9 +2551,21 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 			return 0;
 	}
 
-	count = header->h_refcount - 1;
-	if (count)
-		ea_refcount_store(ctx->refcount, blk, count);
+	*ea_block_quota_blocks = quota_blocks;
+	if (quota_blocks) {
+		if (!ctx->ea_block_quota) {
+			pctx->errcode = ea_refcount_create(0,
+							&ctx->ea_block_quota);
+			if (pctx->errcode) {
+				pctx->num = 3;
+				fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
+				ctx->flags |= E2F_FLAG_ABORT;
+				return 0;
+			}
+		}
+		ea_refcount_store(ctx->ea_block_quota, blk, quota_blocks);
+	}
+	ea_refcount_store(ctx->refcount, blk, header->h_refcount - 1);
 	mark_block_used(ctx, blk);
 	ext2fs_fast_mark_block_bitmap2(ctx->block_ea_map, blk);
 	return 1;
@@ -3162,7 +3219,7 @@ err:
  * blocks used by that inode.
  */
 static void check_blocks(e2fsck_t ctx, struct problem_context *pctx,
-			 char *block_buf)
+			 char *block_buf, blk64_t ea_ibody_quota_blocks)
 {
 	ext2_filsys fs = ctx->fs;
 	struct process_block_struct pb;
@@ -3173,9 +3230,10 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx,
 	int		extent_fs;
 	int		inlinedata_fs;
 	__u64		size;
+	blk64_t		ea_block_quota_blocks = 0;
 
 	pb.ino = ino;
-	pb.num_blocks = 0;
+	pb.num_blocks = EXT2FS_B2C(ctx->fs, ea_ibody_quota_blocks);
 	pb.last_block = ~0;
 	pb.last_init_lblock = -1;
 	pb.last_db_block = -1;
@@ -3198,10 +3256,10 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx,
 	extent_fs = ext2fs_has_feature_extents(ctx->fs->super);
 	inlinedata_fs = ext2fs_has_feature_inline_data(ctx->fs->super);
 
-	if (check_ext_attr(ctx, pctx, block_buf)) {
+	if (check_ext_attr(ctx, pctx, block_buf, &ea_block_quota_blocks)) {
 		if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
 			goto out;
-		pb.num_blocks++;
+		pb.num_blocks += EXT2FS_B2C(ctx->fs, ea_block_quota_blocks);
 	}
 
 	if (inlinedata_fs && (inode->i_flags & EXT4_INLINE_DATA_FL))
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 07/12] e2fsck: track ea_inode references
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
                   ` (4 preceding siblings ...)
  2017-06-26 13:43 ` [PATCH 06/12] e2fsck: update i_blocks accounting for ea_inode feature Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 21:45   ` Andreas Dilger
  2017-06-26 13:43 ` [PATCH 08/12] e2fsck: add test for ea_inode feature Tahsin Erdogan
                   ` (5 subsequent siblings)
  11 siblings, 1 reply; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

An extended attribute inode has a ref count to track how many entries
point to it. Update e2fsck to verify that the stored ref count is
correct.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 e2fsck/e2fsck.c  |   4 +++
 e2fsck/e2fsck.h  |   6 +++-
 e2fsck/message.c |   7 ++++
 e2fsck/pass1.c   |  66 ++++++++++++++++++++++--------------
 e2fsck/pass4.c   | 101 +++++++++++++++++++++++++++++++++++++++++++++----------
 e2fsck/problem.c |   4 +++
 e2fsck/problem.h |   5 ++-
 7 files changed, 148 insertions(+), 45 deletions(-)

diff --git a/e2fsck/e2fsck.c b/e2fsck/e2fsck.c
index e0f449ee2047..63a986b92af9 100644
--- a/e2fsck/e2fsck.c
+++ b/e2fsck/e2fsck.c
@@ -102,6 +102,10 @@ errcode_t e2fsck_reset_context(e2fsck_t ctx)
 		ea_refcount_free(ctx->ea_block_quota);
 		ctx->ea_block_quota = 0;
 	}
+	if (ctx->ea_inode_refs) {
+		ea_refcount_free(ctx->ea_inode_refs);
+		ctx->ea_inode_refs = 0;
+	}
 	if (ctx->block_dup_map) {
 		ext2fs_free_block_bitmap(ctx->block_dup_map);
 		ctx->block_dup_map = 0;
diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 79eeda9fbafb..b25c5eb9179b 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -254,7 +254,6 @@ struct e2fsck_struct {
 	ext2fs_inode_bitmap inode_bb_map; /* Inodes which are in bad blocks */
 	ext2fs_inode_bitmap inode_imagic_map; /* AFS inodes */
 	ext2fs_inode_bitmap inode_reg_map; /* Inodes which are regular files*/
-	ext2fs_inode_bitmap inode_ea_map; /* EA inodes which are non-orphan */
 
 	ext2fs_block_bitmap block_found_map; /* Blocks which are in use */
 	ext2fs_block_bitmap block_dup_map; /* Blks referenced more than once */
@@ -274,6 +273,11 @@ struct e2fsck_struct {
 	 */
 	ext2_refcount_t ea_block_quota;
 
+	/*
+	 * ea_inode references from attr entries.
+	 */
+	ext2_refcount_t ea_inode_refs;
+
 	/*
 	 * Array of flags indicating whether an inode bitmap, block
 	 * bitmap, or inode table is invalid
diff --git a/e2fsck/message.c b/e2fsck/message.c
index 34201a37fd4b..525f4a1e7628 100644
--- a/e2fsck/message.c
+++ b/e2fsck/message.c
@@ -466,6 +466,13 @@ static _INLINE_ void expand_percent_expression(FILE *f, ext2_filsys fs,
 		fprintf(f, "%*u", width, ctx->num);
 #else
 		fprintf(f, "%*llu", width, (long long)ctx->num);
+#endif
+		break;
+	case 'n':
+#ifdef EXT2_NO_64_TYPE
+		fprintf(f, "%*u", width, ctx->num2);
+#else
+		fprintf(f, "%*llu", width, (long long)ctx->num2);
 #endif
 		break;
 	case 'p':
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index cc88a7d05cef..1033b7646cb6 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -26,7 +26,7 @@
  * 	- A bitmap of which blocks are in use.		(block_found_map)
  * 	- A bitmap of which blocks are in use by two inodes	(block_dup_map)
  * 	- The data blocks of the directory inodes.	(dir_map)
- *	- A bitmap of EA inodes.			(inode_ea_map)
+ * 	- Ref counts for ea_inodes.			(ea_inode_refs)
  *
  * Pass 1 is designed to stash away enough information so that the
  * other passes should not need to read in the inode information
@@ -335,23 +335,6 @@ static void check_size(e2fsck_t ctx, struct problem_context *pctx)
 	e2fsck_write_inode(ctx, pctx->ino, pctx->inode, "pass1");
 }
 
-static void mark_inode_ea_map(e2fsck_t ctx, struct problem_context *pctx,
-			      ext2_ino_t ino)
-{
-	if (!ctx->inode_ea_map) {
-		pctx->errcode = ext2fs_allocate_inode_bitmap(ctx->fs,
-					 _("EA inode map"),
-					 &ctx->inode_ea_map);
-		if (pctx->errcode) {
-			fix_problem(ctx, PR_1_ALLOCATE_IBITMAP_ERROR,
-				    pctx);
-			exit(1);
-		}
-	}
-
-	ext2fs_mark_inode_bitmap2(ctx->inode_ea_map, ino);
-}
-
 /*
  * For a given size, calculate how many blocks would be charged towards quota.
  */
@@ -428,13 +411,38 @@ static problem_t check_large_ea_inode(e2fsck_t ctx,
 	return 0;
 }
 
+static void inc_ea_inode_refs(e2fsck_t ctx, struct problem_context *pctx,
+			      struct ext2_ext_attr_entry *first, void *end)
+{
+	struct ext2_ext_attr_entry *entry;
+
+	for (entry = first;
+	     (void *)entry < end && !EXT2_EXT_IS_LAST_ENTRY(entry);
+	     entry = EXT2_EXT_ATTR_NEXT(entry)) {
+		if (!entry->e_value_inum)
+			continue;
+		if (!ctx->ea_inode_refs) {
+			pctx->errcode = ea_refcount_create(0,
+							   &ctx->ea_inode_refs);
+			if (pctx->errcode) {
+				pctx->num = 4;
+				fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
+				ctx->flags |= E2F_FLAG_ABORT;
+				return;
+			}
+		}
+		ea_refcount_increment(ctx->ea_inode_refs, entry->e_value_inum,
+				      0);
+	}
+}
+
 static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx,
 			      blk64_t *ea_ibody_quota_blocks)
 {
 	struct ext2_super_block *sb = ctx->fs->super;
 	struct ext2_inode_large *inode;
 	struct ext2_ext_attr_entry *entry;
-	char *start, *header;
+	char *start, *header, *end;
 	unsigned int storage_size, remain;
 	problem_t problem = 0;
 	region_t region = 0;
@@ -447,6 +455,7 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx,
 		inode->i_extra_isize;
 	header = ((char *) inode) + EXT2_GOOD_OLD_INODE_SIZE +
 		 inode->i_extra_isize;
+	end = header + storage_size;
 	start = header + sizeof(__u32);
 	entry = (struct ext2_ext_attr_entry *) start;
 
@@ -524,7 +533,6 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx,
 			if (problem != 0)
 				goto fix;
 
-			mark_inode_ea_map(ctx, pctx, entry->e_value_inum);
 			quota_blocks += entry_quota_blocks;
 		}
 
@@ -549,6 +557,8 @@ fix:
 	 * EA(s) in automatic fashion -bzzz
 	 */
 	if (problem == 0 || !fix_problem(ctx, problem, pctx)) {
+		inc_ea_inode_refs(ctx, pctx,
+				  (struct ext2_ext_attr_entry *)start, end);
 		*ea_ibody_quota_blocks = quota_blocks;
 		return;
 	}
@@ -1988,6 +1998,11 @@ void e2fsck_pass1(e2fsck_t ctx)
 		ctx->refcount_extra = 0;
 	}
 
+	if (ctx->ea_block_quota) {
+		ea_refcount_free(ctx->ea_block_quota);
+		ctx->ea_block_quota = 0;
+	}
+
 	if (ctx->invalid_bitmaps)
 		handle_fs_bad_blocks(ctx);
 
@@ -2349,7 +2364,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 	blk64_t		blk;
 	char *		end;
 	struct ext2_ext_attr_header *header;
-	struct ext2_ext_attr_entry *entry;
+	struct ext2_ext_attr_entry *first, *entry;
 	blk64_t		quota_blocks = EXT2FS_C2B(fs, 1);
 	region_t	region = 0;
 	int		failed_csum = 0;
@@ -2473,8 +2488,9 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 			goto clear_extattr;
 	}
 
-	entry = (struct ext2_ext_attr_entry *)(header+1);
+	first = (struct ext2_ext_attr_entry *)(header+1);
 	end = block_buf + fs->blocksize;
+	entry = first;
 	while ((char *)entry < end && *(__u32 *)entry) {
 		__u32 hash;
 
@@ -2522,10 +2538,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 
 			problem = check_large_ea_inode(ctx, entry, pctx,
 						       &entry_quota_blocks);
-			if (problem == 0)
-				mark_inode_ea_map(ctx, pctx,
-						  entry->e_value_inum);
-			else if (fix_problem(ctx, problem, pctx))
+			if (problem && fix_problem(ctx, problem, pctx))
 				goto clear_extattr;
 
 			quota_blocks += entry_quota_blocks;
@@ -2565,6 +2578,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 		}
 		ea_refcount_store(ctx->ea_block_quota, blk, quota_blocks);
 	}
+	inc_ea_inode_refs(ctx, pctx, first, end);
 	ea_refcount_store(ctx->refcount, blk, header->h_refcount - 1);
 	mark_block_used(ctx, blk);
 	ext2fs_fast_mark_block_bitmap2(ctx->block_ea_map, blk);
diff --git a/e2fsck/pass4.c b/e2fsck/pass4.c
index e44fc69c1613..663f87ab59c0 100644
--- a/e2fsck/pass4.c
+++ b/e2fsck/pass4.c
@@ -11,7 +11,7 @@
  * Pass 4 frees the following data structures:
  * 	- A bitmap of which inodes are in bad blocks.	(inode_bb_map)
  * 	- A bitmap of which inodes are imagic inodes.	(inode_imagic_map)
- *	- A bitmap of EA inodes.			(inode_ea_map)
+ *	- Ref counts for ea_inodes.			(ea_inode_refs)
  */
 
 #include "config.h"
@@ -40,20 +40,6 @@ static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
 	if (EXT2_INODE_SIZE(fs->super) > EXT2_GOOD_OLD_INODE_SIZE)
 		extra_size = inode->i_extra_isize;
 
-	if (inode->i_flags & EXT4_EA_INODE_FL) {
-		if (ext2fs_test_inode_bitmap2(ctx->inode_ea_map, i)) {
-			ext2fs_icount_store(ctx->inode_count, i, 1);
-			return 0;
-		} else {
-			/* Zero the link count so that when inode is linked to
-			 * lost+found it has correct link count */
-			inode->i_links_count = 0;
-			e2fsck_write_inode(ctx, i, EXT2_INODE(inode),
-					   "disconnect_inode");
-			ext2fs_icount_store(ctx->inode_link_info, i, 0);
-		}
-	}
-
 	clear_problem_context(&pctx);
 	pctx.ino = i;
 	pctx.inode = EXT2_INODE(inode);
@@ -101,6 +87,77 @@ static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
 	return 0;
 }
 
+/*
+ * Get/set ref functions below could later be moved to somewhere in lib/ext2fs/.
+ * Currently the only user is e2fsck so we rather not expose it in common
+ * library until there are more users.
+ */
+static __u64 ea_inode_get_ref(struct ext2_inode_large *inode)
+{
+	return ((__u64)inode->i_ctime << 32) | inode->osd1.linux1.l_i_version;
+}
+
+static void ea_inode_set_ref(struct ext2_inode_large *inode, __u64 ref_count)
+{
+	inode->i_ctime = (__u32)(ref_count >> 32);
+	inode->osd1.linux1.l_i_version = (__u32)ref_count;
+}
+
+static void check_ea_inode(e2fsck_t ctx, ext2_ino_t i,
+			   struct ext2_inode_large *inode, __u16 *link_counted)
+{
+	__u64 actual_refs = 0;
+	__u64 ref_count;
+
+	/*
+	 * This function is called when link_counted is zero. So this may not
+	 * be an xattr inode at all. Return immediately if EA_INODE flag is not
+	 * set.
+	 */
+	e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
+			       EXT2_INODE_SIZE(ctx->fs->super),
+			       "pass4: check_ea_inode");
+	if (!(inode->i_flags & EXT4_EA_INODE_FL))
+		return;
+
+	if (ctx->ea_inode_refs)
+		ea_refcount_fetch(ctx->ea_inode_refs, i, &actual_refs);
+	if (!actual_refs) {
+		/*
+		 * There are no attribute references to the ea_inode.
+		 * Zero the link count so that when  inode is linked to
+		 * lost+found it has correct link count.
+		 */
+		inode->i_links_count = 0;
+		e2fsck_write_inode(ctx, i, EXT2_INODE(inode), "check_ea_inode");
+		ext2fs_icount_store(ctx->inode_link_info, i, 0);
+		return;
+	}
+
+	/*
+	 * There are some attribute references, link_counted is now considered
+	 * to be 1.
+	 */
+	*link_counted = 1;
+
+	ref_count = ea_inode_get_ref(inode);
+
+	/* Old Lustre-style xattr inodes do not have a stored refcount.
+	 * However, their i_ctime and i_atime should be the same.
+	 */
+	if (ref_count != actual_refs && inode->i_ctime != inode->i_atime) {
+		struct problem_context pctx;
+
+		clear_problem_context(&pctx);
+		pctx.ino = i;
+		pctx.num = ref_count;
+		pctx.num2 = actual_refs;
+		if (fix_problem(ctx, PR_4_EA_INODE_REF_COUNT, &pctx)) {
+			ea_inode_set_ref(inode, actual_refs);
+			e2fsck_write_inode(ctx, i, EXT2_INODE(inode), "pass4");
+		}
+	}
+}
 
 void e2fsck_pass4(e2fsck_t ctx)
 {
@@ -168,6 +225,16 @@ void e2fsck_pass4(e2fsck_t ctx)
 			continue;
 		ext2fs_icount_fetch(ctx->inode_link_info, i, &link_count);
 		ext2fs_icount_fetch(ctx->inode_count, i, &link_counted);
+
+		if (link_counted == 0) {
+			/*
+			 * link_counted is expected to be 0 for an ea_inode.
+			 * check_ea_inode() will update link_counted if
+			 * necessary.
+			 */
+			check_ea_inode(ctx, i, inode, &link_counted);
+		}
+
 		if (link_counted == 0) {
 			if (!buf)
 				buf = e2fsck_allocate_memory(ctx,
@@ -211,10 +278,10 @@ void e2fsck_pass4(e2fsck_t ctx)
 	}
 	ext2fs_free_icount(ctx->inode_link_info); ctx->inode_link_info = 0;
 	ext2fs_free_icount(ctx->inode_count); ctx->inode_count = 0;
-	ext2fs_free_inode_bitmap(ctx->inode_ea_map);
-	ctx->inode_ea_map = 0;
 	ext2fs_free_inode_bitmap(ctx->inode_bb_map);
 	ctx->inode_bb_map = 0;
+	ea_refcount_free(ctx->ea_inode_refs);
+	ctx->ea_inode_refs = 0;
 	ext2fs_free_inode_bitmap(ctx->inode_imagic_map);
 	ctx->inode_imagic_map = 0;
 errout:
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index deffa4d66a89..97069335156c 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -1869,6 +1869,10 @@ static struct e2fsck_problem problem_table[] = {
 	  "They @s the same!\n"),
 	  PROMPT_NONE, 0 },
 
+	{ PR_4_EA_INODE_REF_COUNT,
+	  N_("@a @i %i ref count is %N, @s %n. "),
+	  PROMPT_FIX, PR_PREEN_OK },
+
 	/* Pass 5 errors */
 
 	/* Pass 5: Checking group summary information */
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index 8e07c10895c1..54eb1cf9a519 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -20,7 +20,7 @@ struct problem_context {
 	e2_blkcnt_t	blkcount;
 	dgrp_t		group;
 	__u32		csum1, csum2;
-	__u64	num;
+	__u64		num, num2;
 	const char *str;
 };
 
@@ -1131,6 +1131,9 @@ struct problem_context {
 /* Inconsistent inode count information cached */
 #define PR_4_INCONSISTENT_COUNT		0x040004
 
+/* Extended attribute inode ref count wrong */
+#define PR_4_EA_INODE_REF_COUNT		0x040005
+
 /*
  * Pass 5 errors
  */
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 08/12] e2fsck: add test for ea_inode feature
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
                   ` (5 preceding siblings ...)
  2017-06-26 13:43 ` [PATCH 07/12] e2fsck: track ea_inode references Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 09/12] tune2fs: update ea_inode hashes when fs uuid changes Tahsin Erdogan
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

f_ea_inode test covers the following scenarios:

- a file that contains old Lustre-style valid ea_inode references
  in inode body and xattr block

- a file that contains new style valid ea_inode references in inode
  body and xattr block

- a file with an extended attribute that references an invalid inode
  number (e_value_inum > s_inodes_count)

- an ea entry with bad e_hash and points to an inode that does not
  have EA_INODE flag set

- an ea entry with bad e_hash but points to a valid ea_inode

- an ea entry with valid e_hash that points to an inode that is
  missing EA_INODE flag

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 tests/f_ea_inode/expect.1 |  31 +++++++++++++++++++++++++++++++
 tests/f_ea_inode/expect.2 |   7 +++++++
 tests/f_ea_inode/image.gz | Bin 0 -> 1389 bytes
 3 files changed, 38 insertions(+)
 create mode 100644 tests/f_ea_inode/expect.1
 create mode 100644 tests/f_ea_inode/expect.2
 create mode 100644 tests/f_ea_inode/image.gz

diff --git a/tests/f_ea_inode/expect.1 b/tests/f_ea_inode/expect.1
new file mode 100644
index 000000000000..aaa0bead9c41
--- /dev/null
+++ b/tests/f_ea_inode/expect.1
@@ -0,0 +1,31 @@
+Pass 1: Checking inodes, blocks, and sizes
+Inode 17 has illegal extended attribute value inode 4008636142.
+Clear? yes
+
+Inode 17, i_blocks is 8, should be 0.  Fix? yes
+
+Inode 18 has illegal extended attribute value inode 19.
+Clear? yes
+
+Inode 18, i_blocks is 8, should be 0.  Fix? yes
+
+Extended attribute in inode 20 has a hash (1145324612) which is invalid
+Clear? yes
+
+Inode 20, i_blocks is 8, should be 0.  Fix? yes
+
+EA inode 19 for parent inode 21 missing EA_INODE flag.
+ Fix? yes
+
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Extended attribute inode 16 ref count is 51, should be 2. Fix? yes
+
+Extended attribute inode 19 ref count is 2, should be 1. Fix? yes
+
+Pass 5: Checking group summary information
+
+test_filesys: ***** FILE SYSTEM WAS MODIFIED *****
+test_filesys: 21/32 files (0.0% non-contiguous), 18/64 blocks
+Exit status is 1
diff --git a/tests/f_ea_inode/expect.2 b/tests/f_ea_inode/expect.2
new file mode 100644
index 000000000000..f9276fba341d
--- /dev/null
+++ b/tests/f_ea_inode/expect.2
@@ -0,0 +1,7 @@
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 21/32 files (0.0% non-contiguous), 18/64 blocks
+Exit status is 0
diff --git a/tests/f_ea_inode/image.gz b/tests/f_ea_inode/image.gz
new file mode 100644
index 0000000000000000000000000000000000000000..68a397580d2ecca9fa30e33528a286419c757c7e
GIT binary patch
literal 1389
zcmb`F{a4a=6vw~I)`M2G<2*f}kh8Efou#81LrN=CThJmON-@huhNxv?o59zeRx?|&
zGIh?>mQ#-vGQi@LPsem6C8C+4_<;F@CF(>J5x(Eg;D`MIozwl{-q$(zzVG{-doKR^
zvSnR*S~z5r6m!xemL1JJ?F~)zsjt(IIzs=FB#$#l6~$X@*Lm!5K3-+(N7>N&nisek
zfj+a%j}CPTW7Z=#D==>Q9KRal-&cO3V;nl727-S}AnV7DV3uP$FSiA^a#{-3r8qIW
zisbL-jZKC^lA*W8z|-_Uxsz1my(2Jf=}~Gr+4*Zf{V!wp4=F-ic7<NsslKACZ&>3W
zllAVM6f|$l^I{*L9(bz~Z|@L4SQvaltFF8+)~cx>k*y9eQPB8~KciY0=x`Xq(e6Bq
z>)W=S7BRP&X`FlG;#6X@6o(8-^RY0ZI}i8vM?8w>;9ixunqT>pr<R3(3PEQ6Ni5~D
z!rz-*398)KJP3mB3Jy|J=)qRvl1ryB7GPI!@Vm_@%Ogbl{m%}GGyixsh3e|U5MEMA
zsY`fP4(nEnS466N7BAo)fyb35t?N|V4}Pan=AUX*VpJx~!#hcuhJp8mOElo%tXX<7
zGFga4hWAkWSQ^Tif(If*Qg+z%<#8k~q-D|UMA6RLA1xb>7@l?%0e20vvuEBcC6E&Y
zHQBv4fu9x`z4GiSA(`y#391ZocYA-ui}Z2+NV8*idEDr`3vtZkpz_a4uigmC%@ZEN
zKD<3-ce@bGcpEkGLTL_|JBy}B;Ut;4(VX8=Vw|Fw_InHx8_ySBXNXIN)nr9-a~|9U
z9vzs$j4Wyot5i)#nb>7^BY8tG>zF4pkMH(1QfmVvsp+zELkJZmpL5wC0dHlFp5d9x
zUah#?KQ1)WNvNG2y$94NNxWZSSQG(_>OyVM`YA9w(+v9zs~>vG!Gd?}rS)G_P%@=K
zY@l4IP0`fp5ehK?&Gk+FCKqSp{Mzil%R7MMwuU&s-!NJBOtK?$1^>p}O8f*Vd@~*!
zF-+*|%rL*uyaWQQaUR$B{}JTTHn_}UXArqZ<kgx|?RCC-YK-<aEb51gl?gG47jE{1
zkWg1jZw^s`9l^u)_u8iGq6^38Pxu-`UF4M8cJfoAV<JG6XT#NKyARO5xf-r{%B?Rj
z!P6$+v-{98iz6~Ac*Hspi*~T09lFasB$+^wswVp|*1CD9T`eDja}*`y25;|1dxpB>
zV0Z+?6F0FW{tYh0R3|5#JBD6i7|CjBZtZFslhwbR<zLd#2yDJ<HabwcF*mP|qW-;+
zh>jkV_>PyZ?zy!xZ$I~suH|dP>NXYESRXRj-ei=R04xyd!6y)uMr;>X39R5y46OYL
zD!^NcH6~ehT*hV#B!X)SWv0T=@|SWE;rX}_6XU-x#Zl%evwyO2%c|$GS<`K>Z1=*B
z076Dd4nu5Fq&*hDRxYu^9L<s;Rln8*qLBOZT!g%A8U^}kD7rNjk+EhCV2!P?AQ4b3
zQ(GDC3*fW`?oJ5wU#+qVKRi_5a@_2%ZsQbnb4M#wv^J*zU7&8SXJE$Vb1GWE9iP=F
zFPWS6is*R>k;%2W4F|RqOldCE7C1Bb?LmT<%O}Z0hP&B3>W3SDOhsDi3m7Em(jjOu
F^dA(df!qK9

literal 0
HcmV?d00001

-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 09/12] tune2fs: update ea_inode hashes when fs uuid changes
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
                   ` (6 preceding siblings ...)
  2017-06-26 13:43 ` [PATCH 08/12] e2fsck: add test for ea_inode feature Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 10/12] fuse2fs: refuse to mount fs with ea_inode feature Tahsin Erdogan
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

Extended attribute inodes maintain a crc32c hash that is used for
deduplication. The crc seed derives from uuid so ea_inode hashes
must be updated when uuid changes.

The ea_inode hash is also incorporated into the xattr entry e_hash
so the entries that reference the inode also must be updated.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 lib/ext2fs/csum.c     |   3 +-
 lib/ext2fs/ext2fs.h   |   4 +
 lib/ext2fs/ext_attr.c |  44 +++++++-
 misc/tune2fs.c        | 299 ++++++++++++++++++++++++++++++++++----------------
 4 files changed, 249 insertions(+), 101 deletions(-)

diff --git a/lib/ext2fs/csum.c b/lib/ext2fs/csum.c
index e67850fa47af..214a099762ee 100644
--- a/lib/ext2fs/csum.c
+++ b/lib/ext2fs/csum.c
@@ -34,7 +34,8 @@ void ext2fs_init_csum_seed(ext2_filsys fs)
 {
 	if (ext2fs_has_feature_csum_seed(fs->super))
 		fs->csum_seed = fs->super->s_checksum_seed;
-	else if (ext2fs_has_feature_metadata_csum(fs->super))
+	else if (ext2fs_has_feature_metadata_csum(fs->super) ||
+		 ext2fs_has_feature_ea_inode(fs->super))
 		fs->csum_seed = ext2fs_crc32c_le(~0, fs->super->s_uuid,
 						 sizeof(fs->super->s_uuid));
 }
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index f72cbe17a8cd..90df2182bd7b 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1247,6 +1247,10 @@ errcode_t ext2fs_xattr_inode_max_size(ext2_filsys fs, ext2_ino_t ino,
 #define XATTR_HANDLE_FLAG_RAW	0x0001
 errcode_t ext2fs_xattrs_flags(struct ext2_xattr_handle *handle,
 			      unsigned int *new_flags, unsigned int *old_flags);
+extern void ext2fs_ext_attr_block_rehash(struct ext2_ext_attr_header *header,
+					 struct ext2_ext_attr_entry *end);
+extern __u32 ext2fs_get_ea_inode_hash(struct ext2_inode *inode);
+extern void ext2fs_set_ea_inode_hash(struct ext2_inode *inode, __u32 hash);
 
 /* extent.c */
 extern errcode_t ext2fs_extent_header_verify(void *ptr, int size);
diff --git a/lib/ext2fs/ext_attr.c b/lib/ext2fs/ext_attr.c
index d48ab5c55270..41789c7dcbf2 100644
--- a/lib/ext2fs/ext_attr.c
+++ b/lib/ext2fs/ext_attr.c
@@ -33,7 +33,7 @@ static errcode_t read_ea_inode_hash(ext2_filsys fs, ext2_ino_t ino, __u32 *hash)
 	retval = ext2fs_read_inode(fs, ino, &inode);
 	if (retval)
 		return retval;
-	*hash = inode.i_atime;
+	*hash = ext2fs_get_ea_inode_hash(&inode);
 	return 0;
 }
 
@@ -100,6 +100,45 @@ errcode_t ext2fs_ext_attr_hash_entry2(ext2_filsys fs,
 	return 0;
 }
 
+#undef NAME_HASH_SHIFT
+#undef VALUE_HASH_SHIFT
+
+#define BLOCK_HASH_SHIFT 16
+
+/* Mirrors ext4_xattr_rehash() implementation in kernel. */
+void ext2fs_ext_attr_block_rehash(struct ext2_ext_attr_header *header,
+				  struct ext2_ext_attr_entry *end)
+{
+	struct ext2_ext_attr_entry *here;
+	__u32 hash = 0;
+
+	here = (struct ext2_ext_attr_entry *)(header+1);
+	while (here < end && !EXT2_EXT_IS_LAST_ENTRY(here)) {
+		if (!here->e_hash) {
+			/* Block is not shared if an entry's hash value == 0 */
+			hash = 0;
+			break;
+		}
+		hash = (hash << BLOCK_HASH_SHIFT) ^
+		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
+		       here->e_hash;
+		here = EXT2_EXT_ATTR_NEXT(here);
+	}
+	header->h_hash = hash;
+}
+
+#undef BLOCK_HASH_SHIFT
+
+__u32 ext2fs_get_ea_inode_hash(struct ext2_inode *inode)
+{
+	return inode->i_atime;
+}
+
+void ext2fs_set_ea_inode_hash(struct ext2_inode *inode, __u32 hash)
+{
+	inode->i_atime = hash;
+}
+
 static errcode_t check_ext_attr_header(struct ext2_ext_attr_header *header)
 {
 	if ((header->h_magic != EXT2_EXT_ATTR_MAGIC_v1 &&
@@ -110,9 +149,6 @@ static errcode_t check_ext_attr_header(struct ext2_ext_attr_header *header)
 	return 0;
 }
 
-#undef NAME_HASH_SHIFT
-#undef VALUE_HASH_SHIFT
-
 errcode_t ext2fs_read_ext_attr3(ext2_filsys fs, blk64_t block, void *buf,
 				ext2_ino_t inum)
 {
diff --git a/misc/tune2fs.c b/misc/tune2fs.c
index 2a437b9fc7b7..9b53dc28e9cb 100644
--- a/misc/tune2fs.c
+++ b/misc/tune2fs.c
@@ -718,115 +718,224 @@ static errcode_t rewrite_directory(ext2_filsys fs, ext2_ino_t dir,
 	return ctx.errcode;
 }
 
+/*
+ * Context information that does not change across rewrite_one_inode()
+ * invocations.
+ */
+struct rewrite_context {
+	ext2_filsys fs;
+	struct ext2_inode *zero_inode;
+	char *ea_buf;
+	int inode_size;
+};
+
+#define fatal_err(code, args...)		\
+	do {					\
+		com_err(__func__, code, args);	\
+		exit(1);			\
+	} while (0);
+
+static void update_ea_inode_hash(struct rewrite_context *ctx, ext2_ino_t ino,
+				 struct ext2_inode *inode)
+{
+	errcode_t retval;
+	ext2_file_t file;
+	__u32 hash;
+
+	retval = ext2fs_file_open(ctx->fs, ino, 0, &file);
+	if (retval)
+		fatal_err(retval, "open ea_inode");
+	retval = ext2fs_file_read(file, ctx->ea_buf, inode->i_size,
+				  NULL);
+	if (retval)
+		fatal_err(retval, "read ea_inode");
+	retval = ext2fs_file_close(file);
+	if (retval)
+		fatal_err(retval, "close ea_inode");
+
+	hash = ext2fs_crc32c_le(ctx->fs->csum_seed, ctx->ea_buf, inode->i_size);
+	ext2fs_set_ea_inode_hash(inode, hash);
+}
+
+static int update_xattr_entry_hashes(ext2_filsys fs,
+				     struct ext2_ext_attr_entry *entry,
+				     struct ext2_ext_attr_entry *end)
+{
+	int modified = 0;
+	errcode_t retval;
+
+	while (entry < end && !EXT2_EXT_IS_LAST_ENTRY(entry)) {
+		if (entry->e_value_inum) {
+			retval = ext2fs_ext_attr_hash_entry2(fs, entry, NULL,
+							     &entry->e_hash);
+			if (retval)
+				fatal_err(retval, "hash ea_inode entry");
+			modified = 1;
+		}
+		entry = EXT2_EXT_ATTR_NEXT(entry);
+	}
+	return modified;
+}
+
+static void update_inline_xattr_hashes(struct rewrite_context *ctx,
+				       struct ext2_inode_large *inode)
+{
+	struct ext2_ext_attr_entry *start, *end;
+	__u32 *ea_magic;
+
+	if (inode->i_extra_isize == 0)
+		return;
+
+	if (inode->i_extra_isize & 3 ||
+	    inode->i_extra_isize > ctx->inode_size - EXT2_GOOD_OLD_INODE_SIZE)
+		fatal_err(EXT2_ET_INODE_CORRUPTED, "bad i_extra_isize")
+
+	ea_magic = (__u32 *)((char *)inode + EXT2_GOOD_OLD_INODE_SIZE +
+				inode->i_extra_isize);
+	if (*ea_magic != EXT2_EXT_ATTR_MAGIC)
+		return;
+
+	start = (struct ext2_ext_attr_entry *)(ea_magic + 1);
+	end = (struct ext2_ext_attr_entry *)((char *)inode + ctx->inode_size);
+
+	update_xattr_entry_hashes(ctx->fs, start, end);
+}
+
+static void update_block_xattr_hashes(struct rewrite_context *ctx,
+				      void *block_buf)
+{
+	struct ext2_ext_attr_header *header;
+	struct ext2_ext_attr_entry *start, *end;
+
+	header = (struct ext2_ext_attr_header *)block_buf;
+	if (header->h_magic != EXT2_EXT_ATTR_MAGIC)
+		return;
+
+	start = (struct ext2_ext_attr_entry *)(header+1);
+	end = (struct ext2_ext_attr_entry *)(block_buf + ctx->fs->blocksize);
+
+	if (update_xattr_entry_hashes(ctx->fs, start, end))
+		ext2fs_ext_attr_block_rehash(header, end);
+}
+
+static void rewrite_one_inode(struct rewrite_context *ctx, ext2_ino_t ino,
+			      struct ext2_inode *inode)
+{
+	blk64_t file_acl_block;
+	errcode_t retval;
+
+	if (!ext2fs_test_inode_bitmap2(ctx->fs->inode_map, ino)) {
+		if (!memcmp(inode, ctx->zero_inode, ctx->inode_size))
+			return;
+		memset(inode, 0, ctx->inode_size);
+	}
+
+	if (inode->i_flags & EXT4_EA_INODE_FL)
+		update_ea_inode_hash(ctx, ino, inode);
+
+	if (ctx->inode_size != EXT2_GOOD_OLD_INODE_SIZE)
+		update_inline_xattr_hashes(ctx,
+					   (struct ext2_inode_large *)inode);
+
+	retval = ext2fs_write_inode_full(ctx->fs, ino, inode, ctx->inode_size);
+	if (retval)
+		fatal_err(retval, "while writing inode");
+
+	retval = rewrite_extents(ctx->fs, ino, inode);
+	if (retval)
+		fatal_err(retval, "while rewriting extents");
+
+	if (LINUX_S_ISDIR(inode->i_mode) &&
+	    ext2fs_inode_has_valid_blocks2(ctx->fs, inode)) {
+		retval = rewrite_directory(ctx->fs, ino, inode);
+		if (retval)
+			fatal_err(retval, "while rewriting directories");
+	}
+
+	file_acl_block = ext2fs_file_acl_block(ctx->fs, inode);
+	if (!file_acl_block)
+		return;
+
+	retval = ext2fs_read_ext_attr3(ctx->fs, file_acl_block, ctx->ea_buf,
+				       ino);
+	if (retval)
+		fatal_err(retval, "while rewriting extended attribute");
+
+	update_block_xattr_hashes(ctx, ctx->ea_buf);
+	retval = ext2fs_write_ext_attr3(ctx->fs, file_acl_block, ctx->ea_buf,
+					ino);
+	if (retval)
+		fatal_err(retval, "while rewriting extended attribute");
+}
+
 /*
  * Forcibly set checksums in all inodes.
  */
 static void rewrite_inodes(ext2_filsys fs)
 {
-	int length = EXT2_INODE_SIZE(fs->super);
-	struct ext2_inode *inode, *zero;
-	char		*ea_buf;
 	ext2_inode_scan	scan;
 	errcode_t	retval;
 	ext2_ino_t	ino;
-	blk64_t		file_acl_block;
-	int		inode_dirty;
+	struct ext2_inode *inode;
+	int pass;
+	struct rewrite_context ctx = {
+		.fs = fs,
+		.inode_size = EXT2_INODE_SIZE(fs->super),
+	};
 
 	if (fs->super->s_creator_os == EXT2_OS_HURD)
 		return;
 
-	retval = ext2fs_open_inode_scan(fs, 0, &scan);
-	if (retval) {
-		com_err("set_csum", retval, "while opening inode scan");
-		exit(1);
-	}
-
-	retval = ext2fs_get_mem(length, &inode);
-	if (retval) {
-		com_err("set_csum", retval, "while allocating memory");
-		exit(1);
-	}
-
-	retval = ext2fs_get_memzero(length, &zero);
-	if (retval) {
-		com_err("set_csum", retval, "while allocating memory");
-		exit(1);
-	}
+	retval = ext2fs_get_mem(ctx.inode_size, &inode);
+	if (retval)
+		fatal_err(retval, "while allocating memory");
 
-	retval = ext2fs_get_mem(fs->blocksize, &ea_buf);
-	if (retval) {
-		com_err("set_csum", retval, "while allocating memory");
-		exit(1);
-	}
+	retval = ext2fs_get_memzero(ctx.inode_size, &ctx.zero_inode);
+	if (retval)
+		fatal_err(retval, "while allocating memory");
 
-	do {
-		retval = ext2fs_get_next_inode_full(scan, &ino, inode, length);
-		if (retval) {
-			com_err("set_csum", retval, "while getting next inode");
-			exit(1);
-		}
-		if (!ino)
-			break;
-		if (ext2fs_test_inode_bitmap2(fs->inode_map, ino)) {
-			inode_dirty = 1;
-		} else {
-			if (memcmp(inode, zero, length) != 0) {
-				memset(inode, 0, length);
-				inode_dirty = 1;
-			} else {
-				inode_dirty = 0;
-			}
-		}
+	retval = ext2fs_get_mem(64 * 1024, &ctx.ea_buf);
+	if (retval)
+		fatal_err(retval, "while allocating memory");
 
-		if (inode_dirty) {
-			retval = ext2fs_write_inode_full(fs, ino, inode,
-							 length);
-			if (retval) {
-				com_err("set_csum", retval, "while writing "
-					"inode");
-				exit(1);
-			}
-		}
+	/*
+	 * Extended attribute inodes have a lookup hash that needs to be
+	 * recalculated with the new csum_seed. Other inodes referencing xattr
+	 * inodes need this value to be up to date. That's why we do two passes:
+	 *
+	 * pass 1: update xattr inodes to update their lookup hash as well as
+	 *         other checksums.
+	 *
+	 * pass 2: go over other inodes to update their checksums.
+	 */
+	if (ext2fs_has_feature_ea_inode(fs->super))
+		pass = 1;
+	else
+		pass = 2;
+	for (;pass <= 2; pass++) {
+		retval = ext2fs_open_inode_scan(fs, 0, &scan);
+		if (retval)
+			fatal_err(retval, "while opening inode scan");
 
-		retval = rewrite_extents(fs, ino, inode);
-		if (retval) {
-			com_err("rewrite_extents", retval,
-				"while rewriting extents");
-			exit(1);
-		}
+		do {
+			retval = ext2fs_get_next_inode_full(scan, &ino, inode,
+							    ctx.inode_size);
+			if (retval)
+				fatal_err(retval, "while getting next inode");
+			if (!ino)
+				break;
 
-		if (LINUX_S_ISDIR(inode->i_mode) &&
-		    ext2fs_inode_has_valid_blocks2(fs, inode)) {
-			retval = rewrite_directory(fs, ino, inode);
-			if (retval) {
-				com_err("rewrite_directory", retval,
-					"while rewriting directories");
-				exit(1);
-			}
-		}
+			if (pass == 1 && (inode->i_flags & EXT4_EA_INODE_FL) ||
+			    pass == 2 && !(inode->i_flags & EXT4_EA_INODE_FL))
+				rewrite_one_inode(&ctx, ino, inode);
+		} while (ino);
 
-		file_acl_block = ext2fs_file_acl_block(fs, inode);
-		if (!file_acl_block)
-			continue;
-		retval = ext2fs_read_ext_attr3(fs, file_acl_block, ea_buf, ino);
-		if (retval) {
-			com_err("rewrite_eablock", retval,
-				"while rewriting extended attribute");
-			exit(1);
-		}
-		retval = ext2fs_write_ext_attr3(fs, file_acl_block, ea_buf,
-						ino);
-		if (retval) {
-			com_err("rewrite_eablock", retval,
-				"while rewriting extended attribute");
-			exit(1);
-		}
-	} while (ino);
+		ext2fs_close_inode_scan(scan);
+	}
 
-	ext2fs_free_mem(&zero);
+	ext2fs_free_mem(&ctx.zero_inode);
+	ext2fs_free_mem(&ctx.ea_buf);
 	ext2fs_free_mem(&inode);
-	ext2fs_free_mem(&ea_buf);
-	ext2fs_close_inode_scan(scan);
 }
 
 static void rewrite_metadata_checksums(ext2_filsys fs)
@@ -839,11 +948,8 @@ static void rewrite_metadata_checksums(ext2_filsys fs)
 	for (i = 0; i < fs->group_desc_count; i++)
 		ext2fs_group_desc_csum_set(fs, i);
 	retval = ext2fs_read_bitmaps(fs);
-	if (retval) {
-		com_err("rewrite_metadata_checksums", retval,
-			"while reading bitmaps");
-		exit(1);
-	}
+	if (retval)
+		fatal_err(retval, "while reading bitmaps");
 	rewrite_inodes(fs);
 	ext2fs_mark_ib_dirty(fs);
 	ext2fs_mark_bb_dirty(fs);
@@ -3135,8 +3241,9 @@ retry_open:
 		}
 
 		ext2fs_mark_super_dirty(fs);
-		if (ext2fs_has_feature_metadata_csum(fs->super) &&
-		    !ext2fs_has_feature_csum_seed(fs->super))
+		if (!ext2fs_has_feature_csum_seed(fs->super) &&
+		    (ext2fs_has_feature_metadata_csum(fs->super) ||
+		     ext2fs_has_feature_ea_inode(fs->super)))
 			rewrite_checksums = 1;
 	}
 
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 10/12] fuse2fs: refuse to mount fs with ea_inode feature
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
                   ` (7 preceding siblings ...)
  2017-06-26 13:43 ` [PATCH 09/12] tune2fs: update ea_inode hashes when fs uuid changes Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 11/12] mke2fs: ea_inode is not supported for hurd Tahsin Erdogan
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

ext2fs_xattr_set() currently does not support creating xattr inodes,
so allowing fuse2fs to mount a filesystem with ea_inode feature could
lead to corruption. Refuse to mount if the ea_inode feature is set.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 misc/fuse2fs.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index b5897685c466..956348f8f876 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -3786,6 +3786,12 @@ int main(int argc, char *argv[])
 	global_fs->priv_data = &fctx;
 
 	ret = 3;
+	if (ext2fs_has_feature_ea_inode(global_fs->super)) {
+		printf(_("%s: fuse2fs does not support ea_inode feature.\n"),
+		       fctx.device);
+		goto out;
+	}
+
 	if (ext2fs_has_feature_journal_needs_recovery(global_fs->super)) {
 		if (!fctx.ro) {
 			printf(_("%s: recovering journal\n"), fctx.device);
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 11/12] mke2fs: ea_inode is not supported for hurd
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
                   ` (8 preceding siblings ...)
  2017-06-26 13:43 ` [PATCH 10/12] fuse2fs: refuse to mount fs with ea_inode feature Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-06-26 13:43 ` [PATCH 12/12] resize2fs: moving xattr inodes is not supported Tahsin Erdogan
  2017-07-05  4:04 ` [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Theodore Ts'o
  11 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

Extended attribute inodes store their refcount in inode.l_i_version
field which is not available for Hurd. Fail mke2fs for this
combination.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 misc/mke2fs.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/misc/mke2fs.c b/misc/mke2fs.c
index 1ef46f44f35d..85d88edc2375 100644
--- a/misc/mke2fs.c
+++ b/misc/mke2fs.c
@@ -1992,6 +1992,7 @@ profile_error:
 		ext2fs_clear_feature_filetype(&fs_param);
 		ext2fs_clear_feature_huge_file(&fs_param);
 		ext2fs_clear_feature_metadata_csum(&fs_param);
+		ext2fs_clear_feature_ea_inode(&fs_param);
 	}
 	edit_feature(fs_features ? fs_features : tmp,
 		     &fs_param.s_feature_compat);
@@ -2017,6 +2018,11 @@ profile_error:
 						"metadata_csum feature.\n"));
 			exit(1);
 		}
+		if (ext2fs_has_feature_ea_inode(&fs_param)) {
+			fprintf(stderr, "%s", _("The HURD does not support the "
+						"ea_inode feature.\n"));
+			exit(1);
+		}
 	}
 
 	/* Get the hardware sector sizes, if available */
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 12/12] resize2fs: moving xattr inodes is not supported
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
                   ` (9 preceding siblings ...)
  2017-06-26 13:43 ` [PATCH 11/12] mke2fs: ea_inode is not supported for hurd Tahsin Erdogan
@ 2017-06-26 13:43 ` Tahsin Erdogan
  2017-07-05  4:04 ` [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Theodore Ts'o
  11 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 13:43 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

In some cases, resize2fs needs to move inodes because their inode
number is greater than the maximum allowed. Moving extended attribute
inodes would require updating all the references to them. This
is currently not supported.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
 lib/ext2fs/ext2_err.et.in | 3 +++
 resize/resize2fs.c        | 9 +++++++++
 2 files changed, 12 insertions(+)

diff --git a/lib/ext2fs/ext2_err.et.in b/lib/ext2fs/ext2_err.et.in
index ac96964d93d0..c5a9ffcc420c 100644
--- a/lib/ext2fs/ext2_err.et.in
+++ b/lib/ext2fs/ext2_err.et.in
@@ -542,4 +542,7 @@ ec	EXT2_ET_CORRUPT_JOURNAL_SB,
 ec	EXT2_ET_INODE_CORRUPTED,
 	"Inode is corrupted"
 
+ec	EXT2_ET_CANNOT_MOVE_EA_INODE,
+	"Cannot move extended attribute inode"
+
 	end
diff --git a/resize/resize2fs.c b/resize/resize2fs.c
index 8f6d95e76dc8..a54564f08ae5 100644
--- a/resize/resize2fs.c
+++ b/resize/resize2fs.c
@@ -2057,6 +2057,15 @@ static errcode_t inode_scan_and_fix(ext2_resize_t rfs)
 		if (ino <= start_to_move)
 			goto remap_blocks; /* Don't need to move inode. */
 
+		/*
+		 * Moving an extended attribute inode requires updating all inodes
+		 * that reference it which is a lot more involved.
+		 */
+		if (inode->i_flags & EXT4_EA_INODE_FL) {
+			retval = EXT2_ET_CANNOT_MOVE_EA_INODE;
+			goto errout;
+		}
+
 		/*
 		 * Find a new inode.  Now that extents and directory blocks
 		 * are tied to the inode number through the checksum, we must
-- 
2.13.1.611.g7e3b11ae1-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 03/12] e2fsck: ea_inode hash validation
  2017-06-26 13:43 ` [PATCH 03/12] e2fsck: ea_inode hash validation Tahsin Erdogan
@ 2017-06-26 21:21   ` Andreas Dilger
  2017-06-28  1:41     ` [PATCH v2 " Tahsin Erdogan
  0 siblings, 1 reply; 25+ messages in thread
From: Andreas Dilger @ 2017-06-26 21:21 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: Darrick J . Wong, Theodore Ts'o, linux-ext4

[-- Attachment #1: Type: text/plain, Size: 3706 bytes --]

On Jun 26, 2017, at 7:43 AM, Tahsin Erdogan <tahsin@google.com> wrote:
> 
> In original implementation of ea_inode feature, each xattr inode had
> a single parent. Child inode tracked the parent by storing its inode
> number in i_mtime field. Also, child's i_generation matched parent's
> i_generation.
> 
> With deduplication support, xattr inodes can now be shared so a single
> backpointer is not sufficient to achieve strong binding. This is now
> replaced by hash validation.
> 
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
> e2fsck/pass1.c        | 91 +++++++++++++++++++++++++++++++++------------------
> e2fsck/problem.c      |  5 ---
> e2fsck/problem.h      |  7 ++--
> lib/ext2fs/ext2fs.h   |  3 ++
> lib/ext2fs/ext_attr.c | 67 +++++++++++++++++++++++++++++++++++--
> 5 files changed, 128 insertions(+), 45 deletions(-)
> 
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index 1c68af8990b4..32152f3ec926 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -2439,6 +2466,16 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
> 						pctx))
> 					goto clear_extattr;
> 			}
> +
> +			hash = ext2fs_ext_attr_hash_entry(entry, block_buf +
> +							  entry->e_value_offs);
> +
> +			if (entry->e_hash != hash) {
> +				pctx->num = entry->e_hash;
> +				if (fix_problem(ctx, PR_1_ATTR_HASH, pctx))
> +					goto clear_extattr;

Shouldn't this be "if (!fix_problem(...))" ?


> diff --git a/e2fsck/problem.h b/e2fsck/problem.h
> index dad608c94fd0..8e07c10895c1 100644
> --- a/e2fsck/problem.h
> +++ b/e2fsck/problem.h
> @@ -678,15 +678,12 @@ struct problem_context {
> /* Inode has illegal EA value inode */
> #define PR_1_ATTR_VALUE_EA_INODE		0x010083
> 
> -/* Invalid backpointer from EA inode to parent inode */
> -#define PR_1_ATTR_INVAL_EA_INODE		0x010084
> -
> /* Parent inode has invalid EA entry. EA inode does not have
>  * EXT4_EA_INODE_FL flag. Delete EA entry? */
> -#define PR_1_ATTR_NO_EA_INODE_FL		0x010085
> +#define PR_1_ATTR_NO_EA_INODE_FL		0x010084
> 
> /* EA inode for parent inode does not have EXT4_EA_INODE_FL flag */
> -#define PR_1_ATTR_SET_EA_INODE_FL		0x010086
> +#define PR_1_ATTR_SET_EA_INODE_FL		0x010085

It would also be OK to keep the old values, and just skip 0x10084.

> diff --git a/lib/ext2fs/ext_attr.c b/lib/ext2fs/ext_attr.c
> index 14d05c7e57fb..d48ab5c55270 100644
> --- a/lib/ext2fs/ext_attr.c
> +++ b/lib/ext2fs/ext_attr.c
> @@ -914,9 +955,29 @@ static errcode_t read_xattrs_from_buffer(struct ext2_xattr_handle *handle,
> 			void *data = (entry->e_value_inum != 0) ?
> 					0 : value_start + entry->e_value_offs;
> 
> -			hash = ext2fs_ext_attr_hash_entry(entry, data);
> -			if (entry->e_hash != hash)
> -				return EXT2_ET_BAD_EA_HASH;
> +			err = ext2fs_ext_attr_hash_entry2(handle->fs, entry,
> +							  data, &hash);
> +			if (err)
> +				return err;
> +			if (entry->e_hash != hash) {
> +				struct ext2_inode parent, child;
> +
> +				/* Check whether this is an old Lustre-style
> +				 * ea_inode reference.
> +				 */
> +				err = ext2fs_read_inode(handle->fs, handle->ino,
> +							&parent);

Rather than reading the inode again here, it would make more sense to pass the
parent inode from the caller.

> +				if (err)
> +					return err;
> +				err = ext2fs_read_inode(handle->fs,
> +							entry->e_value_inum,
> +							&child);
> +				if (err)
> +					return err;
> +				if (child.i_mtime != handle->ino ||
> +				    child.i_generation != parent.i_generation)
> +					return EXT2_ET_BAD_EA_HASH;
> +			}
> 		}
> 
> 		x++;
> --
> 2.13.1.611.g7e3b11ae1-goog
> 


Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 04/12] e2fsck: do not early terminate extra space check
  2017-06-26 13:43 ` [PATCH 04/12] e2fsck: do not early terminate extra space check Tahsin Erdogan
@ 2017-06-26 21:23   ` Andreas Dilger
  2017-06-26 23:57     ` Tahsin Erdogan
  0 siblings, 1 reply; 25+ messages in thread
From: Andreas Dilger @ 2017-06-26 21:23 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: Darrick J . Wong, Theodore Ts'o, linux-ext4

[-- Attachment #1: Type: text/plain, Size: 1121 bytes --]


> On Jun 26, 2017, at 7:43 AM, Tahsin Erdogan <tahsin@google.com> wrote:
> 
> When check_inode_extra_space() detects a problem with the value of
> i_extra_isize, it adjusts it and then returns without further validation
> of contents in the inode body. Change this so that it will proceed to
> check inline extended attributes.
> 
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
> e2fsck/pass1.c | 1 -
> 1 file changed, 1 deletion(-)
> 
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index 32152f3ec926..1532fd2067f2 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -582,7 +582,6 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
> 			inode->i_extra_isize = (inode->i_extra_isize + 3) & ~3;
> 		e2fsck_write_inode_full(ctx, pctx->ino, pctx->inode,
> 					EXT2_INODE_SIZE(sb), "pass1");
> -		return;
> 	}
> 
> 	/* check if there is no place for an EA header */

The problem is that if i_extra_isize is changed, then the EA magic and data
will no longer be aligned properly, so there isn't anything to check?


Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 06/12] e2fsck: update i_blocks accounting for ea_inode feature
  2017-06-26 13:43 ` [PATCH 06/12] e2fsck: update i_blocks accounting for ea_inode feature Tahsin Erdogan
@ 2017-06-26 21:36   ` Andreas Dilger
  2017-06-27 11:37     ` Tahsin Erdogan
  0 siblings, 1 reply; 25+ messages in thread
From: Andreas Dilger @ 2017-06-26 21:36 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: Darrick J . Wong, Theodore Ts'o, linux-ext4

[-- Attachment #1: Type: text/plain, Size: 14436 bytes --]

On Jun 26, 2017, at 7:43 AM, Tahsin Erdogan <tahsin@google.com> wrote:
> 
> With ea_inode feature, i_blocks include the disk space used by
> referenced xattr inodes. Make e2fsck aware of that.

This makes it all the more important that the "fast symlink" checking be
divorced from i_blocks, and instead depend on i_size.  Otherwise, we will
have yet another bunch of problems as large xattrs on a fast symlink will
incorrectly result in problems with the symlink.

This was most recently discussed in the thread:
[PATCH] libext2fs: correctly subtract xattr blocks on bigalloc filesystems
https://www.spinics.net/lists/linux-ext4/msg56601.html

All indications are that using i_size to distinguish fast symlinks is safe,
while using i_blocks has repeatedly caused breakage as new blocks are
allocated to the inode.

Cheers, Andreas

> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
> e2fsck/e2fsck.c |   4 ++
> e2fsck/e2fsck.h |   5 +++
> e2fsck/pass1.c  | 124 +++++++++++++++++++++++++++++++++++++++++---------------
> 3 files changed, 100 insertions(+), 33 deletions(-)
> 
> diff --git a/e2fsck/e2fsck.c b/e2fsck/e2fsck.c
> index 0f9da46a5a12..e0f449ee2047 100644
> --- a/e2fsck/e2fsck.c
> +++ b/e2fsck/e2fsck.c
> @@ -98,6 +98,10 @@ errcode_t e2fsck_reset_context(e2fsck_t ctx)
> 		ea_refcount_free(ctx->refcount_extra);
> 		ctx->refcount_extra = 0;
> 	}
> +	if (ctx->ea_block_quota) {
> +		ea_refcount_free(ctx->ea_block_quota);
> +		ctx->ea_block_quota = 0;
> +	}
> 	if (ctx->block_dup_map) {
> 		ext2fs_free_block_bitmap(ctx->block_dup_map);
> 		ctx->block_dup_map = 0;
> diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
> index f9285d01978c..79eeda9fbafb 100644
> --- a/e2fsck/e2fsck.h
> +++ b/e2fsck/e2fsck.h
> @@ -269,6 +269,11 @@ struct e2fsck_struct {
> 	ext2_refcount_t	refcount;
> 	ext2_refcount_t refcount_extra;
> 
> +	/*
> +	 * Quota blocks to be charged for each ea block.
> +	 */
> +	ext2_refcount_t ea_block_quota;
> +
> 	/*
> 	 * Array of flags indicating whether an inode bitmap, block
> 	 * bitmap, or inode table is invalid
> diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
> index 9ee2c5e89a61..cc88a7d05cef 100644
> --- a/e2fsck/pass1.c
> +++ b/e2fsck/pass1.c
> @@ -66,7 +66,7 @@ static int process_bad_block(ext2_filsys fs, blk64_t *block_nr,
> 			     e2_blkcnt_t blockcnt, blk64_t ref_blk,
> 			     int ref_offset, void *priv_data);
> static void check_blocks(e2fsck_t ctx, struct problem_context *pctx,
> -			 char *block_buf);
> +			 char *block_buf, blk64_t ea_ibody_quota_blocks);
> static void mark_table_blocks(e2fsck_t ctx);
> static void alloc_bb_map(e2fsck_t ctx);
> static void alloc_imagic_map(e2fsck_t ctx);
> @@ -103,6 +103,7 @@ struct process_block_struct {
> 
> struct process_inode_block {
> 	ext2_ino_t ino;
> +	blk64_t ea_ibody_quota_blocks;
> 	struct ext2_inode_large inode;
> };
> 
> @@ -351,13 +352,25 @@ static void mark_inode_ea_map(e2fsck_t ctx, struct problem_context *pctx,
> 	ext2fs_mark_inode_bitmap2(ctx->inode_ea_map, ino);
> }
> 
> +/*
> + * For a given size, calculate how many blocks would be charged towards quota.
> + */
> +static blk64_t size_to_quota_blocks(ext2_filsys fs, size_t size)
> +{
> +	blk64_t clusters;
> +
> +	clusters = DIV_ROUND_UP(size, fs->blocksize << fs->cluster_ratio_bits);
> +	return EXT2FS_C2B(fs, clusters);
> +}
> +
> /*
>  * Check validity of EA inode. Return 0 if EA inode is valid, otherwise return
>  * the problem code.
>  */
> static problem_t check_large_ea_inode(e2fsck_t ctx,
> 				      struct ext2_ext_attr_entry *entry,
> -				      struct problem_context *pctx)
> +				      struct problem_context *pctx,
> +				      blk64_t *quota_blocks)
> {
> 	struct ext2_inode inode;
> 	__u32 hash;
> @@ -380,11 +393,15 @@ static problem_t check_large_ea_inode(e2fsck_t ctx,
> 		fatal_error(ctx, 0);
> 	}
> 
> -	if (hash != entry->e_hash) {
> +	if (hash == entry->e_hash) {
> +		*quota_blocks = size_to_quota_blocks(ctx->fs,
> +						     entry->e_value_size);
> +	} else {
> 		/* This might be an old Lustre-style ea_inode reference. */
> -		if (inode.i_mtime != pctx->ino ||
> -		    inode.i_generation != pctx->inode->i_generation) {
> -
> +		if (inode.i_mtime == pctx->ino &&
> +		    inode.i_generation == pctx->inode->i_generation) {
> +			*quota_blocks = 0;
> +		} else {
> 			/* If target inode is also missing EA_INODE flag,
> 			 * this is likely to be a bad reference.
> 			 */
> @@ -411,7 +428,8 @@ static problem_t check_large_ea_inode(e2fsck_t ctx,
> 	return 0;
> }
> 
> -static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
> +static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx,
> +			      blk64_t *ea_ibody_quota_blocks)
> {
> 	struct ext2_super_block *sb = ctx->fs->super;
> 	struct ext2_inode_large *inode;
> @@ -420,6 +438,9 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
> 	unsigned int storage_size, remain;
> 	problem_t problem = 0;
> 	region_t region = 0;
> +	blk64_t quota_blocks = 0;
> +
> +	*ea_ibody_quota_blocks = 0;
> 
> 	inode = (struct ext2_inode_large *) pctx->inode;
> 	storage_size = EXT2_INODE_SIZE(ctx->fs->super) - EXT2_GOOD_OLD_INODE_SIZE -
> @@ -496,11 +517,15 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
> 				goto fix;
> 			}
> 		} else {
> -			problem = check_large_ea_inode(ctx, entry, pctx);
> +			blk64_t entry_quota_blocks;
> +
> +			problem = check_large_ea_inode(ctx, entry, pctx,
> +						       &entry_quota_blocks);
> 			if (problem != 0)
> 				goto fix;
> 
> 			mark_inode_ea_map(ctx, pctx, entry->e_value_inum);
> +			quota_blocks += entry_quota_blocks;
> 		}
> 
> 		/* If EA value is stored in external inode then it does not
> @@ -523,8 +548,10 @@ fix:
> 	 * it seems like a corruption. it's very unlikely we could repair
> 	 * EA(s) in automatic fashion -bzzz
> 	 */
> -	if (problem == 0 || !fix_problem(ctx, problem, pctx))
> +	if (problem == 0 || !fix_problem(ctx, problem, pctx)) {
> +		*ea_ibody_quota_blocks = quota_blocks;
> 		return;
> +	}
> 
> 	/* simply remove all possible EA(s) */
> 	*((__u32 *)header) = 0UL;
> @@ -547,13 +574,16 @@ static int check_inode_extra_negative_epoch(__u32 xtime, __u32 extra) {
>  */
> #define EXT4_EXTRA_NEGATIVE_DATE_CUTOFF 2 * (1LL << 32)
> 
> -static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
> +static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx,
> +				    blk64_t *ea_ibody_quota_blocks)
> {
> 	struct ext2_super_block *sb = ctx->fs->super;
> 	struct ext2_inode_large *inode;
> 	__u32 *eamagic;
> 	int min, max;
> 
> +	*ea_ibody_quota_blocks = 0;
> +
> 	inode = (struct ext2_inode_large *) pctx->inode;
> 	if (EXT2_INODE_SIZE(sb) == EXT2_GOOD_OLD_INODE_SIZE) {
> 		/* this isn't large inode. so, nothing to check */
> @@ -592,7 +622,7 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
> 			inode->i_extra_isize);
> 	if (*eamagic == EXT2_EXT_ATTR_MAGIC) {
> 		/* it seems inode has an extended attribute(s) in body */
> -		check_ea_in_inode(ctx, pctx);
> +		check_ea_in_inode(ctx, pctx, ea_ibody_quota_blocks);
> 	}
> 
> 	/*
> @@ -1150,6 +1180,7 @@ void e2fsck_pass1(e2fsck_t ctx)
> 	int		failed_csum = 0;
> 	ext2_ino_t	ino_threshold = 0;
> 	dgrp_t		ra_group = 0;
> +	blk64_t		ea_ibody_quota_blocks;
> 
> 	init_resource_track(&rtrack, ctx->fs->io);
> 	clear_problem_context(&pctx);
> @@ -1695,7 +1726,7 @@ void e2fsck_pass1(e2fsck_t ctx)
> 							   "pass1");
> 					failed_csum = 0;
> 				}
> -				check_blocks(ctx, &pctx, block_buf);
> +				check_blocks(ctx, &pctx, block_buf, 0);
> 				FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
> 				continue;
> 			}
> @@ -1722,7 +1753,7 @@ void e2fsck_pass1(e2fsck_t ctx)
> 							"pass1");
> 					failed_csum = 0;
> 				}
> -				check_blocks(ctx, &pctx, block_buf);
> +				check_blocks(ctx, &pctx, block_buf, 0);
> 				FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
> 				continue;
> 			}
> @@ -1760,7 +1791,7 @@ void e2fsck_pass1(e2fsck_t ctx)
> 					failed_csum = 0;
> 				}
> 			}
> -			check_blocks(ctx, &pctx, block_buf);
> +			check_blocks(ctx, &pctx, block_buf, 0);
> 			FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
> 			continue;
> 		}
> @@ -1825,7 +1856,7 @@ void e2fsck_pass1(e2fsck_t ctx)
> 			}
> 		}
> 
> -		check_inode_extra_space(ctx, &pctx);
> +		check_inode_extra_space(ctx, &pctx, &ea_ibody_quota_blocks);
> 		check_is_really_dir(ctx, &pctx, block_buf);
> 
> 		/*
> @@ -1872,7 +1903,8 @@ void e2fsck_pass1(e2fsck_t ctx)
> 				continue;
> 			} else if (ext2fs_inode_data_blocks(fs, inode) == 0) {
> 				ctx->fs_fast_symlinks_count++;
> -				check_blocks(ctx, &pctx, block_buf);
> +				check_blocks(ctx, &pctx, block_buf,
> +					     ea_ibody_quota_blocks);
> 				FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
> 				continue;
> 			}
> @@ -1906,17 +1938,19 @@ void e2fsck_pass1(e2fsck_t ctx)
> 		     inode->i_block[EXT2_DIND_BLOCK] ||
> 		     inode->i_block[EXT2_TIND_BLOCK] ||
> 		     ext2fs_file_acl_block(fs, inode))) {
> -			struct ext2_inode_large *ip;
> +			struct process_inode_block *itp;
> 
> -			inodes_to_process[process_inode_count].ino = ino;
> -			ip = &inodes_to_process[process_inode_count].inode;
> +			itp = &inodes_to_process[process_inode_count];
> +			itp->ino = ino;
> +			itp->ea_ibody_quota_blocks = ea_ibody_quota_blocks;
> 			if (inode_size < sizeof(struct ext2_inode_large))
> -				memcpy(ip, inode, inode_size);
> +				memcpy(&itp->inode, inode, inode_size);
> 			else
> -				memcpy(ip, inode, sizeof(*ip));
> +				memcpy(&itp->inode, inode, sizeof(itp->inode));
> 			process_inode_count++;
> 		} else
> -			check_blocks(ctx, &pctx, block_buf);
> +			check_blocks(ctx, &pctx, block_buf,
> +				     ea_ibody_quota_blocks);
> 
> 		FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
> 
> @@ -2086,7 +2120,8 @@ static void process_inodes(e2fsck_t ctx, char *block_buf)
> 		sprintf(buf, _("reading indirect blocks of inode %u"),
> 			pctx.ino);
> 		ehandler_operation(buf);
> -		check_blocks(ctx, &pctx, block_buf);
> +		check_blocks(ctx, &pctx, block_buf,
> +			     inodes_to_process[i].ea_ibody_quota_blocks);
> 		if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
> 			break;
> 	}
> @@ -2306,7 +2341,7 @@ static void adjust_extattr_refcount(e2fsck_t ctx, ext2_refcount_t refcount,
>  * Handle processing the extended attribute blocks
>  */
> static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
> -			   char *block_buf)
> +			   char *block_buf, blk64_t *ea_block_quota_blocks)
> {
> 	ext2_filsys fs = ctx->fs;
> 	ext2_ino_t	ino = pctx->ino;
> @@ -2315,7 +2350,7 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
> 	char *		end;
> 	struct ext2_ext_attr_header *header;
> 	struct ext2_ext_attr_entry *entry;
> -	int		count;
> +	blk64_t		quota_blocks = EXT2FS_C2B(fs, 1);
> 	region_t	region = 0;
> 	int		failed_csum = 0;
> 
> @@ -2369,6 +2404,12 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
> 
> 	/* Have we seen this EA block before? */
> 	if (ext2fs_fast_test_block_bitmap2(ctx->block_ea_map, blk)) {
> +		if (ctx->ea_block_quota)
> +			ea_refcount_fetch(ctx->ea_block_quota, blk,
> +					  ea_block_quota_blocks);
> +		else
> +			*ea_block_quota_blocks = 0;
> +
> 		if (ea_refcount_decrement(ctx->refcount, blk, 0) == 0)
> 			return 1;
> 		/* Ooops, this EA was referenced more than it stated */
> @@ -2477,13 +2518,17 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
> 			}
> 		} else {
> 			problem_t problem;
> +			blk64_t entry_quota_blocks;
> 
> -			problem = check_large_ea_inode(ctx, entry, pctx);
> +			problem = check_large_ea_inode(ctx, entry, pctx,
> +						       &entry_quota_blocks);
> 			if (problem == 0)
> 				mark_inode_ea_map(ctx, pctx,
> 						  entry->e_value_inum);
> 			else if (fix_problem(ctx, problem, pctx))
> 				goto clear_extattr;
> +
> +			quota_blocks += entry_quota_blocks;
> 		}
> 
> 		entry = EXT2_EXT_ATTR_NEXT(entry);
> @@ -2506,9 +2551,21 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
> 			return 0;
> 	}
> 
> -	count = header->h_refcount - 1;
> -	if (count)
> -		ea_refcount_store(ctx->refcount, blk, count);
> +	*ea_block_quota_blocks = quota_blocks;
> +	if (quota_blocks) {
> +		if (!ctx->ea_block_quota) {
> +			pctx->errcode = ea_refcount_create(0,
> +							&ctx->ea_block_quota);
> +			if (pctx->errcode) {
> +				pctx->num = 3;
> +				fix_problem(ctx, PR_1_ALLOCATE_REFCOUNT, pctx);
> +				ctx->flags |= E2F_FLAG_ABORT;
> +				return 0;
> +			}
> +		}
> +		ea_refcount_store(ctx->ea_block_quota, blk, quota_blocks);
> +	}
> +	ea_refcount_store(ctx->refcount, blk, header->h_refcount - 1);
> 	mark_block_used(ctx, blk);
> 	ext2fs_fast_mark_block_bitmap2(ctx->block_ea_map, blk);
> 	return 1;
> @@ -3162,7 +3219,7 @@ err:
>  * blocks used by that inode.
>  */
> static void check_blocks(e2fsck_t ctx, struct problem_context *pctx,
> -			 char *block_buf)
> +			 char *block_buf, blk64_t ea_ibody_quota_blocks)
> {
> 	ext2_filsys fs = ctx->fs;
> 	struct process_block_struct pb;
> @@ -3173,9 +3230,10 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx,
> 	int		extent_fs;
> 	int		inlinedata_fs;
> 	__u64		size;
> +	blk64_t		ea_block_quota_blocks = 0;
> 
> 	pb.ino = ino;
> -	pb.num_blocks = 0;
> +	pb.num_blocks = EXT2FS_B2C(ctx->fs, ea_ibody_quota_blocks);
> 	pb.last_block = ~0;
> 	pb.last_init_lblock = -1;
> 	pb.last_db_block = -1;
> @@ -3198,10 +3256,10 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx,
> 	extent_fs = ext2fs_has_feature_extents(ctx->fs->super);
> 	inlinedata_fs = ext2fs_has_feature_inline_data(ctx->fs->super);
> 
> -	if (check_ext_attr(ctx, pctx, block_buf)) {
> +	if (check_ext_attr(ctx, pctx, block_buf, &ea_block_quota_blocks)) {
> 		if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
> 			goto out;
> -		pb.num_blocks++;
> +		pb.num_blocks += EXT2FS_B2C(ctx->fs, ea_block_quota_blocks);
> 	}
> 
> 	if (inlinedata_fs && (inode->i_flags & EXT4_INLINE_DATA_FL))
> --
> 2.13.1.611.g7e3b11ae1-goog
> 


Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 07/12] e2fsck: track ea_inode references
  2017-06-26 13:43 ` [PATCH 07/12] e2fsck: track ea_inode references Tahsin Erdogan
@ 2017-06-26 21:45   ` Andreas Dilger
  2017-06-27  1:25     ` Tahsin Erdogan
  0 siblings, 1 reply; 25+ messages in thread
From: Andreas Dilger @ 2017-06-26 21:45 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: Darrick J . Wong, Theodore Ts'o, linux-ext4

[-- Attachment #1: Type: text/plain, Size: 1261 bytes --]

On Jun 26, 2017, at 7:43 AM, Tahsin Erdogan <tahsin@google.com> wrote:
> 
> An extended attribute inode has a ref count to track how many entries
> point to it. Update e2fsck to verify that the stored ref count is
> correct.
> 
> Signed-off-by: Tahsin Erdogan <tahsin@google.com>
> ---
> diff --git a/e2fsck/message.c b/e2fsck/message.c
> index 34201a37fd4b..525f4a1e7628 100644
> --- a/e2fsck/message.c
> +++ b/e2fsck/message.c
> @@ -466,6 +466,13 @@ static _INLINE_ void expand_percent_expression(FILE *f, ext2_filsys fs,
> 		fprintf(f, "%*u", width, ctx->num);
> #else
> 		fprintf(f, "%*llu", width, (long long)ctx->num);
> +#endif
> +		break;
> +	case 'n':
> +#ifdef EXT2_NO_64_TYPE
> +		fprintf(f, "%*u", width, ctx->num2);
> +#else
> +		fprintf(f, "%*llu", width, (long long)ctx->num2);
> #endif

Rather than a series of "#ifdef EXT2_NO_64_TYPE" checks, it would be cleaner
to have a single #define to set the printf type, like:

#ifdef EXT2_NO_64_TYPE
#define EXT2_64U "%*u"
#define EXT2_64D "%*d"
#define EXT2_64X "%*x"
#else
#define EXT2_64U "%*llu"
#define EXT2_64D "%*lld"
#define EXT2_64X "%*llx"
#endif

and then use "EXT2_64D" or "EXT2_64U" in all of these fprintf() statements.

Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 04/12] e2fsck: do not early terminate extra space check
  2017-06-26 21:23   ` Andreas Dilger
@ 2017-06-26 23:57     ` Tahsin Erdogan
  0 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-26 23:57 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Darrick J . Wong, Theodore Ts'o, Ext4 Developers List

>> @@ -582,7 +582,6 @@ static void check_inode_extra_space(e2fsck_t ctx, struct problem_context *pctx)
>>                       inode->i_extra_isize = (inode->i_extra_isize + 3) & ~3;
>>               e2fsck_write_inode_full(ctx, pctx->ino, pctx->inode,
>>                                       EXT2_INODE_SIZE(sb), "pass1");
>> -             return;
>>       }
>>
>>       /* check if there is no place for an EA header */
>
> The problem is that if i_extra_isize is changed, then the EA magic and data
> will no longer be aligned properly, so there isn't anything to check?
>
In the rest of the function, inline extended attributes and
i_*time_extra fields are checked. If i_extra_size moves from its
original location, magic field won't be found and inline extended
attribute check will be skipped. But if it happens to be corrected to
its original location, then we should check the ea contents. Also, we
should check the time extra fields.

I don't really see a good reason to return early. Please let me know
if I missing something.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 07/12] e2fsck: track ea_inode references
  2017-06-26 21:45   ` Andreas Dilger
@ 2017-06-27  1:25     ` Tahsin Erdogan
  2017-06-27 18:43       ` Andreas Dilger
  0 siblings, 1 reply; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-27  1:25 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Darrick J . Wong, Theodore Ts'o, Ext4 Developers List

>> +#ifdef EXT2_NO_64_TYPE
>> +             fprintf(f, "%*u", width, ctx->num2);
>> +#else
>> +             fprintf(f, "%*llu", width, (long long)ctx->num2);
>> #endif
>
> Rather than a series of "#ifdef EXT2_NO_64_TYPE" checks, it would be cleaner
> to have a single #define to set the printf type, like:
>
> #ifdef EXT2_NO_64_TYPE
> #define EXT2_64U "%*u"
> #define EXT2_64D "%*d"
> #define EXT2_64X "%*x"
> #else
> #define EXT2_64U "%*llu"
> #define EXT2_64D "%*lld"
> #define EXT2_64X "%*llx"
> #endif

I am trying to figure out the purpose of #ifdef EXT2_NO_64_TYPE checks
now. Who defines it and what problem does it solve?

thanks

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 06/12] e2fsck: update i_blocks accounting for ea_inode feature
  2017-06-26 21:36   ` Andreas Dilger
@ 2017-06-27 11:37     ` Tahsin Erdogan
  2017-06-30  1:39       ` Tahsin Erdogan
  0 siblings, 1 reply; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-27 11:37 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Darrick J . Wong, Theodore Ts'o, Ext4 Developers List

On Mon, Jun 26, 2017 at 2:36 PM, Andreas Dilger <adilger@dilger.ca> wrote:
> On Jun 26, 2017, at 7:43 AM, Tahsin Erdogan <tahsin@google.com> wrote:
>>
>> With ea_inode feature, i_blocks include the disk space used by
>> referenced xattr inodes. Make e2fsck aware of that.
>
> This makes it all the more important that the "fast symlink" checking be
> divorced from i_blocks, and instead depend on i_size.  Otherwise, we will
> have yet another bunch of problems as large xattrs on a fast symlink will
> incorrectly result in problems with the symlink.
>
Totally agree. The changes to i_blocks accounting for ea_inode feature
likely broke ext4_inode_is_fast_symlink() in kernel. I will try to fix
that first.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 07/12] e2fsck: track ea_inode references
  2017-06-27  1:25     ` Tahsin Erdogan
@ 2017-06-27 18:43       ` Andreas Dilger
  2017-06-28  1:26         ` Tahsin Erdogan
  0 siblings, 1 reply; 25+ messages in thread
From: Andreas Dilger @ 2017-06-27 18:43 UTC (permalink / raw)
  To: Tahsin Erdogan; +Cc: Darrick J . Wong, Theodore Ts'o, Ext4 Developers List

[-- Attachment #1: Type: text/plain, Size: 1022 bytes --]


> On Jun 26, 2017, at 7:25 PM, Tahsin Erdogan <tahsin@google.com> wrote:
> 
>>> +#ifdef EXT2_NO_64_TYPE
>>> +             fprintf(f, "%*u", width, ctx->num2);
>>> +#else
>>> +             fprintf(f, "%*llu", width, (long long)ctx->num2);
>>> #endif
>> 
>> Rather than a series of "#ifdef EXT2_NO_64_TYPE" checks, it would be cleaner
>> to have a single #define to set the printf type, like:
>> 
>> #ifdef EXT2_NO_64_TYPE
>> #define EXT2_64U "%*u"
>> #define EXT2_64D "%*d"
>> #define EXT2_64X "%*x"
>> #else
>> #define EXT2_64U "%*llu"
>> #define EXT2_64D "%*lld"
>> #define EXT2_64X "%*llx"
>> #endif
> 
> I am trying to figure out the purpose of #ifdef EXT2_NO_64_TYPE checks
> now. Who defines it and what problem does it solve?

Sorry, I can't answer that.  Git history shows that it has been around since
2000 but is not set by the configure script.  I just thought it made sense to
clean up the code, but it might be even better to get rid of it completely.

Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 195 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 07/12] e2fsck: track ea_inode references
  2017-06-27 18:43       ` Andreas Dilger
@ 2017-06-28  1:26         ` Tahsin Erdogan
  0 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-28  1:26 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Darrick J . Wong, Theodore Ts'o, Ext4 Developers List

> Sorry, I can't answer that.  Git history shows that it has been around since
> 2000 but is not set by the configure script.  I just thought it made sense to
> clean up the code, but it might be even better to get rid of it completely.

Sounds good. Sent it out as a separate patch ("[PATCH] e2fsck: remove
#ifdef EXT2_NO_64_TYPE blocks").

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH v2 03/12] e2fsck: ea_inode hash validation
  2017-06-26 21:21   ` Andreas Dilger
@ 2017-06-28  1:41     ` Tahsin Erdogan
  2017-06-28  1:47       ` Tahsin Erdogan
  0 siblings, 1 reply; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-28  1:41 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o, linux-ext4
  Cc: Tahsin Erdogan

In original implementation of ea_inode feature, each xattr inode had
a single parent. Child inode tracked the parent by storing its inode
number in i_mtime field. Also, child's i_generation matched parent's
i_generation.

With deduplication support, xattr inodes can now be shared so a single
backpointer is not sufficient to achieve strong binding. This is now
replaced by hash validation.

Signed-off-by: Tahsin Erdogan <tahsin@google.com>
---
v2:
 - restored original problem number definitions
 - pass parent inode to read_xattrs_from_buffer() instead of
   reading it again
 
 e2fsck/pass1.c        | 91 +++++++++++++++++++++++++++++++++------------------
 e2fsck/problem.c      |  5 ---
 e2fsck/problem.h      |  3 --
 lib/ext2fs/ext2fs.h   |  3 ++
 lib/ext2fs/ext_attr.c | 68 +++++++++++++++++++++++++++++++++++---
 5 files changed, 125 insertions(+), 45 deletions(-)

diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index 1c68af8990b4..32152f3ec926 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -360,27 +360,54 @@ static problem_t check_large_ea_inode(e2fsck_t ctx,
 				      struct problem_context *pctx)
 {
 	struct ext2_inode inode;
+	__u32 hash;
+	errcode_t retval;
 
 	/* Check if inode is within valid range */
 	if ((entry->e_value_inum < EXT2_FIRST_INODE(ctx->fs->super)) ||
-	    (entry->e_value_inum > ctx->fs->super->s_inodes_count))
+	    (entry->e_value_inum > ctx->fs->super->s_inodes_count)) {
+		pctx->num = entry->e_value_inum;
 		return PR_1_ATTR_VALUE_EA_INODE;
+	}
 
 	e2fsck_read_inode(ctx, entry->e_value_inum, &inode, "pass1");
+
+	retval = ext2fs_ext_attr_hash_entry2(ctx->fs, entry, NULL, &hash);
+	if (retval) {
+		com_err("check_large_ea_inode", retval,
+			_("while hashing entry with e_value_inum = %u"),
+			entry->e_value_inum);
+		fatal_error(ctx, 0);
+	}
+
+	if (hash != entry->e_hash) {
+		/* This might be an old Lustre-style ea_inode reference. */
+		if (inode.i_mtime != pctx->ino ||
+		    inode.i_generation != pctx->inode->i_generation) {
+
+			/* If target inode is also missing EA_INODE flag,
+			 * this is likely to be a bad reference.
+			 */
+			if (!(inode.i_flags & EXT4_EA_INODE_FL)) {
+				pctx->num = entry->e_value_inum;
+				return PR_1_ATTR_VALUE_EA_INODE;
+			} else {
+				pctx->num = entry->e_hash;
+				return PR_1_ATTR_HASH;
+			}
+		}
+	}
+
 	if (!(inode.i_flags & EXT4_EA_INODE_FL)) {
-		/* If EXT4_EA_INODE_FL flag is not present but back-pointer
-		 * matches then we should set this flag */
-		if (inode.i_mtime == pctx->ino &&
-		    inode.i_generation == pctx->inode->i_generation &&
-		    fix_problem(ctx, PR_1_ATTR_SET_EA_INODE_FL, pctx)) {
+		pctx->num = entry->e_value_inum;
+		if (fix_problem(ctx, PR_1_ATTR_SET_EA_INODE_FL, pctx)) {
 			inode.i_flags |= EXT4_EA_INODE_FL;
-			ext2fs_write_inode(ctx->fs, entry->e_value_inum,&inode);
-		} else
+			ext2fs_write_inode(ctx->fs, entry->e_value_inum,
+					   &inode);
+		} else {
 			return PR_1_ATTR_NO_EA_INODE_FL;
-	} else if (inode.i_mtime != pctx->ino ||
-		   inode.i_generation != pctx->inode->i_generation)
-		return PR_1_ATTR_INVAL_EA_INODE;
-
+		}
+	}
 	return 0;
 }
 
@@ -458,6 +485,16 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
 				problem = PR_1_INODE_EA_ALLOC_COLLISION;
 				goto fix;
 			}
+
+			hash = ext2fs_ext_attr_hash_entry(entry,
+							  start + entry->e_value_offs);
+
+			/* e_hash may be 0 in older inode's ea */
+			if (entry->e_hash != 0 && entry->e_hash != hash) {
+				pctx->num = entry->e_hash;
+				problem = PR_1_ATTR_HASH;
+				goto fix;
+			}
 		} else {
 			problem = check_large_ea_inode(ctx, entry, pctx);
 			if (problem != 0)
@@ -466,16 +503,6 @@ static void check_ea_in_inode(e2fsck_t ctx, struct problem_context *pctx)
 			mark_inode_ea_map(ctx, pctx, entry->e_value_inum);
 		}
 
-		hash = ext2fs_ext_attr_hash_entry(entry,
-						  start + entry->e_value_offs);
-
-		/* e_hash may be 0 in older inode's ea */
-		if (entry->e_hash != 0 && entry->e_hash != hash) {
-			pctx->num = entry->e_hash;
-			problem = PR_1_ATTR_HASH;
-			goto fix;
-		}
-
 		/* If EA value is stored in external inode then it does not
 		 * consume space here */
 		if (entry->e_value_inum == 0)
@@ -2439,6 +2466,16 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 						pctx))
 					goto clear_extattr;
 			}
+
+			hash = ext2fs_ext_attr_hash_entry(entry, block_buf +
+							  entry->e_value_offs);
+
+			if (entry->e_hash != hash) {
+				pctx->num = entry->e_hash;
+				if (fix_problem(ctx, PR_1_ATTR_HASH, pctx))
+					goto clear_extattr;
+				entry->e_hash = hash;
+			}
 		} else {
 			problem_t problem;
 
@@ -2450,16 +2487,6 @@ static int check_ext_attr(e2fsck_t ctx, struct problem_context *pctx,
 				goto clear_extattr;
 		}
 
-		hash = ext2fs_ext_attr_hash_entry(entry, block_buf +
-							 entry->e_value_offs);
-
-		if (entry->e_hash != hash) {
-			pctx->num = entry->e_hash;
-			if (fix_problem(ctx, PR_1_ATTR_HASH, pctx))
-				goto clear_extattr;
-			entry->e_hash = hash;
-		}
-
 		entry = EXT2_EXT_ATTR_NEXT(entry);
 	}
 	if (region_allocate(region, (char *)entry - (char *)header, 4)) {
diff --git a/e2fsck/problem.c b/e2fsck/problem.c
index dcdf9abe1abd..deffa4d66a89 100644
--- a/e2fsck/problem.c
+++ b/e2fsck/problem.c
@@ -1150,11 +1150,6 @@ static struct e2fsck_problem problem_table[] = {
 	  N_("@i %i has @I @a value @i %N.\n"),
 	  PROMPT_CLEAR, PR_PREEN_OK },
 
-	/* Invalid backpointer from extended attribute inode to parent inode */
-	{ PR_1_ATTR_INVAL_EA_INODE,
-	  N_("@n backpointer from @a @i %N to parent @i %i.\n"),
-	  PROMPT_CLEAR, PR_PREEN_OK },
-
 	/* Inode has invalid extended attribute. EA inode missing
 	 * EA_INODE flag. */
 	{ PR_1_ATTR_NO_EA_INODE_FL,
diff --git a/e2fsck/problem.h b/e2fsck/problem.h
index dad608c94fd0..60cc764fd2df 100644
--- a/e2fsck/problem.h
+++ b/e2fsck/problem.h
@@ -678,9 +678,6 @@ struct problem_context {
 /* Inode has illegal EA value inode */
 #define PR_1_ATTR_VALUE_EA_INODE		0x010083
 
-/* Invalid backpointer from EA inode to parent inode */
-#define PR_1_ATTR_INVAL_EA_INODE		0x010084
-
 /* Parent inode has invalid EA entry. EA inode does not have
  * EXT4_EA_INODE_FL flag. Delete EA entry? */
 #define PR_1_ATTR_NO_EA_INODE_FL		0x010085
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index f4131a640f9c..f72cbe17a8cd 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1198,6 +1198,9 @@ extern errcode_t ext2fs_expand_dir(ext2_filsys fs, ext2_ino_t dir);
 /* ext_attr.c */
 extern __u32 ext2fs_ext_attr_hash_entry(struct ext2_ext_attr_entry *entry,
 					void *data);
+extern errcode_t ext2fs_ext_attr_hash_entry2(ext2_filsys fs,
+					     struct ext2_ext_attr_entry *entry,
+					     void *data, __u32 *hash);
 extern errcode_t ext2fs_read_ext_attr(ext2_filsys fs, blk_t block, void *buf);
 extern errcode_t ext2fs_read_ext_attr2(ext2_filsys fs, blk64_t block,
 				       void *buf);
diff --git a/lib/ext2fs/ext_attr.c b/lib/ext2fs/ext_attr.c
index 14d05c7e57fb..d2ce51bcdcd8 100644
--- a/lib/ext2fs/ext_attr.c
+++ b/lib/ext2fs/ext_attr.c
@@ -25,6 +25,18 @@
 
 #include "ext2fs.h"
 
+static errcode_t read_ea_inode_hash(ext2_filsys fs, ext2_ino_t ino, __u32 *hash)
+{
+	struct ext2_inode inode;
+	errcode_t retval;
+
+	retval = ext2fs_read_inode(fs, ino, &inode);
+	if (retval)
+		return retval;
+	*hash = inode.i_atime;
+	return 0;
+}
+
 #define NAME_HASH_SHIFT 5
 #define VALUE_HASH_SHIFT 16
 
@@ -59,6 +71,35 @@ __u32 ext2fs_ext_attr_hash_entry(struct ext2_ext_attr_entry *entry, void *data)
 	return hash;
 }
 
+/*
+ * ext2fs_ext_attr_hash_entry2()
+ *
+ * Compute the hash of an extended attribute.
+ * This version of the function supports hashing entries that reference
+ * external inodes (ea_inode feature).
+ */
+errcode_t ext2fs_ext_attr_hash_entry2(ext2_filsys fs,
+				      struct ext2_ext_attr_entry *entry,
+				      void *data, __u32 *hash)
+{
+	*hash = ext2fs_ext_attr_hash_entry(entry, data);
+
+	if (entry->e_value_inum) {
+		__u32 ea_inode_hash;
+		errcode_t retval;
+
+		retval = read_ea_inode_hash(fs, entry->e_value_inum,
+					    &ea_inode_hash);
+		if (retval)
+			return retval;
+
+		*hash = (*hash << VALUE_HASH_SHIFT) ^
+			(*hash >> (8*sizeof(*hash) - VALUE_HASH_SHIFT)) ^
+			ea_inode_hash;
+	}
+	return 0;
+}
+
 static errcode_t check_ext_attr_header(struct ext2_ext_attr_header *header)
 {
 	if ((header->h_magic != EXT2_EXT_ATTR_MAGIC_v1 &&
@@ -785,6 +826,7 @@ out:
 }
 
 static errcode_t read_xattrs_from_buffer(struct ext2_xattr_handle *handle,
+					 struct ext2_inode_large *inode,
 					 struct ext2_ext_attr_entry *entries,
 					 unsigned int storage_size,
 					 char *value_start,
@@ -914,9 +956,25 @@ static errcode_t read_xattrs_from_buffer(struct ext2_xattr_handle *handle,
 			void *data = (entry->e_value_inum != 0) ?
 					0 : value_start + entry->e_value_offs;
 
-			hash = ext2fs_ext_attr_hash_entry(entry, data);
-			if (entry->e_hash != hash)
-				return EXT2_ET_BAD_EA_HASH;
+			err = ext2fs_ext_attr_hash_entry2(handle->fs, entry,
+							  data, &hash);
+			if (err)
+				return err;
+			if (entry->e_hash != hash) {
+				struct ext2_inode child;
+
+				/* Check whether this is an old Lustre-style
+				 * ea_inode reference.
+				 */
+				err = ext2fs_read_inode(handle->fs,
+							entry->e_value_inum,
+							&child);
+				if (err)
+					return err;
+				if (child.i_mtime != handle->ino ||
+				    child.i_generation != inode->i_generation)
+					return EXT2_ET_BAD_EA_HASH;
+			}
 		}
 
 		x++;
@@ -989,7 +1047,7 @@ errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle)
 		start = ((char *) inode) + EXT2_GOOD_OLD_INODE_SIZE +
 			inode->i_extra_isize + sizeof(__u32);
 
-		err = read_xattrs_from_buffer(handle,
+		err = read_xattrs_from_buffer(handle, inode,
 			(struct ext2_ext_attr_entry *) start, storage_size,
 					      start, &handle->count);
 		if (err)
@@ -1026,7 +1084,7 @@ read_ea_block:
 		storage_size = handle->fs->blocksize -
 			sizeof(struct ext2_ext_attr_header);
 		start = block_buf + sizeof(struct ext2_ext_attr_header);
-		err = read_xattrs_from_buffer(handle,
+		err = read_xattrs_from_buffer(handle, inode,
 			(struct ext2_ext_attr_entry *) start, storage_size,
 					      block_buf, &handle->count);
 		if (err)
-- 
2.13.2.725.g09c95d1e9-goog

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH v2 03/12] e2fsck: ea_inode hash validation
  2017-06-28  1:41     ` [PATCH v2 " Tahsin Erdogan
@ 2017-06-28  1:47       ` Tahsin Erdogan
  0 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-28  1:47 UTC (permalink / raw)
  To: Andreas Dilger, Darrick J . Wong, Theodore Ts'o,
	Ext4 Developers List
  Cc: Tahsin Erdogan

>> +                             if (fix_problem(ctx, PR_1_ATTR_HASH, pctx))
>> +                                     goto clear_extattr;
>
> Shouldn't this be "if (!fix_problem(...))" ?

Unfortunately here "fix" means clearing all entries so it is as
intended. This is the way it was before, I haven't changed the logic.


>> -#define PR_1_ATTR_SET_EA_INODE_FL            0x010086
>> +#define PR_1_ATTR_SET_EA_INODE_FL            0x010085
>
> It would also be OK to keep the old values, and just skip 0x10084.

Done.

>> +                             err = ext2fs_read_inode(handle->fs, handle->ino,
>> +                                                     &parent);
>
> Rather than reading the inode again here, it would make more sense to pass the
> parent inode from the caller.

Done.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 06/12] e2fsck: update i_blocks accounting for ea_inode feature
  2017-06-27 11:37     ` Tahsin Erdogan
@ 2017-06-30  1:39       ` Tahsin Erdogan
  0 siblings, 0 replies; 25+ messages in thread
From: Tahsin Erdogan @ 2017-06-30  1:39 UTC (permalink / raw)
  To: Andreas Dilger; +Cc: Darrick J . Wong, Theodore Ts'o, Ext4 Developers List

>> This makes it all the more important that the "fast symlink" checking be
>> divorced from i_blocks, and instead depend on i_size.  Otherwise, we will
>> have yet another bunch of problems as large xattrs on a fast symlink will
>> incorrectly result in problems with the symlink.
>>
> Totally agree. The changes to i_blocks accounting for ea_inode feature
> likely broke ext4_inode_is_fast_symlink() in kernel. I will try to fix
> that first.

FYI, I sent out a patch to fix e2fsprogs part too: ("[PATCH]
e2fsprogs: add ext2fs_is_fast_symlink() function")

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 01/12] e2fsck: add support for large xattrs in external inodes
  2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
                   ` (10 preceding siblings ...)
  2017-06-26 13:43 ` [PATCH 12/12] resize2fs: moving xattr inodes is not supported Tahsin Erdogan
@ 2017-07-05  4:04 ` Theodore Ts'o
  11 siblings, 0 replies; 25+ messages in thread
From: Theodore Ts'o @ 2017-07-05  4:04 UTC (permalink / raw)
  To: Tahsin Erdogan
  Cc: Andreas Dilger, Darrick J . Wong, linux-ext4, Andreas Dilger,
	Kalpak Shah

Thanks, I've applied this patch series to the e2fsprogs "next" branch.

	     	     	  	       	  - Ted

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2017-07-05  4:04 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-26 13:43 [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 02/12] tune2fs: do not allow disabling ea_inode feature Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 03/12] e2fsck: ea_inode hash validation Tahsin Erdogan
2017-06-26 21:21   ` Andreas Dilger
2017-06-28  1:41     ` [PATCH v2 " Tahsin Erdogan
2017-06-28  1:47       ` Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 04/12] e2fsck: do not early terminate extra space check Tahsin Erdogan
2017-06-26 21:23   ` Andreas Dilger
2017-06-26 23:57     ` Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 05/12] e2fsck: generalize ea_refcount Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 06/12] e2fsck: update i_blocks accounting for ea_inode feature Tahsin Erdogan
2017-06-26 21:36   ` Andreas Dilger
2017-06-27 11:37     ` Tahsin Erdogan
2017-06-30  1:39       ` Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 07/12] e2fsck: track ea_inode references Tahsin Erdogan
2017-06-26 21:45   ` Andreas Dilger
2017-06-27  1:25     ` Tahsin Erdogan
2017-06-27 18:43       ` Andreas Dilger
2017-06-28  1:26         ` Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 08/12] e2fsck: add test for ea_inode feature Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 09/12] tune2fs: update ea_inode hashes when fs uuid changes Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 10/12] fuse2fs: refuse to mount fs with ea_inode feature Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 11/12] mke2fs: ea_inode is not supported for hurd Tahsin Erdogan
2017-06-26 13:43 ` [PATCH 12/12] resize2fs: moving xattr inodes is not supported Tahsin Erdogan
2017-07-05  4:04 ` [PATCH 01/12] e2fsck: add support for large xattrs in external inodes Theodore Ts'o

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.