All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ext4: shrink directory when last block is empty
@ 2019-01-15 22:55 harshadshirwadkar
  2019-01-16 14:27 ` Alexey Lyashkov
  0 siblings, 1 reply; 7+ messages in thread
From: harshadshirwadkar @ 2019-01-15 22:55 UTC (permalink / raw)
  To: linux-ext4; +Cc: Harshad Shirwadkar

From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>

This patch is the first step towards shrinking htree based directories
when files are deleted. We truncate directory inode when a directory
entry removal causes last directory block to be empty. This patch just
removes the last block. We may end up in a situation where many
intermediate dirent blocks in directory inode are empty. Removing
those blocks would be handled later.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 fs/ext4/namei.c | 146 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 125 insertions(+), 21 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2a4c25c4681d..bbcbd59c44ce 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -273,7 +273,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 				 __u32 *start_hash);
 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 		struct ext4_filename *fname,
-		struct ext4_dir_entry_2 **res_dir);
+		struct ext4_dir_entry_2 **res_dir,
+		struct dx_frame *dx_frame);
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
 			     struct inode *dir, struct inode *inode);
 
@@ -380,6 +381,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
 					   (void *)t - (void *)dirent);
 }
 
+
 int ext4_handle_dirty_dirent_node(handle_t *handle,
 				  struct inode *inode,
 				  struct buffer_head *bh)
@@ -797,7 +799,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	dxtrace(printk("Look up %x", hash));
 	while (1) {
 		count = dx_get_count(entries);
-		if (!count || count > dx_get_limit(entries)) {
+		if (count > dx_get_limit(entries)) {
 			ext4_warning_inode(dir,
 					   "dx entry: count %u beyond limit %u",
 					   count, dx_get_limit(entries));
@@ -866,6 +868,42 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	return ret_err;
 }
 
+static void
+ext4_dx_delete_entry(handle_t *handle, struct inode *dir,
+		     struct dx_frame *dx_frame, __le64 block)
+{
+	struct dx_entry *entries;
+	unsigned int count, limit;
+	int err, i;
+
+	entries = dx_frame->entries;
+	count = dx_get_count(entries);
+	limit = dx_get_limit(entries);
+
+	for (i = 0; i < count; i++)
+		if (entries[i].block == block)
+			break;
+
+	if (i >= count)
+		return;
+
+	err = ext4_journal_get_write_access(handle, dx_frame->bh);
+	if (err) {
+		ext4_std_error(dir->i_sb, err);
+		return;
+	}
+
+	for (; i < count - 1; i++)
+		entries[i] = entries[i + 1];
+
+	dx_set_count(entries, count - 1);
+	dx_set_limit(entries, limit);
+
+	err = ext4_handle_dirty_dx_node(handle, dir, dx_frame->bh);
+	if (err)
+		ext4_std_error(dir->i_sb, err);
+}
+
 static void dx_release(struct dx_frame *frames)
 {
 	struct dx_root_info *info;
@@ -1309,6 +1347,28 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 	return 0;
 }
 
+static int count_dentries(char *search_buf, int buf_size,
+			  unsigned int blocksize)
+{
+	struct ext4_dir_entry_2 *de;
+	char *dlimit;
+	int de_len;
+	int count = 0;
+
+	de = (struct ext4_dir_entry_2 *)search_buf;
+	dlimit = search_buf + buf_size;
+	while ((char *)de < dlimit) {
+		de_len = ext4_rec_len_from_disk(de->rec_len, blocksize);
+		if (de_len <= 0)
+			return count;
+		if (de->inode != 0)
+			count++;
+		de = (struct ext4_dir_entry_2 *)((char *)de + de_len);
+	}
+
+	return count;
+}
+
 static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
 			       struct ext4_dir_entry *de)
 {
@@ -1339,7 +1399,8 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
 static struct buffer_head * ext4_find_entry (struct inode *dir,
 					const struct qstr *d_name,
 					struct ext4_dir_entry_2 **res_dir,
-					int *inlined)
+					int *inlined,
+					struct dx_frame *dx_frame)
 {
 	struct super_block *sb;
 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
@@ -1388,7 +1449,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 		goto restart;
 	}
 	if (is_dx(dir)) {
-		ret = ext4_dx_find_entry(dir, &fname, res_dir);
+		ret = ext4_dx_find_entry(dir, &fname, res_dir, dx_frame);
 		/*
 		 * On success, or if the error was file not found,
 		 * return.  Otherwise, fall back to doing a search the
@@ -1486,9 +1547,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 	return ret;
 }
 
-static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
-			struct ext4_filename *fname,
-			struct ext4_dir_entry_2 **res_dir)
+static struct buffer_head *ext4_dx_find_entry(
+			struct inode *dir, struct ext4_filename *fname,
+			struct ext4_dir_entry_2 **res_dir,
+			struct dx_frame *dx_frame)
 {
 	struct super_block * sb = dir->i_sb;
 	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
@@ -1502,6 +1564,10 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 	frame = dx_probe(fname, dir, NULL, frames);
 	if (IS_ERR(frame))
 		return (struct buffer_head *) frame;
+	if (dx_frame) {
+		*dx_frame = *frame;
+		get_bh(dx_frame->bh);
+	}
 	do {
 		block = dx_get_block(frame->at);
 		bh = ext4_read_dirblock(dir, block, DIRENT);
@@ -1553,7 +1619,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
 	if (dentry->d_name.len > EXT4_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return (struct dentry *) bh;
 	inode = NULL;
@@ -1597,7 +1663,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
 	struct ext4_dir_entry_2 * de;
 	struct buffer_head *bh;
 
-	bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
+	bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return (struct dentry *) bh;
 	if (!bh)
@@ -2337,9 +2403,13 @@ int ext4_generic_delete_entry(handle_t *handle,
 static int ext4_delete_entry(handle_t *handle,
 			     struct inode *dir,
 			     struct ext4_dir_entry_2 *de_del,
-			     struct buffer_head *bh)
+			     struct buffer_head *bh,
+			     struct dx_frame *dx_frame)
 {
+	struct ext4_map_blocks map;
 	int err, csum_size = 0;
+	int should_truncate = 0;
+
 
 	if (ext4_has_inline_data(dir)) {
 		int has_inline_data = 1;
@@ -2363,11 +2433,37 @@ static int ext4_delete_entry(handle_t *handle,
 	if (err)
 		goto out;
 
+	if (dx_frame && dx_frame->bh &&
+	    count_dentries(bh->b_data, bh->b_size,
+			   dir->i_sb->s_blocksize) == 0) {
+
+		map.m_lblk = (dir->i_size - 1) >>
+			     (dir->i_sb->s_blocksize_bits);
+		map.m_len = 1;
+		err = ext4_map_blocks(handle, dir, &map, 0);
+
+		if ((err > 0) && (bh->b_blocknr == map.m_pblk)) {
+			ext4_dx_delete_entry(handle, dir, dx_frame,
+					     cpu_to_le64(map.m_lblk));
+			should_truncate = 1;
+		}
+	}
+
 	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+
 	err = ext4_handle_dirty_dirent_node(handle, dir, bh);
 	if (unlikely(err))
 		goto out;
 
+	if (should_truncate) {
+		dir->i_size -= dir->i_sb->s_blocksize;
+		ext4_truncate(dir);
+		if (dir->i_size == dir->i_sb->s_blocksize) {
+			ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
+			ext4_mark_inode_dirty(handle, dir);
+		}
+	}
+
 	return 0;
 out:
 	if (err != -ENOENT)
@@ -2923,7 +3019,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 		return retval;
 
 	retval = -ENOENT;
-	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (!bh)
@@ -2950,7 +3046,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	retval = ext4_delete_entry(handle, dir, de, bh);
+	retval = ext4_delete_entry(handle, dir, de, bh, NULL);
 	if (retval)
 		goto end_rmdir;
 	if (!EXT4_DIR_LINK_EMPTY(inode))
@@ -2985,6 +3081,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 	struct buffer_head *bh;
 	struct ext4_dir_entry_2 *de;
 	handle_t *handle = NULL;
+	struct dx_frame dx_frame;
+
+	memset(&dx_frame, 0, sizeof(dx_frame));
 
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
 		return -EIO;
@@ -3000,7 +3099,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 		return retval;
 
 	retval = -ENOENT;
-	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, &dx_frame);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (!bh)
@@ -3028,9 +3127,10 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 				   dentry->d_name.len, dentry->d_name.name);
 		set_nlink(inode, 1);
 	}
-	retval = ext4_delete_entry(handle, dir, de, bh);
+	retval = ext4_delete_entry(handle, dir, de, bh, &dx_frame);
 	if (retval)
 		goto end_unlink;
+
 	dir->i_ctime = dir->i_mtime = current_time(dir);
 	ext4_update_dx_flag(dir);
 	ext4_mark_inode_dirty(handle, dir);
@@ -3042,6 +3142,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 
 end_unlink:
 	brelse(bh);
+	if (dx_frame.bh)
+		brelse(dx_frame.bh);
 	if (handle)
 		ext4_journal_stop(handle);
 	trace_ext4_unlink_exit(dentry, retval);
@@ -3362,11 +3464,11 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
 	struct buffer_head *bh;
 	struct ext4_dir_entry_2 *de;
 
-	bh = ext4_find_entry(dir, d_name, &de, NULL);
+	bh = ext4_find_entry(dir, d_name, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (bh) {
-		retval = ext4_delete_entry(handle, dir, de, bh);
+		retval = ext4_delete_entry(handle, dir, de, bh, NULL);
 		brelse(bh);
 	}
 	return retval;
@@ -3390,7 +3492,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
 		retval = ext4_find_delete_entry(handle, ent->dir,
 						&ent->dentry->d_name);
 	} else {
-		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
+		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh,
+					   NULL);
 		if (retval == -ENOENT) {
 			retval = ext4_find_delete_entry(handle, ent->dir,
 							&ent->dentry->d_name);
@@ -3497,7 +3600,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 			return retval;
 	}
 
-	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL,
+				 NULL);
 	if (IS_ERR(old.bh))
 		return PTR_ERR(old.bh);
 	/*
@@ -3511,7 +3615,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto end_rename;
 
 	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
-				 &new.de, &new.inlined);
+				 &new.de, &new.inlined, NULL);
 	if (IS_ERR(new.bh)) {
 		retval = PTR_ERR(new.bh);
 		new.bh = NULL;
@@ -3691,7 +3795,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 		return retval;
 
 	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
-				 &old.de, &old.inlined);
+				 &old.de, &old.inlined, NULL);
 	if (IS_ERR(old.bh))
 		return PTR_ERR(old.bh);
 	/*
@@ -3705,7 +3809,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto end_rename;
 
 	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
-				 &new.de, &new.inlined);
+				 &new.de, &new.inlined, NULL);
 	if (IS_ERR(new.bh)) {
 		retval = PTR_ERR(new.bh);
 		new.bh = NULL;
-- 
2.20.1.97.g81188d93c3-goog

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] ext4: shrink directory when last block is empty
  2019-01-15 22:55 [PATCH] ext4: shrink directory when last block is empty harshadshirwadkar
@ 2019-01-16 14:27 ` Alexey Lyashkov
  2019-01-16 20:51   ` harshad shirwadkar
  0 siblings, 1 reply; 7+ messages in thread
From: Alexey Lyashkov @ 2019-01-16 14:27 UTC (permalink / raw)
  To: harshadshirwadkar; +Cc: linux-ext4

Hi Harshad,

I quickly look to the patch, but as i see you have called a count_dentries - it have called on each delete entry.
If you have lots create/delete in loop for one block it’s costly and decrease a delete performance.
as i see it have used just for checking against zero, so can it be replaced with more simple check like is first dir entry cover a whole block ?


> 16 янв. 2019 г., в 1:55, harshadshirwadkar@gmail.com написал(а):
> 
> From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> 
> This patch is the first step towards shrinking htree based directories
> when files are deleted. We truncate directory inode when a directory
> entry removal causes last directory block to be empty. This patch just
> removes the last block. We may end up in a situation where many
> intermediate dirent blocks in directory inode are empty. Removing
> those blocks would be handled later.
> 
> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> ---
> fs/ext4/namei.c | 146 +++++++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 125 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index 2a4c25c4681d..bbcbd59c44ce 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -273,7 +273,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
> 				 __u32 *start_hash);
> static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> 		struct ext4_filename *fname,
> -		struct ext4_dir_entry_2 **res_dir);
> +		struct ext4_dir_entry_2 **res_dir,
> +		struct dx_frame *dx_frame);
> static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
> 			     struct inode *dir, struct inode *inode);
> 
> @@ -380,6 +381,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
> 					   (void *)t - (void *)dirent);
> }
> 
> +
> int ext4_handle_dirty_dirent_node(handle_t *handle,
> 				  struct inode *inode,
> 				  struct buffer_head *bh)
> @@ -797,7 +799,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
> 	dxtrace(printk("Look up %x", hash));
> 	while (1) {
> 		count = dx_get_count(entries);
> -		if (!count || count > dx_get_limit(entries)) {
> +		if (count > dx_get_limit(entries)) {
> 			ext4_warning_inode(dir,
> 					   "dx entry: count %u beyond limit %u",
> 					   count, dx_get_limit(entries));
> @@ -866,6 +868,42 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
> 	return ret_err;
> }
> 
> +static void
> +ext4_dx_delete_entry(handle_t *handle, struct inode *dir,
> +		     struct dx_frame *dx_frame, __le64 block)
> +{
> +	struct dx_entry *entries;
> +	unsigned int count, limit;
> +	int err, i;
> +
> +	entries = dx_frame->entries;
> +	count = dx_get_count(entries);
> +	limit = dx_get_limit(entries);
> +
> +	for (i = 0; i < count; i++)
> +		if (entries[i].block == block)
> +			break;
> +
> +	if (i >= count)
> +		return;
> +
> +	err = ext4_journal_get_write_access(handle, dx_frame->bh);
> +	if (err) {
> +		ext4_std_error(dir->i_sb, err);
> +		return;
> +	}
> +
> +	for (; i < count - 1; i++)
> +		entries[i] = entries[i + 1];
> +
> +	dx_set_count(entries, count - 1);
> +	dx_set_limit(entries, limit);
> +
> +	err = ext4_handle_dirty_dx_node(handle, dir, dx_frame->bh);
> +	if (err)
> +		ext4_std_error(dir->i_sb, err);
> +}
> +
> static void dx_release(struct dx_frame *frames)
> {
> 	struct dx_root_info *info;
> @@ -1309,6 +1347,28 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
> 	return 0;
> }
> 
> +static int count_dentries(char *search_buf, int buf_size,
> +			  unsigned int blocksize)
> +{
> +	struct ext4_dir_entry_2 *de;
> +	char *dlimit;
> +	int de_len;
> +	int count = 0;
> +
> +	de = (struct ext4_dir_entry_2 *)search_buf;
> +	dlimit = search_buf + buf_size;
> +	while ((char *)de < dlimit) {
> +		de_len = ext4_rec_len_from_disk(de->rec_len, blocksize);
> +		if (de_len <= 0)
> +			return count;
> +		if (de->inode != 0)
> +			count++;
> +		de = (struct ext4_dir_entry_2 *)((char *)de + de_len);
> +	}
> +
> +	return count;
> +}
> +
> static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
> 			       struct ext4_dir_entry *de)
> {
> @@ -1339,7 +1399,8 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
> static struct buffer_head * ext4_find_entry (struct inode *dir,
> 					const struct qstr *d_name,
> 					struct ext4_dir_entry_2 **res_dir,
> -					int *inlined)
> +					int *inlined,
> +					struct dx_frame *dx_frame)
> {
> 	struct super_block *sb;
> 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
> @@ -1388,7 +1449,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
> 		goto restart;
> 	}
> 	if (is_dx(dir)) {
> -		ret = ext4_dx_find_entry(dir, &fname, res_dir);
> +		ret = ext4_dx_find_entry(dir, &fname, res_dir, dx_frame);
> 		/*
> 		 * On success, or if the error was file not found,
> 		 * return.  Otherwise, fall back to doing a search the
> @@ -1486,9 +1547,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
> 	return ret;
> }
> 
> -static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> -			struct ext4_filename *fname,
> -			struct ext4_dir_entry_2 **res_dir)
> +static struct buffer_head *ext4_dx_find_entry(
> +			struct inode *dir, struct ext4_filename *fname,
> +			struct ext4_dir_entry_2 **res_dir,
> +			struct dx_frame *dx_frame)
> {
> 	struct super_block * sb = dir->i_sb;
> 	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
> @@ -1502,6 +1564,10 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> 	frame = dx_probe(fname, dir, NULL, frames);
> 	if (IS_ERR(frame))
> 		return (struct buffer_head *) frame;
> +	if (dx_frame) {
> +		*dx_frame = *frame;
> +		get_bh(dx_frame->bh);
> +	}
> 	do {
> 		block = dx_get_block(frame->at);
> 		bh = ext4_read_dirblock(dir, block, DIRENT);
> @@ -1553,7 +1619,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
> 	if (dentry->d_name.len > EXT4_NAME_LEN)
> 		return ERR_PTR(-ENAMETOOLONG);
> 
> -	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
> +	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
> 	if (IS_ERR(bh))
> 		return (struct dentry *) bh;
> 	inode = NULL;
> @@ -1597,7 +1663,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
> 	struct ext4_dir_entry_2 * de;
> 	struct buffer_head *bh;
> 
> -	bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
> +	bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL, NULL);
> 	if (IS_ERR(bh))
> 		return (struct dentry *) bh;
> 	if (!bh)
> @@ -2337,9 +2403,13 @@ int ext4_generic_delete_entry(handle_t *handle,
> static int ext4_delete_entry(handle_t *handle,
> 			     struct inode *dir,
> 			     struct ext4_dir_entry_2 *de_del,
> -			     struct buffer_head *bh)
> +			     struct buffer_head *bh,
> +			     struct dx_frame *dx_frame)
> {
> +	struct ext4_map_blocks map;
> 	int err, csum_size = 0;
> +	int should_truncate = 0;
> +
> 
> 	if (ext4_has_inline_data(dir)) {
> 		int has_inline_data = 1;
> @@ -2363,11 +2433,37 @@ static int ext4_delete_entry(handle_t *handle,
> 	if (err)
> 		goto out;
> 
> +	if (dx_frame && dx_frame->bh &&
> +	    count_dentries(bh->b_data, bh->b_size,
> +			   dir->i_sb->s_blocksize) == 0) {
> +
> +		map.m_lblk = (dir->i_size - 1) >>
> +			     (dir->i_sb->s_blocksize_bits);
> +		map.m_len = 1;
> +		err = ext4_map_blocks(handle, dir, &map, 0);
> +
> +		if ((err > 0) && (bh->b_blocknr == map.m_pblk)) {
> +			ext4_dx_delete_entry(handle, dir, dx_frame,
> +					     cpu_to_le64(map.m_lblk));
> +			should_truncate = 1;
> +		}
> +	}
> +
> 	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
> +
> 	err = ext4_handle_dirty_dirent_node(handle, dir, bh);
> 	if (unlikely(err))
> 		goto out;
> 
> +	if (should_truncate) {
> +		dir->i_size -= dir->i_sb->s_blocksize;
> +		ext4_truncate(dir);
> +		if (dir->i_size == dir->i_sb->s_blocksize) {
> +			ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
> +			ext4_mark_inode_dirty(handle, dir);
> +		}
> +	}
> +
> 	return 0;
> out:
> 	if (err != -ENOENT)
> @@ -2923,7 +3019,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
> 		return retval;
> 
> 	retval = -ENOENT;
> -	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
> +	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
> 	if (IS_ERR(bh))
> 		return PTR_ERR(bh);
> 	if (!bh)
> @@ -2950,7 +3046,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
> 	if (IS_DIRSYNC(dir))
> 		ext4_handle_sync(handle);
> 
> -	retval = ext4_delete_entry(handle, dir, de, bh);
> +	retval = ext4_delete_entry(handle, dir, de, bh, NULL);
> 	if (retval)
> 		goto end_rmdir;
> 	if (!EXT4_DIR_LINK_EMPTY(inode))
> @@ -2985,6 +3081,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> 	struct buffer_head *bh;
> 	struct ext4_dir_entry_2 *de;
> 	handle_t *handle = NULL;
> +	struct dx_frame dx_frame;
> +
> +	memset(&dx_frame, 0, sizeof(dx_frame));
> 
> 	if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
> 		return -EIO;
> @@ -3000,7 +3099,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> 		return retval;
> 
> 	retval = -ENOENT;
> -	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
> +	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, &dx_frame);
> 	if (IS_ERR(bh))
> 		return PTR_ERR(bh);
> 	if (!bh)
> @@ -3028,9 +3127,10 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> 				   dentry->d_name.len, dentry->d_name.name);
> 		set_nlink(inode, 1);
> 	}
> -	retval = ext4_delete_entry(handle, dir, de, bh);
> +	retval = ext4_delete_entry(handle, dir, de, bh, &dx_frame);
> 	if (retval)
> 		goto end_unlink;
> +
> 	dir->i_ctime = dir->i_mtime = current_time(dir);
> 	ext4_update_dx_flag(dir);
> 	ext4_mark_inode_dirty(handle, dir);
> @@ -3042,6 +3142,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> 
> end_unlink:
> 	brelse(bh);
> +	if (dx_frame.bh)
> +		brelse(dx_frame.bh);
> 	if (handle)
> 		ext4_journal_stop(handle);
> 	trace_ext4_unlink_exit(dentry, retval);
> @@ -3362,11 +3464,11 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
> 	struct buffer_head *bh;
> 	struct ext4_dir_entry_2 *de;
> 
> -	bh = ext4_find_entry(dir, d_name, &de, NULL);
> +	bh = ext4_find_entry(dir, d_name, &de, NULL, NULL);
> 	if (IS_ERR(bh))
> 		return PTR_ERR(bh);
> 	if (bh) {
> -		retval = ext4_delete_entry(handle, dir, de, bh);
> +		retval = ext4_delete_entry(handle, dir, de, bh, NULL);
> 		brelse(bh);
> 	}
> 	return retval;
> @@ -3390,7 +3492,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
> 		retval = ext4_find_delete_entry(handle, ent->dir,
> 						&ent->dentry->d_name);
> 	} else {
> -		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
> +		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh,
> +					   NULL);
> 		if (retval == -ENOENT) {
> 			retval = ext4_find_delete_entry(handle, ent->dir,
> 							&ent->dentry->d_name);
> @@ -3497,7 +3600,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
> 			return retval;
> 	}
> 
> -	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
> +	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL,
> +				 NULL);
> 	if (IS_ERR(old.bh))
> 		return PTR_ERR(old.bh);
> 	/*
> @@ -3511,7 +3615,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
> 		goto end_rename;
> 
> 	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
> -				 &new.de, &new.inlined);
> +				 &new.de, &new.inlined, NULL);
> 	if (IS_ERR(new.bh)) {
> 		retval = PTR_ERR(new.bh);
> 		new.bh = NULL;
> @@ -3691,7 +3795,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
> 		return retval;
> 
> 	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
> -				 &old.de, &old.inlined);
> +				 &old.de, &old.inlined, NULL);
> 	if (IS_ERR(old.bh))
> 		return PTR_ERR(old.bh);
> 	/*
> @@ -3705,7 +3809,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
> 		goto end_rename;
> 
> 	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
> -				 &new.de, &new.inlined);
> +				 &new.de, &new.inlined, NULL);
> 	if (IS_ERR(new.bh)) {
> 		retval = PTR_ERR(new.bh);
> 		new.bh = NULL;
> -- 
> 2.20.1.97.g81188d93c3-goog
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] ext4: shrink directory when last block is empty
  2019-01-16 14:27 ` Alexey Lyashkov
@ 2019-01-16 20:51   ` harshad shirwadkar
  2019-01-23 18:32     ` harshadshirwadkar
  0 siblings, 1 reply; 7+ messages in thread
From: harshad shirwadkar @ 2019-01-16 20:51 UTC (permalink / raw)
  To: Alexey Lyashkov; +Cc: linux-ext4

Hi Alexey,

Thanks, yes that's a good point. I'll incorporate it and send another patch.

- Harshad.


On Wed, Jan 16, 2019 at 6:28 AM Alexey Lyashkov <umka@cloudlinux.com> wrote:
>
> Hi Harshad,
>
> I quickly look to the patch, but as i see you have called a count_dentries - it have called on each delete entry.
> If you have lots create/delete in loop for one block it’s costly and decrease a delete performance.
> as i see it have used just for checking against zero, so can it be replaced with more simple check like is first dir entry cover a whole block ?
>
>
> > 16 янв. 2019 г., в 1:55, harshadshirwadkar@gmail.com написал(а):
> >
> > From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> >
> > This patch is the first step towards shrinking htree based directories
> > when files are deleted. We truncate directory inode when a directory
> > entry removal causes last directory block to be empty. This patch just
> > removes the last block. We may end up in a situation where many
> > intermediate dirent blocks in directory inode are empty. Removing
> > those blocks would be handled later.
> >
> > Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> > ---
> > fs/ext4/namei.c | 146 +++++++++++++++++++++++++++++++++++++++++-------
> > 1 file changed, 125 insertions(+), 21 deletions(-)
> >
> > diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> > index 2a4c25c4681d..bbcbd59c44ce 100644
> > --- a/fs/ext4/namei.c
> > +++ b/fs/ext4/namei.c
> > @@ -273,7 +273,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
> >                                __u32 *start_hash);
> > static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> >               struct ext4_filename *fname,
> > -             struct ext4_dir_entry_2 **res_dir);
> > +             struct ext4_dir_entry_2 **res_dir,
> > +             struct dx_frame *dx_frame);
> > static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
> >                            struct inode *dir, struct inode *inode);
> >
> > @@ -380,6 +381,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
> >                                          (void *)t - (void *)dirent);
> > }
> >
> > +
> > int ext4_handle_dirty_dirent_node(handle_t *handle,
> >                                 struct inode *inode,
> >                                 struct buffer_head *bh)
> > @@ -797,7 +799,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
> >       dxtrace(printk("Look up %x", hash));
> >       while (1) {
> >               count = dx_get_count(entries);
> > -             if (!count || count > dx_get_limit(entries)) {
> > +             if (count > dx_get_limit(entries)) {
> >                       ext4_warning_inode(dir,
> >                                          "dx entry: count %u beyond limit %u",
> >                                          count, dx_get_limit(entries));
> > @@ -866,6 +868,42 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
> >       return ret_err;
> > }
> >
> > +static void
> > +ext4_dx_delete_entry(handle_t *handle, struct inode *dir,
> > +                  struct dx_frame *dx_frame, __le64 block)
> > +{
> > +     struct dx_entry *entries;
> > +     unsigned int count, limit;
> > +     int err, i;
> > +
> > +     entries = dx_frame->entries;
> > +     count = dx_get_count(entries);
> > +     limit = dx_get_limit(entries);
> > +
> > +     for (i = 0; i < count; i++)
> > +             if (entries[i].block == block)
> > +                     break;
> > +
> > +     if (i >= count)
> > +             return;
> > +
> > +     err = ext4_journal_get_write_access(handle, dx_frame->bh);
> > +     if (err) {
> > +             ext4_std_error(dir->i_sb, err);
> > +             return;
> > +     }
> > +
> > +     for (; i < count - 1; i++)
> > +             entries[i] = entries[i + 1];
> > +
> > +     dx_set_count(entries, count - 1);
> > +     dx_set_limit(entries, limit);
> > +
> > +     err = ext4_handle_dirty_dx_node(handle, dir, dx_frame->bh);
> > +     if (err)
> > +             ext4_std_error(dir->i_sb, err);
> > +}
> > +
> > static void dx_release(struct dx_frame *frames)
> > {
> >       struct dx_root_info *info;
> > @@ -1309,6 +1347,28 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
> >       return 0;
> > }
> >
> > +static int count_dentries(char *search_buf, int buf_size,
> > +                       unsigned int blocksize)
> > +{
> > +     struct ext4_dir_entry_2 *de;
> > +     char *dlimit;
> > +     int de_len;
> > +     int count = 0;
> > +
> > +     de = (struct ext4_dir_entry_2 *)search_buf;
> > +     dlimit = search_buf + buf_size;
> > +     while ((char *)de < dlimit) {
> > +             de_len = ext4_rec_len_from_disk(de->rec_len, blocksize);
> > +             if (de_len <= 0)
> > +                     return count;
> > +             if (de->inode != 0)
> > +                     count++;
> > +             de = (struct ext4_dir_entry_2 *)((char *)de + de_len);
> > +     }
> > +
> > +     return count;
> > +}
> > +
> > static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
> >                              struct ext4_dir_entry *de)
> > {
> > @@ -1339,7 +1399,8 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
> > static struct buffer_head * ext4_find_entry (struct inode *dir,
> >                                       const struct qstr *d_name,
> >                                       struct ext4_dir_entry_2 **res_dir,
> > -                                     int *inlined)
> > +                                     int *inlined,
> > +                                     struct dx_frame *dx_frame)
> > {
> >       struct super_block *sb;
> >       struct buffer_head *bh_use[NAMEI_RA_SIZE];
> > @@ -1388,7 +1449,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
> >               goto restart;
> >       }
> >       if (is_dx(dir)) {
> > -             ret = ext4_dx_find_entry(dir, &fname, res_dir);
> > +             ret = ext4_dx_find_entry(dir, &fname, res_dir, dx_frame);
> >               /*
> >                * On success, or if the error was file not found,
> >                * return.  Otherwise, fall back to doing a search the
> > @@ -1486,9 +1547,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
> >       return ret;
> > }
> >
> > -static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> > -                     struct ext4_filename *fname,
> > -                     struct ext4_dir_entry_2 **res_dir)
> > +static struct buffer_head *ext4_dx_find_entry(
> > +                     struct inode *dir, struct ext4_filename *fname,
> > +                     struct ext4_dir_entry_2 **res_dir,
> > +                     struct dx_frame *dx_frame)
> > {
> >       struct super_block * sb = dir->i_sb;
> >       struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
> > @@ -1502,6 +1564,10 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> >       frame = dx_probe(fname, dir, NULL, frames);
> >       if (IS_ERR(frame))
> >               return (struct buffer_head *) frame;
> > +     if (dx_frame) {
> > +             *dx_frame = *frame;
> > +             get_bh(dx_frame->bh);
> > +     }
> >       do {
> >               block = dx_get_block(frame->at);
> >               bh = ext4_read_dirblock(dir, block, DIRENT);
> > @@ -1553,7 +1619,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
> >       if (dentry->d_name.len > EXT4_NAME_LEN)
> >               return ERR_PTR(-ENAMETOOLONG);
> >
> > -     bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
> > +     bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
> >       if (IS_ERR(bh))
> >               return (struct dentry *) bh;
> >       inode = NULL;
> > @@ -1597,7 +1663,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
> >       struct ext4_dir_entry_2 * de;
> >       struct buffer_head *bh;
> >
> > -     bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
> > +     bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL, NULL);
> >       if (IS_ERR(bh))
> >               return (struct dentry *) bh;
> >       if (!bh)
> > @@ -2337,9 +2403,13 @@ int ext4_generic_delete_entry(handle_t *handle,
> > static int ext4_delete_entry(handle_t *handle,
> >                            struct inode *dir,
> >                            struct ext4_dir_entry_2 *de_del,
> > -                          struct buffer_head *bh)
> > +                          struct buffer_head *bh,
> > +                          struct dx_frame *dx_frame)
> > {
> > +     struct ext4_map_blocks map;
> >       int err, csum_size = 0;
> > +     int should_truncate = 0;
> > +
> >
> >       if (ext4_has_inline_data(dir)) {
> >               int has_inline_data = 1;
> > @@ -2363,11 +2433,37 @@ static int ext4_delete_entry(handle_t *handle,
> >       if (err)
> >               goto out;
> >
> > +     if (dx_frame && dx_frame->bh &&
> > +         count_dentries(bh->b_data, bh->b_size,
> > +                        dir->i_sb->s_blocksize) == 0) {
> > +
> > +             map.m_lblk = (dir->i_size - 1) >>
> > +                          (dir->i_sb->s_blocksize_bits);
> > +             map.m_len = 1;
> > +             err = ext4_map_blocks(handle, dir, &map, 0);
> > +
> > +             if ((err > 0) && (bh->b_blocknr == map.m_pblk)) {
> > +                     ext4_dx_delete_entry(handle, dir, dx_frame,
> > +                                          cpu_to_le64(map.m_lblk));
> > +                     should_truncate = 1;
> > +             }
> > +     }
> > +
> >       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
> > +
> >       err = ext4_handle_dirty_dirent_node(handle, dir, bh);
> >       if (unlikely(err))
> >               goto out;
> >
> > +     if (should_truncate) {
> > +             dir->i_size -= dir->i_sb->s_blocksize;
> > +             ext4_truncate(dir);
> > +             if (dir->i_size == dir->i_sb->s_blocksize) {
> > +                     ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
> > +                     ext4_mark_inode_dirty(handle, dir);
> > +             }
> > +     }
> > +
> >       return 0;
> > out:
> >       if (err != -ENOENT)
> > @@ -2923,7 +3019,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
> >               return retval;
> >
> >       retval = -ENOENT;
> > -     bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
> > +     bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
> >       if (IS_ERR(bh))
> >               return PTR_ERR(bh);
> >       if (!bh)
> > @@ -2950,7 +3046,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
> >       if (IS_DIRSYNC(dir))
> >               ext4_handle_sync(handle);
> >
> > -     retval = ext4_delete_entry(handle, dir, de, bh);
> > +     retval = ext4_delete_entry(handle, dir, de, bh, NULL);
> >       if (retval)
> >               goto end_rmdir;
> >       if (!EXT4_DIR_LINK_EMPTY(inode))
> > @@ -2985,6 +3081,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> >       struct buffer_head *bh;
> >       struct ext4_dir_entry_2 *de;
> >       handle_t *handle = NULL;
> > +     struct dx_frame dx_frame;
> > +
> > +     memset(&dx_frame, 0, sizeof(dx_frame));
> >
> >       if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
> >               return -EIO;
> > @@ -3000,7 +3099,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> >               return retval;
> >
> >       retval = -ENOENT;
> > -     bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
> > +     bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, &dx_frame);
> >       if (IS_ERR(bh))
> >               return PTR_ERR(bh);
> >       if (!bh)
> > @@ -3028,9 +3127,10 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> >                                  dentry->d_name.len, dentry->d_name.name);
> >               set_nlink(inode, 1);
> >       }
> > -     retval = ext4_delete_entry(handle, dir, de, bh);
> > +     retval = ext4_delete_entry(handle, dir, de, bh, &dx_frame);
> >       if (retval)
> >               goto end_unlink;
> > +
> >       dir->i_ctime = dir->i_mtime = current_time(dir);
> >       ext4_update_dx_flag(dir);
> >       ext4_mark_inode_dirty(handle, dir);
> > @@ -3042,6 +3142,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> >
> > end_unlink:
> >       brelse(bh);
> > +     if (dx_frame.bh)
> > +             brelse(dx_frame.bh);
> >       if (handle)
> >               ext4_journal_stop(handle);
> >       trace_ext4_unlink_exit(dentry, retval);
> > @@ -3362,11 +3464,11 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
> >       struct buffer_head *bh;
> >       struct ext4_dir_entry_2 *de;
> >
> > -     bh = ext4_find_entry(dir, d_name, &de, NULL);
> > +     bh = ext4_find_entry(dir, d_name, &de, NULL, NULL);
> >       if (IS_ERR(bh))
> >               return PTR_ERR(bh);
> >       if (bh) {
> > -             retval = ext4_delete_entry(handle, dir, de, bh);
> > +             retval = ext4_delete_entry(handle, dir, de, bh, NULL);
> >               brelse(bh);
> >       }
> >       return retval;
> > @@ -3390,7 +3492,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
> >               retval = ext4_find_delete_entry(handle, ent->dir,
> >                                               &ent->dentry->d_name);
> >       } else {
> > -             retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
> > +             retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh,
> > +                                        NULL);
> >               if (retval == -ENOENT) {
> >                       retval = ext4_find_delete_entry(handle, ent->dir,
> >                                                       &ent->dentry->d_name);
> > @@ -3497,7 +3600,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
> >                       return retval;
> >       }
> >
> > -     old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
> > +     old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL,
> > +                              NULL);
> >       if (IS_ERR(old.bh))
> >               return PTR_ERR(old.bh);
> >       /*
> > @@ -3511,7 +3615,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
> >               goto end_rename;
> >
> >       new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
> > -                              &new.de, &new.inlined);
> > +                              &new.de, &new.inlined, NULL);
> >       if (IS_ERR(new.bh)) {
> >               retval = PTR_ERR(new.bh);
> >               new.bh = NULL;
> > @@ -3691,7 +3795,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
> >               return retval;
> >
> >       old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
> > -                              &old.de, &old.inlined);
> > +                              &old.de, &old.inlined, NULL);
> >       if (IS_ERR(old.bh))
> >               return PTR_ERR(old.bh);
> >       /*
> > @@ -3705,7 +3809,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
> >               goto end_rename;
> >
> >       new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
> > -                              &new.de, &new.inlined);
> > +                              &new.de, &new.inlined, NULL);
> >       if (IS_ERR(new.bh)) {
> >               retval = PTR_ERR(new.bh);
> >               new.bh = NULL;
> > --
> > 2.20.1.97.g81188d93c3-goog
> >
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] ext4: shrink directory when last block is empty
  2019-01-16 20:51   ` harshad shirwadkar
@ 2019-01-23 18:32     ` harshadshirwadkar
  2019-01-25  5:12       ` Theodore Y. Ts'o
  2019-01-30  1:58       ` Andreas Dilger
  0 siblings, 2 replies; 7+ messages in thread
From: harshadshirwadkar @ 2019-01-23 18:32 UTC (permalink / raw)
  To: umka, linux-ext4; +Cc: Harshad Shirwadkar

From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>

This patch is the first step towards shrinking htree based directories
when files are deleted. We truncate directory inode when a directory
entry removal causes last directory block to be empty. This patch just
removes the last block. We may end up in a situation where many
intermediate dirent blocks in directory inode are empty. Removing
those blocks would be handled later.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 fs/ext4/namei.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 109 insertions(+), 21 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2a4c25c4681d..79cb88ba898a 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -273,7 +273,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
 				 __u32 *start_hash);
 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 		struct ext4_filename *fname,
-		struct ext4_dir_entry_2 **res_dir);
+		struct ext4_dir_entry_2 **res_dir,
+		struct dx_frame *dx_frame);
 static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
 			     struct inode *dir, struct inode *inode);
 
@@ -380,6 +381,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
 					   (void *)t - (void *)dirent);
 }
 
+
 int ext4_handle_dirty_dirent_node(handle_t *handle,
 				  struct inode *inode,
 				  struct buffer_head *bh)
@@ -797,7 +799,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	dxtrace(printk("Look up %x", hash));
 	while (1) {
 		count = dx_get_count(entries);
-		if (!count || count > dx_get_limit(entries)) {
+		if (count > dx_get_limit(entries)) {
 			ext4_warning_inode(dir,
 					   "dx entry: count %u beyond limit %u",
 					   count, dx_get_limit(entries));
@@ -866,6 +868,42 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	return ret_err;
 }
 
+static void
+ext4_dx_delete_entry(handle_t *handle, struct inode *dir,
+		     struct dx_frame *dx_frame, __le64 block)
+{
+	struct dx_entry *entries;
+	unsigned int count, limit;
+	int err, i;
+
+	entries = dx_frame->entries;
+	count = dx_get_count(entries);
+	limit = dx_get_limit(entries);
+
+	for (i = 0; i < count; i++)
+		if (entries[i].block == block)
+			break;
+
+	if (i >= count)
+		return;
+
+	err = ext4_journal_get_write_access(handle, dx_frame->bh);
+	if (err) {
+		ext4_std_error(dir->i_sb, err);
+		return;
+	}
+
+	for (; i < count - 1; i++)
+		entries[i] = entries[i + 1];
+
+	dx_set_count(entries, count - 1);
+	dx_set_limit(entries, limit);
+
+	err = ext4_handle_dirty_dx_node(handle, dir, dx_frame->bh);
+	if (err)
+		ext4_std_error(dir->i_sb, err);
+}
+
 static void dx_release(struct dx_frame *frames)
 {
 	struct dx_root_info *info;
@@ -1309,6 +1347,14 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 	return 0;
 }
 
+static int is_empty_dirent_block(struct inode *dir, struct buffer_head *bh)
+{
+	struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
+
+	return (ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize)
+		== dir->i_sb->s_blocksize);
+}
+
 static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
 			       struct ext4_dir_entry *de)
 {
@@ -1339,7 +1385,8 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
 static struct buffer_head * ext4_find_entry (struct inode *dir,
 					const struct qstr *d_name,
 					struct ext4_dir_entry_2 **res_dir,
-					int *inlined)
+					int *inlined,
+					struct dx_frame *dx_frame)
 {
 	struct super_block *sb;
 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
@@ -1388,7 +1435,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 		goto restart;
 	}
 	if (is_dx(dir)) {
-		ret = ext4_dx_find_entry(dir, &fname, res_dir);
+		ret = ext4_dx_find_entry(dir, &fname, res_dir, dx_frame);
 		/*
 		 * On success, or if the error was file not found,
 		 * return.  Otherwise, fall back to doing a search the
@@ -1486,9 +1533,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 	return ret;
 }
 
-static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
-			struct ext4_filename *fname,
-			struct ext4_dir_entry_2 **res_dir)
+static struct buffer_head *ext4_dx_find_entry(
+			struct inode *dir, struct ext4_filename *fname,
+			struct ext4_dir_entry_2 **res_dir,
+			struct dx_frame *dx_frame)
 {
 	struct super_block * sb = dir->i_sb;
 	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
@@ -1502,6 +1550,10 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 	frame = dx_probe(fname, dir, NULL, frames);
 	if (IS_ERR(frame))
 		return (struct buffer_head *) frame;
+	if (dx_frame) {
+		*dx_frame = *frame;
+		get_bh(dx_frame->bh);
+	}
 	do {
 		block = dx_get_block(frame->at);
 		bh = ext4_read_dirblock(dir, block, DIRENT);
@@ -1553,7 +1605,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
 	if (dentry->d_name.len > EXT4_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return (struct dentry *) bh;
 	inode = NULL;
@@ -1597,7 +1649,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
 	struct ext4_dir_entry_2 * de;
 	struct buffer_head *bh;
 
-	bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
+	bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return (struct dentry *) bh;
 	if (!bh)
@@ -2337,9 +2389,13 @@ int ext4_generic_delete_entry(handle_t *handle,
 static int ext4_delete_entry(handle_t *handle,
 			     struct inode *dir,
 			     struct ext4_dir_entry_2 *de_del,
-			     struct buffer_head *bh)
+			     struct buffer_head *bh,
+			     struct dx_frame *dx_frame)
 {
+	struct ext4_map_blocks map;
 	int err, csum_size = 0;
+	int should_truncate = 0;
+
 
 	if (ext4_has_inline_data(dir)) {
 		int has_inline_data = 1;
@@ -2363,11 +2419,35 @@ static int ext4_delete_entry(handle_t *handle,
 	if (err)
 		goto out;
 
+	if (dx_frame && dx_frame->bh && is_empty_dirent_block(dir, bh)) {
+
+		map.m_lblk = (dir->i_size - 1) >>
+			     (dir->i_sb->s_blocksize_bits);
+		map.m_len = 1;
+		err = ext4_map_blocks(handle, dir, &map, 0);
+
+		if ((err > 0) && (bh->b_blocknr == map.m_pblk)) {
+			ext4_dx_delete_entry(handle, dir, dx_frame,
+					     cpu_to_le64(map.m_lblk));
+			should_truncate = 1;
+		}
+	}
+
 	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+
 	err = ext4_handle_dirty_dirent_node(handle, dir, bh);
 	if (unlikely(err))
 		goto out;
 
+	if (should_truncate) {
+		dir->i_size -= dir->i_sb->s_blocksize;
+		ext4_truncate(dir);
+		if (dir->i_size == dir->i_sb->s_blocksize) {
+			ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
+			ext4_mark_inode_dirty(handle, dir);
+		}
+	}
+
 	return 0;
 out:
 	if (err != -ENOENT)
@@ -2923,7 +3003,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 		return retval;
 
 	retval = -ENOENT;
-	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (!bh)
@@ -2950,7 +3030,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
 	if (IS_DIRSYNC(dir))
 		ext4_handle_sync(handle);
 
-	retval = ext4_delete_entry(handle, dir, de, bh);
+	retval = ext4_delete_entry(handle, dir, de, bh, NULL);
 	if (retval)
 		goto end_rmdir;
 	if (!EXT4_DIR_LINK_EMPTY(inode))
@@ -2985,6 +3065,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 	struct buffer_head *bh;
 	struct ext4_dir_entry_2 *de;
 	handle_t *handle = NULL;
+	struct dx_frame dx_frame;
+
+	memset(&dx_frame, 0, sizeof(dx_frame));
 
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
 		return -EIO;
@@ -3000,7 +3083,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 		return retval;
 
 	retval = -ENOENT;
-	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
+	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL, &dx_frame);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (!bh)
@@ -3028,9 +3111,10 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 				   dentry->d_name.len, dentry->d_name.name);
 		set_nlink(inode, 1);
 	}
-	retval = ext4_delete_entry(handle, dir, de, bh);
+	retval = ext4_delete_entry(handle, dir, de, bh, &dx_frame);
 	if (retval)
 		goto end_unlink;
+
 	dir->i_ctime = dir->i_mtime = current_time(dir);
 	ext4_update_dx_flag(dir);
 	ext4_mark_inode_dirty(handle, dir);
@@ -3042,6 +3126,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 
 end_unlink:
 	brelse(bh);
+	if (dx_frame.bh)
+		brelse(dx_frame.bh);
 	if (handle)
 		ext4_journal_stop(handle);
 	trace_ext4_unlink_exit(dentry, retval);
@@ -3362,11 +3448,11 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
 	struct buffer_head *bh;
 	struct ext4_dir_entry_2 *de;
 
-	bh = ext4_find_entry(dir, d_name, &de, NULL);
+	bh = ext4_find_entry(dir, d_name, &de, NULL, NULL);
 	if (IS_ERR(bh))
 		return PTR_ERR(bh);
 	if (bh) {
-		retval = ext4_delete_entry(handle, dir, de, bh);
+		retval = ext4_delete_entry(handle, dir, de, bh, NULL);
 		brelse(bh);
 	}
 	return retval;
@@ -3390,7 +3476,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
 		retval = ext4_find_delete_entry(handle, ent->dir,
 						&ent->dentry->d_name);
 	} else {
-		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
+		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh,
+					   NULL);
 		if (retval == -ENOENT) {
 			retval = ext4_find_delete_entry(handle, ent->dir,
 							&ent->dentry->d_name);
@@ -3497,7 +3584,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 			return retval;
 	}
 
-	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL,
+				 NULL);
 	if (IS_ERR(old.bh))
 		return PTR_ERR(old.bh);
 	/*
@@ -3511,7 +3599,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto end_rename;
 
 	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
-				 &new.de, &new.inlined);
+				 &new.de, &new.inlined, NULL);
 	if (IS_ERR(new.bh)) {
 		retval = PTR_ERR(new.bh);
 		new.bh = NULL;
@@ -3691,7 +3779,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 		return retval;
 
 	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
-				 &old.de, &old.inlined);
+				 &old.de, &old.inlined, NULL);
 	if (IS_ERR(old.bh))
 		return PTR_ERR(old.bh);
 	/*
@@ -3705,7 +3793,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto end_rename;
 
 	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
-				 &new.de, &new.inlined);
+				 &new.de, &new.inlined, NULL);
 	if (IS_ERR(new.bh)) {
 		retval = PTR_ERR(new.bh);
 		new.bh = NULL;
-- 
2.20.1.321.g9e740568ce-goog


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] ext4: shrink directory when last block is empty
  2019-01-23 18:32     ` harshadshirwadkar
@ 2019-01-25  5:12       ` Theodore Y. Ts'o
  2019-01-30  1:58       ` Andreas Dilger
  1 sibling, 0 replies; 7+ messages in thread
From: Theodore Y. Ts'o @ 2019-01-25  5:12 UTC (permalink / raw)
  To: harshadshirwadkar; +Cc: umka, linux-ext4

When you send a new version of the patch, it's good add [PATCH -v2] to
the subject prefix.  What I will usually do is run:

	rm -rf /tmp/p; git format-patch -o /tmp/p -1

and then edit the resulting patch in /tmp/p/0001-* before sending it
out via:

	git send-email --to=linux-ext4 --in-reply-to=<msgid> /tmp/p/*"

where I have an the following entry in ~/.mail_aliases:

alias linux-ext4 Ext4 Developers List <linux-ext4@vger.kernel.org>

Or you can use the subject-prefix option for git send-email and
git-format-patch.  I usually call git format-patch explicitly, since
if you are going to comments explaining what changed between the v1
and v2 patch (after the --- line), I'll needed to do that anyway.

> @@ -380,6 +381,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
>  					   (void *)t - (void *)dirent);
>  }
>  
> +
>  int ext4_handle_dirty_dirent_node(handle_t *handle,
>  				  struct inode *inode,
>  				  struct buffer_head *bh)

Extra blank line added above?


> @@ -797,7 +799,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
>  	dxtrace(printk("Look up %x", hash));
>  	while (1) {
>  		count = dx_get_count(entries);
> -		if (!count || count > dx_get_limit(entries)) {
> +		if (count > dx_get_limit(entries)) {
>  			ext4_warning_inode(dir,
>  					   "dx entry: count %u beyond limit %u",
>  					   count, dx_get_limit(entries));

This was added because ext4_dx_delete_entry() can end up leaving an
interior node with no entries, right?  So this change stops the
warning from triggering.  The problem is after the warning, we fall
back to a brute-force linear search of the directory; and this will
happen if the file system is subsequently mounted on an old kernel
that doesn't have this change.

We have two choices here.  The first to simply not remove the last
directory block if it will result in an empty interior node.  That's
not particularly satisfying, but it's probably the simplest approach.
The other thing we can do is to try to remove the interior node; but
that gets tricky, since we now have to edit the parent node (and if
there is no parent node, handle the case of the now-completely empty
directory).

We also have to figure out what to do with that interior node.  We
can't just deallocate it, since that would leave a hole in the
directory and that could trigger the ext4_error_inode() call in
__ext4_read_dirblock().  So we would need to leave it as an "fake"
interior node which looks like an empty directory entry (in case of
the fallback to linear search option), but with soomething that makes
it clear that it is an orphaned interior node that can get garbage
collected or reused as a leaf block later.

The second choice is the right one, but it's more complicated.  So we
may want to leave that to a future patch, and keep this change small
and simple.

> @@ -1309,6 +1347,14 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
>  	return 0;
>  }
>  
> +static int is_empty_dirent_block(struct inode *dir, struct buffer_head *bh)
> +{
> +	struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
> +
> +	return (ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize)
> +		== dir->i_sb->s_blocksize);
> +}
> +

You should also check to make sure de->inode is zero.

> @@ -1502,6 +1550,10 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
>  	frame = dx_probe(fname, dir, NULL, frames);
>  	if (IS_ERR(frame))
>  		return (struct buffer_head *) frame;
> +	if (dx_frame) {
> +		*dx_frame = *frame;
> +		get_bh(dx_frame->bh);
> +	}

This should happen at the end of ext4_dx_find_entry(), and only if it
returns success.  On an error case, we should not fill in dx_frame and
bump the refcount on dx_frame->bh.  Otherwise, unless the callers are
very careful to remember to remember to call brelse(dx_frame->bh) in
the error case, or else we'll have a buffer head leak.


Module these issues, it looks good!  What sort of testing have you
done with this patch?  I recommend using gce-xfstests -g quick before
you send out a patch for review, just to save yourself (and me!) time.

Thanks,

					- Ted

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] ext4: shrink directory when last block is empty
  2019-01-23 18:32     ` harshadshirwadkar
  2019-01-25  5:12       ` Theodore Y. Ts'o
@ 2019-01-30  1:58       ` Andreas Dilger
  2019-02-01  8:53         ` harshad shirwadkar
  1 sibling, 1 reply; 7+ messages in thread
From: Andreas Dilger @ 2019-01-30  1:58 UTC (permalink / raw)
  To: harshadshirwadkar; +Cc: umka, linux-ext4

[-- Attachment #1: Type: text/plain, Size: 7187 bytes --]

On Jan 23, 2019, at 11:32 AM, harshadshirwadkar@gmail.com wrote:
> 
> From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> 
> This patch is the first step towards shrinking htree based directories
> when files are deleted. We truncate directory inode when a directory
> entry removal causes last directory block to be empty. This patch just
> removes the last block. We may end up in a situation where many
> intermediate dirent blocks in directory inode are empty. Removing
> those blocks would be handled later.
> 
> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> ---
> fs/ext4/namei.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 109 insertions(+), 21 deletions(-)
> 
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index 2a4c25c4681d..79cb88ba898a 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -273,7 +273,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
> 				 __u32 *start_hash);
> static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> 		struct ext4_filename *fname,
> -		struct ext4_dir_entry_2 **res_dir);
> +		struct ext4_dir_entry_2 **res_dir,
> +		struct dx_frame *dx_frame);

(style) these should align after '(' on the first line

> @@ -380,6 +381,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
> 					   (void *)t - (void *)dirent);
> }
> 
> +
> int ext4_handle_dirty_dirent_node(handle_t *handle,
> 				  struct inode *inode,
> 				  struct buffer_head *bh)

(style) No need for this hunk.

> @@ -866,6 +868,42 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
> 	return ret_err;
> }
> 
> +static void
> +ext4_dx_delete_entry(handle_t *handle, struct inode *dir,
> +		     struct dx_frame *dx_frame, __le64 block)
> +{
> +	struct dx_entry *entries;
> +	unsigned int count, limit;
> +	int err, i;
> +
> +	entries = dx_frame->entries;
> +	count = dx_get_count(entries);
> +	limit = dx_get_limit(entries);
> +
> +	for (i = 0; i < count; i++)
> +		if (entries[i].block == block)
> +			break;
> +
> +	if (i >= count)
> +		return;
> +
> +	err = ext4_journal_get_write_access(handle, dx_frame->bh);
> +	if (err) {
> +		ext4_std_error(dir->i_sb, err);
> +		return;
> +	}
> +
> +	for (; i < count - 1; i++)
> +		entries[i] = entries[i + 1];
> +
> +	dx_set_count(entries, count - 1);
> +	dx_set_limit(entries, limit);

I don't think you need to update the limit just because of the count changing?
This is just setting it back to the same value that dx_get_limit() returned earlier.

> @@ -1309,6 +1347,14 @@
> +static int is_empty_dirent_block(struct inode *dir, struct buffer_head *bh)

(style) this function should be type bool.  The older "is_*" functions were written
before "bool" was available in the kernel, though a separate cleanup patch to convert
them to bool would be welcome.

> +{
> +	struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
> +
> +	return (ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize)
> +		== dir->i_sb->s_blocksize);

(style) no need for () around return value.

> 
> @@ -1339,7 +1385,8 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
> static struct buffer_head * ext4_find_entry (struct inode *dir,
> 					const struct qstr *d_name,
> 					struct ext4_dir_entry_2 **res_dir,
> -					int *inlined)
> +					int *inlined,
> +					struct dx_frame *dx_frame)

(style) this can fit on the previous line, and they can all align after '(' on
the first line.  Please also remove the space after '*' in the function declaration.

> @@ -1486,9 +1533,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
> 	return ret;
> }
> 
> -static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> -			struct ext4_filename *fname,
> -			struct ext4_dir_entry_2 **res_dir)
> +static struct buffer_head *ext4_dx_find_entry(
> +			struct inode *dir, struct ext4_filename *fname,

(style) it isn't clear why you moved "struct inode *dir" from the previous line?
The continued lines look like they can properly align after '(' on the previous line.

> @@ -2337,9 +2389,13 @@ int ext4_generic_delete_entry(handle_t *handle,
> static int ext4_delete_entry(handle_t *handle,
> 			     struct inode *dir,
> 			     struct ext4_dir_entry_2 *de_del,
> -			     struct buffer_head *bh)
> +			     struct buffer_head *bh,
> +			     struct dx_frame *dx_frame)
> {
> +	struct ext4_map_blocks map;
> 	int err, csum_size = 0;
> +	int should_truncate = 0;

(style) bool should_truncate

> @@ -2363,11 +2419,35 @@ static int ext4_delete_entry(handle_t *handle,
> 	if (err)
> 		goto out;
> 
> +	if (dx_frame && dx_frame->bh && is_empty_dirent_block(dir, bh)) {
> +

(style) no blank line here.

> +		map.m_lblk = (dir->i_size - 1) >>
> +			     (dir->i_sb->s_blocksize_bits);

(style) no need for parenthesis around dir->i_sb->s_blocksize_bits.
(style) this can fit on the previous line and still be under 80 columns.

> +		map.m_len = 1;
> +		err = ext4_map_blocks(handle, dir, &map, 0);
> +
> +		if ((err > 0) && (bh->b_blocknr == map.m_pblk)) {

(style) no need for extra parenthesis around "err > 0" and "b_blocknr == m_pblk".

> +			ext4_dx_delete_entry(handle, dir, dx_frame,
> +					     cpu_to_le64(map.m_lblk));
> +			should_truncate = 1;
> +		}
> +	}
> +
> 	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
> +
> 	err = ext4_handle_dirty_dirent_node(handle, dir, bh);
> 	if (unlikely(err))
> 		goto out;
> 
> +	if (should_truncate) {
> +		dir->i_size -= dir->i_sb->s_blocksize;
> +		ext4_truncate(dir);

Since we have already mapped the last block above, I wonder if we can do something
lighter weight than calling ext4_truncate() here?  Maybe moving the ext4_truncate()
guts into a helper function like ext4_truncate_internal() that avoids all of the
complexity (orphan list, resizing the transaction, inline data, etc).

> +		if (dir->i_size == dir->i_sb->s_blocksize) {
> +			ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
> +			ext4_mark_inode_dirty(handle, dir);

Are there any places in the code that assume an indexed directory will remain as such?
The tricky part here is that any valid htree directory will have 3 blocks (dx_root,
plus two leaf blocks), so it might be problematic to go back to a single non-htree
block, especially since this doesn't look like it has any locking.  It is probably
worthwhile to check the code for this.

> @@ -2985,6 +3065,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> 	struct buffer_head *bh;
> 	struct ext4_dir_entry_2 *de;
> 	handle_t *handle = NULL;
> +	struct dx_frame dx_frame;
> +
> +	memset(&dx_frame, 0, sizeof(dx_frame));

Rather than an explicit memset(), this can use a struct initializer when it is declared:

	struct dx_frame dx_frame = { NULL };

Are there existing xfstests that cover this case?  Certainly deleting files from a
directory, but it would be useful to have something that checks whether the directory
size is shrinking and this code is working.

Cheers, Andreas






[-- Attachment #2: Message signed with OpenPGP --]
[-- Type: application/pgp-signature, Size: 873 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] ext4: shrink directory when last block is empty
  2019-01-30  1:58       ` Andreas Dilger
@ 2019-02-01  8:53         ` harshad shirwadkar
  0 siblings, 0 replies; 7+ messages in thread
From: harshad shirwadkar @ 2019-02-01  8:53 UTC (permalink / raw)
  To: Andreas Dilger, tytso; +Cc: Alexey Lyashkov, linux-ext4

Thank you Andreas and Ted for reviewing the patch. Ted, thank you for
pointing out about the subject line when sending a new patch - I'll be
careful next time. I have handled all the review comments. I will send
out a patch with handled comments. About testing this patch, I have
created a new xfstest that verifies that directory shrinks after
deleting files in it. I'll send that patch out too.

On Tue, Jan 29, 2019 at 5:58 PM Andreas Dilger <adilger@dilger.ca> wrote:
>
> On Jan 23, 2019, at 11:32 AM, harshadshirwadkar@gmail.com wrote:
> >
> > From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> >
> > This patch is the first step towards shrinking htree based directories
> > when files are deleted. We truncate directory inode when a directory
> > entry removal causes last directory block to be empty. This patch just
> > removes the last block. We may end up in a situation where many
> > intermediate dirent blocks in directory inode are empty. Removing
> > those blocks would be handled later.
> >
> > Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
> > ---
> > fs/ext4/namei.c | 130 ++++++++++++++++++++++++++++++++++++++++--------
> > 1 file changed, 109 insertions(+), 21 deletions(-)
> >
> > diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> > index 2a4c25c4681d..79cb88ba898a 100644
> > --- a/fs/ext4/namei.c
> > +++ b/fs/ext4/namei.c
> > @@ -273,7 +273,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
> >                                __u32 *start_hash);
> > static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> >               struct ext4_filename *fname,
> > -             struct ext4_dir_entry_2 **res_dir);
> > +             struct ext4_dir_entry_2 **res_dir,
> > +             struct dx_frame *dx_frame);
>
> (style) these should align after '(' on the first line
>
> > @@ -380,6 +381,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
> >                                          (void *)t - (void *)dirent);
> > }
> >
> > +
> > int ext4_handle_dirty_dirent_node(handle_t *handle,
> >                                 struct inode *inode,
> >                                 struct buffer_head *bh)
>
> (style) No need for this hunk.
>
> > @@ -866,6 +868,42 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
> >       return ret_err;
> > }
> >
> > +static void
> > +ext4_dx_delete_entry(handle_t *handle, struct inode *dir,
> > +                  struct dx_frame *dx_frame, __le64 block)
> > +{
> > +     struct dx_entry *entries;
> > +     unsigned int count, limit;
> > +     int err, i;
> > +
> > +     entries = dx_frame->entries;
> > +     count = dx_get_count(entries);
> > +     limit = dx_get_limit(entries);
> > +
> > +     for (i = 0; i < count; i++)
> > +             if (entries[i].block == block)
> > +                     break;
> > +
> > +     if (i >= count)
> > +             return;
> > +
> > +     err = ext4_journal_get_write_access(handle, dx_frame->bh);
> > +     if (err) {
> > +             ext4_std_error(dir->i_sb, err);
> > +             return;
> > +     }
> > +
> > +     for (; i < count - 1; i++)
> > +             entries[i] = entries[i + 1];
> > +
> > +     dx_set_count(entries, count - 1);
> > +     dx_set_limit(entries, limit);
>
> I don't think you need to update the limit just because of the count changing?
> This is just setting it back to the same value that dx_get_limit() returned earlier.
>
> > @@ -1309,6 +1347,14 @@
> > +static int is_empty_dirent_block(struct inode *dir, struct buffer_head *bh)
>
> (style) this function should be type bool.  The older "is_*" functions were written
> before "bool" was available in the kernel, though a separate cleanup patch to convert
> them to bool would be welcome.
>
> > +{
> > +     struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
> > +
> > +     return (ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize)
> > +             == dir->i_sb->s_blocksize);
>
> (style) no need for () around return value.
>
> >
> > @@ -1339,7 +1385,8 @@ static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
> > static struct buffer_head * ext4_find_entry (struct inode *dir,
> >                                       const struct qstr *d_name,
> >                                       struct ext4_dir_entry_2 **res_dir,
> > -                                     int *inlined)
> > +                                     int *inlined,
> > +                                     struct dx_frame *dx_frame)
>
> (style) this can fit on the previous line, and they can all align after '(' on
> the first line.  Please also remove the space after '*' in the function declaration.
>
> > @@ -1486,9 +1533,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
> >       return ret;
> > }
> >
> > -static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
> > -                     struct ext4_filename *fname,
> > -                     struct ext4_dir_entry_2 **res_dir)
> > +static struct buffer_head *ext4_dx_find_entry(
> > +                     struct inode *dir, struct ext4_filename *fname,
>
> (style) it isn't clear why you moved "struct inode *dir" from the previous line?
> The continued lines look like they can properly align after '(' on the previous line.
>
> > @@ -2337,9 +2389,13 @@ int ext4_generic_delete_entry(handle_t *handle,
> > static int ext4_delete_entry(handle_t *handle,
> >                            struct inode *dir,
> >                            struct ext4_dir_entry_2 *de_del,
> > -                          struct buffer_head *bh)
> > +                          struct buffer_head *bh,
> > +                          struct dx_frame *dx_frame)
> > {
> > +     struct ext4_map_blocks map;
> >       int err, csum_size = 0;
> > +     int should_truncate = 0;
>
> (style) bool should_truncate
>
> > @@ -2363,11 +2419,35 @@ static int ext4_delete_entry(handle_t *handle,
> >       if (err)
> >               goto out;
> >
> > +     if (dx_frame && dx_frame->bh && is_empty_dirent_block(dir, bh)) {
> > +
>
> (style) no blank line here.
>
> > +             map.m_lblk = (dir->i_size - 1) >>
> > +                          (dir->i_sb->s_blocksize_bits);
>
> (style) no need for parenthesis around dir->i_sb->s_blocksize_bits.
> (style) this can fit on the previous line and still be under 80 columns.
>
> > +             map.m_len = 1;
> > +             err = ext4_map_blocks(handle, dir, &map, 0);
> > +
> > +             if ((err > 0) && (bh->b_blocknr == map.m_pblk)) {
>
> (style) no need for extra parenthesis around "err > 0" and "b_blocknr == m_pblk".
>
> > +                     ext4_dx_delete_entry(handle, dir, dx_frame,
> > +                                          cpu_to_le64(map.m_lblk));
> > +                     should_truncate = 1;
> > +             }
> > +     }
> > +
> >       BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
> > +
> >       err = ext4_handle_dirty_dirent_node(handle, dir, bh);
> >       if (unlikely(err))
> >               goto out;
> >
> > +     if (should_truncate) {
> > +             dir->i_size -= dir->i_sb->s_blocksize;
> > +             ext4_truncate(dir);
>
> Since we have already mapped the last block above, I wonder if we can do something
> lighter weight than calling ext4_truncate() here?  Maybe moving the ext4_truncate()
> guts into a helper function like ext4_truncate_internal() that avoids all of the
> complexity (orphan list, resizing the transaction, inline data, etc).
>
> > +             if (dir->i_size == dir->i_sb->s_blocksize) {
> > +                     ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
> > +                     ext4_mark_inode_dirty(handle, dir);
>
> Are there any places in the code that assume an indexed directory will remain as such?
> The tricky part here is that any valid htree directory will have 3 blocks (dx_root,
> plus two leaf blocks), so it might be problematic to go back to a single non-htree
> block, especially since this doesn't look like it has any locking.  It is probably
> worthwhile to check the code for this.

After handling Ted's comment about not deleting the last block if it
makes intermediate node empty, this patch will never result in a
situation where dir->i_size == blocksize. So, this hunk is not needed
anymore.

>
> > @@ -2985,6 +3065,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
> >       struct buffer_head *bh;
> >       struct ext4_dir_entry_2 *de;
> >       handle_t *handle = NULL;
> > +     struct dx_frame dx_frame;
> > +
> > +     memset(&dx_frame, 0, sizeof(dx_frame));
>
> Rather than an explicit memset(), this can use a struct initializer when it is declared:
>
>         struct dx_frame dx_frame = { NULL };
>
> Are there existing xfstests that cover this case?  Certainly deleting files from a
> directory, but it would be useful to have something that checks whether the directory
> size is shrinking and this code is working.
>
> Cheers, Andreas
>
>
>
>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2019-02-01  8:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-15 22:55 [PATCH] ext4: shrink directory when last block is empty harshadshirwadkar
2019-01-16 14:27 ` Alexey Lyashkov
2019-01-16 20:51   ` harshad shirwadkar
2019-01-23 18:32     ` harshadshirwadkar
2019-01-25  5:12       ` Theodore Y. Ts'o
2019-01-30  1:58       ` Andreas Dilger
2019-02-01  8:53         ` harshad shirwadkar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.