linux-ext4.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Zhang Yi <yi.zhang@huawei.com>
To: <linux-ext4@vger.kernel.org>
Cc: <tytso@mit.edu>, <adilger.kernel@dilger.ca>, <jack@suse.cz>,
	<yi.zhang@huawei.com>, <yukuai3@huawei.com>, <yebin10@huawei.com>
Subject: [RFC PATCH] ext4: convert symlink external data block mapping to bdev
Date: Wed, 6 Apr 2022 16:45:03 +0800	[thread overview]
Message-ID: <20220406084503.1961686-1-yi.zhang@huawei.com> (raw)

Symlink's external data block is one kind of metadata block, and now
that almost all ext4 metadata block's page cache (e.g. directory blocks,
quota blocks...) belongs to bdev backing inode except the symlink. It
is essentially worked in data=journal mode like other regular file's
data block because probably in order to make it simple for generic VFS
code handling symlinks or some other historical reasons, but the logic
of creating external data block in ext4_symlink() is complicated. and it
also make things confused if user do not want to let the filesystem
worked in data=journal mode. This patch convert the final exceptional
case and make things clean, move the mapping of the symlink's external
data block to bdev like any other metadata block does.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
This RFC patch follow the talking of whether if we could unify the
journal mode of ext4 metadata blocks[1], it stop using the data=journal
mode for the final exception case of symlink's external data block. Any
comments are welcome, thanks.

[1]. https://lore.kernel.org/linux-ext4/20220321151141.hypnhr6o4vng2sa6@quack3.lan/T/#m84b942a6bb838ba60ae8afd906ebbb987a577488

 fs/ext4/inode.c   |   9 +---
 fs/ext4/namei.c   | 123 +++++++++++++++++++++-------------------------
 fs/ext4/symlink.c |  44 ++++++++++++++---
 3 files changed, 93 insertions(+), 83 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1ce13f69fbec..f603674df8a8 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -199,8 +199,7 @@ void ext4_evict_inode(struct inode *inode)
 		 */
 		if (inode->i_ino != EXT4_JOURNAL_INO &&
 		    ext4_should_journal_data(inode) &&
-		    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
-		    inode->i_data.nrpages) {
+		    S_ISREG(inode->i_mode) && inode->i_data.nrpages) {
 			journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
 			tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
 
@@ -2944,8 +2943,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
 
 	index = pos >> PAGE_SHIFT;
 
-	if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) ||
-	    ext4_verity_in_progress(inode)) {
+	if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) {
 		*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
 		return ext4_write_begin(file, mapping, pos,
 					len, flags, pagep, fsdata);
@@ -4977,7 +4975,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 		}
 		if (IS_ENCRYPTED(inode)) {
 			inode->i_op = &ext4_encrypted_symlink_inode_operations;
-			ext4_set_aops(inode);
 		} else if (ext4_inode_is_fast_symlink(inode)) {
 			inode->i_link = (char *)ei->i_data;
 			inode->i_op = &ext4_fast_symlink_inode_operations;
@@ -4985,9 +4982,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
 				sizeof(ei->i_data) - 1);
 		} else {
 			inode->i_op = &ext4_symlink_inode_operations;
-			ext4_set_aops(inode);
 		}
-		inode_nohighmem(inode);
 	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
 	      S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 		inode->i_op = &ext4_special_inode_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e37da8d5cd0c..8a1026ccaf75 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3249,6 +3249,32 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
 	return retval;
 }
 
+static int ext4_init_symlink_block(handle_t *handle, struct inode *inode,
+				   struct fscrypt_str *disk_link)
+{
+	struct buffer_head *bh;
+	char *kaddr;
+	int err = 0;
+
+	bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE);
+	if (IS_ERR(bh))
+		return PTR_ERR(bh);
+
+	BUFFER_TRACE(bh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE);
+	if (err)
+		goto out;
+
+	kaddr = (char *)bh->b_data;
+	memcpy(kaddr, disk_link->name, disk_link->len);
+	inode->i_size = disk_link->len - 1;
+	EXT4_I(inode)->i_disksize = inode->i_size;
+	err = ext4_handle_dirty_metadata(handle, inode, bh);
+out:
+	brelse(bh);
+	return err;
+}
+
 static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
 			struct dentry *dentry, const char *symname)
 {
@@ -3270,26 +3296,8 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
-		/*
-		 * For non-fast symlinks, we just allocate inode and put it on
-		 * orphan list in the first transaction => we need bitmap,
-		 * group descriptor, sb, inode block, quota blocks, and
-		 * possibly selinux xattr blocks.
-		 */
-		credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
-			  EXT4_XATTR_TRANS_BLOCKS;
-	} else {
-		/*
-		 * Fast symlink. We have to add entry to directory
-		 * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
-		 * allocate new inode (bitmap, group descriptor, inode block,
-		 * quota blocks, sb is already counted in previous macros).
-		 */
-		credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-			  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
-	}
-
+	credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+		  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
 	inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO,
 					    &dentry->d_name, 0, NULL,
 					    EXT4_HT_DIR, credits);
@@ -3305,73 +3313,52 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
 		if (err)
 			goto err_drop_inode;
 		inode->i_op = &ext4_encrypted_symlink_inode_operations;
+	} else {
+		if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
+			inode->i_op = &ext4_symlink_inode_operations;
+		} else {
+			inode->i_op = &ext4_fast_symlink_inode_operations;
+			inode->i_link = (char *)&EXT4_I(inode)->i_data;
+		}
 	}
 
 	if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
-		if (!IS_ENCRYPTED(inode))
-			inode->i_op = &ext4_symlink_inode_operations;
-		inode_nohighmem(inode);
-		ext4_set_aops(inode);
-		/*
-		 * We cannot call page_symlink() with transaction started
-		 * because it calls into ext4_write_begin() which can wait
-		 * for transaction commit if we are running out of space
-		 * and thus we deadlock. So we have to stop transaction now
-		 * and restart it when symlink contents is written.
-		 *
-		 * To keep fs consistent in case of crash, we have to put inode
-		 * to orphan list in the mean time.
-		 */
-		drop_nlink(inode);
-		err = ext4_orphan_add(handle, inode);
-		if (handle)
-			ext4_journal_stop(handle);
-		handle = NULL;
-		if (err)
-			goto err_drop_inode;
-		err = __page_symlink(inode, disk_link.name, disk_link.len, 1);
+		/* alloc symlink block and fill it */
+		err = ext4_init_symlink_block(handle, inode, &disk_link);
 		if (err)
 			goto err_drop_inode;
-		/*
-		 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
-		 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
-		 */
-		handle = ext4_journal_start(dir, EXT4_HT_DIR,
-				EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-				EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
-		if (IS_ERR(handle)) {
-			err = PTR_ERR(handle);
-			handle = NULL;
-			goto err_drop_inode;
-		}
-		set_nlink(inode, 1);
-		err = ext4_orphan_del(handle, inode);
+		err = ext4_mark_inode_dirty(handle, inode);
+		if (!err)
+			err = ext4_add_entry(handle, dentry, inode);
 		if (err)
 			goto err_drop_inode;
+
+		d_instantiate_new(dentry, inode);
+		if (IS_DIRSYNC(dir))
+			ext4_handle_sync(handle);
+		if (handle)
+			ext4_journal_stop(handle);
 	} else {
 		/* clear the extent format for fast symlink */
 		ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
-		if (!IS_ENCRYPTED(inode)) {
-			inode->i_op = &ext4_fast_symlink_inode_operations;
-			inode->i_link = (char *)&EXT4_I(inode)->i_data;
-		}
 		memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
 		       disk_link.len);
 		inode->i_size = disk_link.len - 1;
+		EXT4_I(inode)->i_disksize = inode->i_size;
+		err = ext4_add_nondir(handle, dentry, &inode);
+		if (handle)
+			ext4_journal_stop(handle);
+		if (inode)
+			iput(inode);
 	}
-	EXT4_I(inode)->i_disksize = inode->i_size;
-	err = ext4_add_nondir(handle, dentry, &inode);
-	if (handle)
-		ext4_journal_stop(handle);
-	if (inode)
-		iput(inode);
 	goto out_free_encrypted_link;
 
 err_drop_inode:
-	if (handle)
-		ext4_journal_stop(handle);
 	clear_nlink(inode);
+	ext4_orphan_add(handle, inode);
 	unlock_new_inode(inode);
+	if (handle)
+		ext4_journal_stop(handle);
 	iput(inode);
 out_free_encrypted_link:
 	if (disk_link.name != (unsigned char *)symname)
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 69109746e6e2..f030f8705986 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -27,7 +27,7 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
 					   struct inode *inode,
 					   struct delayed_call *done)
 {
-	struct page *cpage = NULL;
+	struct buffer_head *bh = NULL;
 	const void *caddr;
 	unsigned int max_size;
 	const char *paddr;
@@ -39,16 +39,19 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
 		caddr = EXT4_I(inode)->i_data;
 		max_size = sizeof(EXT4_I(inode)->i_data);
 	} else {
-		cpage = read_mapping_page(inode->i_mapping, 0, NULL);
-		if (IS_ERR(cpage))
-			return ERR_CAST(cpage);
-		caddr = page_address(cpage);
+		bh = ext4_bread(NULL, inode, 0, 0);
+		if (IS_ERR(bh))
+			return ERR_CAST(bh);
+		if (!bh) {
+			EXT4_ERROR_INODE(inode, "bad symlink.");
+			return ERR_PTR(-EFSCORRUPTED);
+		}
+		caddr = bh->b_data;
 		max_size = inode->i_sb->s_blocksize;
 	}
 
 	paddr = fscrypt_get_symlink(inode, caddr, max_size, done);
-	if (cpage)
-		put_page(cpage);
+	brelse(bh);
 	return paddr;
 }
 
@@ -62,6 +65,31 @@ static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns,
 	return fscrypt_symlink_getattr(path, stat);
 }
 
+static void ext4_free_link(void *bh)
+{
+	brelse(bh);
+}
+
+static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
+				 struct delayed_call *callback)
+{
+	struct buffer_head *bh;
+
+	if (!dentry)
+		return ERR_PTR(-ECHILD);
+
+	bh = ext4_bread(NULL, inode, 0, 0);
+	if (IS_ERR(bh))
+		return ERR_CAST(bh);
+	if (!bh) {
+		EXT4_ERROR_INODE(inode, "bad symlink.");
+		return ERR_PTR(-EFSCORRUPTED);
+	}
+
+	set_delayed_call(callback, ext4_free_link, bh);
+	return bh->b_data;
+}
+
 const struct inode_operations ext4_encrypted_symlink_inode_operations = {
 	.get_link	= ext4_encrypted_get_link,
 	.setattr	= ext4_setattr,
@@ -70,7 +98,7 @@ const struct inode_operations ext4_encrypted_symlink_inode_operations = {
 };
 
 const struct inode_operations ext4_symlink_inode_operations = {
-	.get_link	= page_get_link,
+	.get_link	= ext4_get_link,
 	.setattr	= ext4_setattr,
 	.getattr	= ext4_getattr,
 	.listxattr	= ext4_listxattr,
-- 
2.31.1


             reply	other threads:[~2022-04-06 11:42 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-06  8:45 Zhang Yi [this message]
2022-04-06 17:17 ` Jan Kara
2022-04-07  8:14   ` Zhang Yi
2022-04-07 13:55     ` Jan Kara
2022-04-08  5:45       ` Zhang Yi
2022-04-11  7:55         ` Jan Kara
2022-04-11 10:50           ` Zhang Yi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220406084503.1961686-1-yi.zhang@huawei.com \
    --to=yi.zhang@huawei.com \
    --cc=adilger.kernel@dilger.ca \
    --cc=jack@suse.cz \
    --cc=linux-ext4@vger.kernel.org \
    --cc=tytso@mit.edu \
    --cc=yebin10@huawei.com \
    --cc=yukuai3@huawei.com \
    --subject='Re: [RFC PATCH] ext4: convert symlink external data block mapping to bdev' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).