All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] Btrfs: fix inode leak on failure to setup whiteout inode in rename
@ 2016-05-05  1:34 fdmanana
  2016-05-05  1:34 ` [PATCH 2/3] Btrfs: unpin logs if rename exchange operation fails fdmanana
  2016-05-05  1:34 ` [PATCH 3/3] Btrfs: pin logs earlier when doing a rename exchange operation fdmanana
  0 siblings, 2 replies; 3+ messages in thread
From: fdmanana @ 2016-05-05  1:34 UTC (permalink / raw)
  To: linux-btrfs

From: Filipe Manana <fdmanana@suse.com>

If we failed to fully setup the whiteout inode during a rename operation
with the whiteout flag, we ended up leaking the inode, not decrementing
its link count nor removing all its items from the fs/subvol tree.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/inode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 09947cb..ab64721 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9612,21 +9612,21 @@ static int btrfs_whiteout_for_rename(struct btrfs_trans_handle *trans,
 	ret = btrfs_init_inode_security(trans, inode, dir,
 				&dentry->d_name);
 	if (ret)
-		return ret;
+		goto out;
 
 	ret = btrfs_add_nondir(trans, dir, dentry,
 				inode, 0, index);
 	if (ret)
-		return ret;
+		goto out;
 
 	ret = btrfs_update_inode(trans, root, inode);
-	if (ret)
-		return ret;
-
+out:
 	unlock_new_inode(inode);
+	if (ret)
+		inode_dec_link_count(inode);
 	iput(inode);
 
-	return 0;
+	return ret;
 }
 
 static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-- 
2.7.0.rc3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/3] Btrfs: unpin logs if rename exchange operation fails
  2016-05-05  1:34 [PATCH 1/3] Btrfs: fix inode leak on failure to setup whiteout inode in rename fdmanana
@ 2016-05-05  1:34 ` fdmanana
  2016-05-05  1:34 ` [PATCH 3/3] Btrfs: pin logs earlier when doing a rename exchange operation fdmanana
  1 sibling, 0 replies; 3+ messages in thread
From: fdmanana @ 2016-05-05  1:34 UTC (permalink / raw)
  To: linux-btrfs

From: Filipe Manana <fdmanana@suse.com>

If rename exchange operations fail at some point after we pinned any of
the logs, we end up aborting the current transaction but never unpin the
logs, which leaves concurrent tasks that are trying to sync the logs (as
part of an fsync request from user space) blocked forever and preventing
the filesystem from being unmountable.

Fix this by safely unpinning the log.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/inode.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ab64721..503d749 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9412,6 +9412,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	u64 new_idx = 0;
 	u64 root_objectid;
 	int ret;
+	bool root_log_pinned = false;
+	bool dest_log_pinned = false;
 
 	/* we only allow rename subvolume link between subvolumes */
 	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
@@ -9464,6 +9466,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 		if (ret)
 			goto out_fail;
 		btrfs_pin_log_trans(root);
+		root_log_pinned = true;
 	}
 
 	/* And now for the dest. */
@@ -9479,6 +9482,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 		if (ret)
 			goto out_fail;
 		btrfs_pin_log_trans(dest);
+		dest_log_pinned = true;
 	}
 
 	/* Update inode version and ctime/mtime. */
@@ -9557,17 +9561,47 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	if (new_inode->i_nlink == 1)
 		BTRFS_I(new_inode)->dir_index = new_idx;
 
-	if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+	if (root_log_pinned) {
 		parent = new_dentry->d_parent;
 		btrfs_log_new_name(trans, old_inode, old_dir, parent);
 		btrfs_end_log_trans(root);
+		root_log_pinned = false;
 	}
-	if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
+	if (dest_log_pinned) {
 		parent = old_dentry->d_parent;
 		btrfs_log_new_name(trans, new_inode, new_dir, parent);
 		btrfs_end_log_trans(dest);
+		dest_log_pinned = false;
 	}
 out_fail:
+	/*
+	 * If we have pinned a log and an error happened, we unpin tasks
+	 * trying to sync the log and force them to fallback to a transaction
+	 * commit if the log currently contains any of the inodes involved in
+	 * this rename operation (to ensure we do not persist a log with an
+	 * inconsistent state for any of these inodes or leading to any
+	 * inconsistencies when replayed). If the transaction was aborted, the
+	 * abortion reason is propagated to userspace when attempting to commit
+	 * the transaction. If the log does not contain any of these inodes, we
+	 * allow the tasks to sync it.
+	 */
+	if (ret && (root_log_pinned || dest_log_pinned)) {
+		if (btrfs_inode_in_log(old_dir, root->fs_info->generation) ||
+		    btrfs_inode_in_log(new_dir, root->fs_info->generation) ||
+		    btrfs_inode_in_log(old_inode, root->fs_info->generation) ||
+		    (new_inode &&
+		     btrfs_inode_in_log(new_inode, root->fs_info->generation)))
+		    btrfs_set_log_full_commit(root->fs_info, trans);
+
+		if (root_log_pinned) {
+			btrfs_end_log_trans(root);
+			root_log_pinned = false;
+		}
+		if (dest_log_pinned) {
+			btrfs_end_log_trans(dest);
+			dest_log_pinned = false;
+		}
+	}
 	ret = btrfs_end_transaction(trans, root);
 out_notrans:
 	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
-- 
2.7.0.rc3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 3/3] Btrfs: pin logs earlier when doing a rename exchange operation
  2016-05-05  1:34 [PATCH 1/3] Btrfs: fix inode leak on failure to setup whiteout inode in rename fdmanana
  2016-05-05  1:34 ` [PATCH 2/3] Btrfs: unpin logs if rename exchange operation fails fdmanana
@ 2016-05-05  1:34 ` fdmanana
  1 sibling, 0 replies; 3+ messages in thread
From: fdmanana @ 2016-05-05  1:34 UTC (permalink / raw)
  To: linux-btrfs

From: Filipe Manana <fdmanana@suse.com>

The btrfs_rename_exchange() started as a copy-paste from btrfs_rename(),
which had a race fixed by my previous patch titled "Btrfs: pin log earlier
when renaming", and so it suffers from the same problem.

We pin the logs of the affected roots after we insert the new inode
references, leaving a time window where concurrent tasks logging the
inodes can end up logging both the new and old references, resulting
in log trees that when replayed can turn the metadata into inconsistent
states. This behaviour was added to btrfs_rename() in 2009 without any
explanation about why not pinning the logs earlier, just leaving a
comment about the posibility for the race. As of today it's perfectly
safe and sane to pin the logs before we start doing any of the steps
involved in the rename operation.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/inode.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 503d749..dab6c08f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9458,6 +9458,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 		/* force full log commit if subvolume involved. */
 		btrfs_set_log_full_commit(root->fs_info, trans);
 	} else {
+		btrfs_pin_log_trans(root);
+		root_log_pinned = true;
 		ret = btrfs_insert_inode_ref(trans, dest,
 					     new_dentry->d_name.name,
 					     new_dentry->d_name.len,
@@ -9465,8 +9467,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 					     btrfs_ino(new_dir), old_idx);
 		if (ret)
 			goto out_fail;
-		btrfs_pin_log_trans(root);
-		root_log_pinned = true;
 	}
 
 	/* And now for the dest. */
@@ -9474,6 +9474,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 		/* force full log commit if subvolume involved. */
 		btrfs_set_log_full_commit(dest->fs_info, trans);
 	} else {
+		btrfs_pin_log_trans(dest);
+		dest_log_pinned = true;
 		ret = btrfs_insert_inode_ref(trans, root,
 					     old_dentry->d_name.name,
 					     old_dentry->d_name.len,
@@ -9481,8 +9483,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 					     btrfs_ino(old_dir), new_idx);
 		if (ret)
 			goto out_fail;
-		btrfs_pin_log_trans(dest);
-		dest_log_pinned = true;
 	}
 
 	/* Update inode version and ctime/mtime. */
-- 
2.7.0.rc3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-05-05 16:44 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-05  1:34 [PATCH 1/3] Btrfs: fix inode leak on failure to setup whiteout inode in rename fdmanana
2016-05-05  1:34 ` [PATCH 2/3] Btrfs: unpin logs if rename exchange operation fails fdmanana
2016-05-05  1:34 ` [PATCH 3/3] Btrfs: pin logs earlier when doing a rename exchange operation fdmanana

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.