* [PATCH] Btrfs: implement RENAME_EXCHANGE semantic
@ 2015-04-02 3:56 Davide Italiano
2015-04-02 3:56 ` [PATCH] Btrfs: RENAME_EXCHANGE semantic for renameat2() Davide Italiano
0 siblings, 1 reply; 3+ messages in thread
From: Davide Italiano @ 2015-04-02 3:56 UTC (permalink / raw)
To: linux-btrfs; +Cc: Davide Italiano
This is an attempt to implement RENAME_EXCHANGE in btrfs.
It survived basic testing and I think it's ready for others' feedback.
I'll stress test {and, or} rewrite it depending on people's comments.
Davide Italiano (1):
Btrfs: RENAME_EXCHANGE semantic for renameat2()
fs/btrfs/inode.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 189 insertions(+), 1 deletion(-)
--
2.3.4
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH] Btrfs: RENAME_EXCHANGE semantic for renameat2()
2015-04-02 3:56 [PATCH] Btrfs: implement RENAME_EXCHANGE semantic Davide Italiano
@ 2015-04-02 3:56 ` Davide Italiano
2015-04-02 10:05 ` Filipe David Manana
0 siblings, 1 reply; 3+ messages in thread
From: Davide Italiano @ 2015-04-02 3:56 UTC (permalink / raw)
To: linux-btrfs; +Cc: Davide Italiano
Signed-off-by: Davide Italiano <dccitaliano@gmail.com>
---
fs/btrfs/inode.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 189 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d2e732d..49b0867 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8890,6 +8890,190 @@ static int btrfs_getattr(struct vfsmount *mnt,
return 0;
}
+static int btrfs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(old_dir)->root;
+ struct btrfs_root *dest = BTRFS_I(new_dir)->root;
+ struct inode *new_inode = new_dentry->d_inode;
+ struct inode *old_inode = old_dentry->d_inode;
+ struct timespec ctime = CURRENT_TIME;
+ u64 old_ino = btrfs_ino(old_inode);
+ u64 new_ino = btrfs_ino(new_inode);
+ u64 old_idx = 0;
+ u64 new_idx = 0;
+ u64 root_objectid;
+ int ret;
+
+ /* we only allow rename subvolume link between subvolumes */
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
+ return -EXDEV;
+
+ /* close the racy window with snapshot create/destroy ioctl */
+ if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
+ down_read(&root->fs_info->subvol_sem);
+ if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
+ down_read(&dest->fs_info->subvol_sem);
+
+ /*
+ * We want to reserve the absolute worst case amount of items. So if
+ * both inodes are subvols and we need to unlink them then that would
+ * require 4 item modifications, but if they are both normal inodes it
+ * would require 5 item modifications, so we'll assume their normal
+ * inodes. So 5 * 2 is 10, plus 2 for the new links, so 12 total items
+ * should cover the worst case number of items we'll modify.
+ */
+ trans = btrfs_start_transaction(root, 12);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_notrans;
+ }
+
+ /*
+ * We need to find a free sequence number both in the source and
+ * in the destination directory for the exchange.
+ */
+ ret = btrfs_set_inode_index(new_dir, &old_idx);
+ if (ret)
+ goto out_fail;
+ ret = btrfs_set_inode_index(old_dir, &new_idx);
+ if (ret)
+ goto out_fail;
+
+ BTRFS_I(old_inode)->dir_index = 0ULL;
+ BTRFS_I(new_inode)->dir_index = 0ULL;
+
+ /* Reference for the source. */
+ if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ /* force full log commit if subvolume involved. */
+ btrfs_set_log_full_commit(root->fs_info, trans);
+ } else {
+ ret = btrfs_insert_inode_ref(trans, dest,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len,
+ old_ino,
+ btrfs_ino(new_dir), old_idx);
+ if (ret)
+ goto out_fail;
+ btrfs_pin_log_trans(root);
+ }
+
+ /* And now for the dest. */
+ if (unlikely(new_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ /* force full log commit if subvolume involved. */
+ btrfs_set_log_full_commit(dest->fs_info, trans);
+ } else {
+ ret = btrfs_insert_inode_ref(trans, root,
+ old_dentry->d_name.name,
+ old_dentry->d_name.len,
+ new_ino,
+ btrfs_ino(old_dir), new_idx);
+ if (ret)
+ goto out_fail;
+ btrfs_pin_log_trans(dest);
+ }
+
+ /*
+ * Update i-node version and ctime/mtime.
+ */
+ inode_inc_iversion(old_dir);
+ inode_inc_iversion(new_dir);
+ inode_inc_iversion(old_inode);
+ inode_inc_iversion(new_inode);
+ old_dir->i_ctime = old_dir->i_mtime = ctime;
+ new_dir->i_ctime = new_dir->i_mtime = ctime;
+ old_inode->i_ctime = ctime;
+ new_inode->i_ctime = ctime;
+
+ if (old_dentry->d_parent != new_dentry->d_parent) {
+ btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
+ btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
+ }
+
+ /* src is a subvolume */
+ if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
+ ret = btrfs_unlink_subvol(trans, root, old_dir,
+ root_objectid,
+ old_dentry->d_name.name,
+ old_dentry->d_name.len);
+ } else { /* src is a inode */
+ ret = __btrfs_unlink_inode(trans, root, old_dir,
+ old_dentry->d_inode,
+ old_dentry->d_name.name,
+ old_dentry->d_name.len);
+ if (!ret)
+ ret = btrfs_update_inode(trans, root, old_inode);
+ }
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_fail;
+ }
+
+ /* dest is a subvolume */
+ if (unlikely(new_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+ root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
+ ret = btrfs_unlink_subvol(trans, dest, new_dir,
+ root_objectid,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+ } else { /* dest is an inode */
+ ret = __btrfs_unlink_inode(trans, dest, new_dir,
+ new_dentry->d_inode,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+ if (!ret)
+ ret = btrfs_update_inode(trans, dest, new_inode);
+ }
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_fail;
+ }
+
+ ret = btrfs_add_link(trans, new_dir, old_inode,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len, 0, old_idx);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_fail;
+ }
+
+ ret = btrfs_add_link(trans, old_dir, new_inode,
+ old_dentry->d_name.name,
+ old_dentry->d_name.len, 0, new_idx);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_fail;
+ }
+
+ if (old_inode->i_nlink == 1)
+ BTRFS_I(old_inode)->dir_index = old_idx;
+ if (new_inode->i_nlink == 1)
+ BTRFS_I(new_inode)->dir_index = new_idx;
+
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ struct dentry *parent = new_dentry->d_parent;
+ btrfs_log_new_name(trans, old_inode, old_dir, parent);
+ btrfs_end_log_trans(root);
+ }
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ struct dentry *parent = old_dentry->d_parent;
+ btrfs_log_new_name(trans, new_inode, new_dir, parent);
+ btrfs_end_log_trans(dest);
+ }
+
+out_fail:
+ btrfs_end_transaction(trans, root);
+out_notrans:
+ /* Racy window with snapshot create/destroy ioctl */
+ if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
+ up_read(&dest->fs_info->subvol_sem);
+ if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
+ up_read(&root->fs_info->subvol_sem);
+ return 0;
+}
+
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
@@ -9074,9 +9258,13 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- if (flags & ~RENAME_NOREPLACE)
+ if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
return -EINVAL;
+ if (flags & RENAME_EXCHANGE)
+ return btrfs_cross_rename(old_dir, old_dentry, new_dir,
+ new_dentry);
+
return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
}
--
2.3.4
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] Btrfs: RENAME_EXCHANGE semantic for renameat2()
2015-04-02 3:56 ` [PATCH] Btrfs: RENAME_EXCHANGE semantic for renameat2() Davide Italiano
@ 2015-04-02 10:05 ` Filipe David Manana
0 siblings, 0 replies; 3+ messages in thread
From: Filipe David Manana @ 2015-04-02 10:05 UTC (permalink / raw)
To: Davide Italiano; +Cc: linux-btrfs
On Thu, Apr 2, 2015 at 4:56 AM, Davide Italiano <dccitaliano@gmail.com> wrote:
> Signed-off-by: Davide Italiano <dccitaliano@gmail.com>
Hi, only skimmed through it, a few small comments below.
I haven't surely tested it as well (I assume you ran all xfstests from
the generic group).
Thanks.
> ---
> fs/btrfs/inode.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 189 insertions(+), 1 deletion(-)
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index d2e732d..49b0867 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -8890,6 +8890,190 @@ static int btrfs_getattr(struct vfsmount *mnt,
> return 0;
> }
>
> +static int btrfs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
> + struct inode *new_dir, struct dentry *new_dentry)
> +{
> + struct btrfs_trans_handle *trans;
> + struct btrfs_root *root = BTRFS_I(old_dir)->root;
> + struct btrfs_root *dest = BTRFS_I(new_dir)->root;
> + struct inode *new_inode = new_dentry->d_inode;
> + struct inode *old_inode = old_dentry->d_inode;
> + struct timespec ctime = CURRENT_TIME;
> + u64 old_ino = btrfs_ino(old_inode);
> + u64 new_ino = btrfs_ino(new_inode);
> + u64 old_idx = 0;
> + u64 new_idx = 0;
> + u64 root_objectid;
> + int ret;
> +
> + /* we only allow rename subvolume link between subvolumes */
> + if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
> + return -EXDEV;
> +
> + /* close the racy window with snapshot create/destroy ioctl */
> + if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
> + down_read(&root->fs_info->subvol_sem);
> + if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
> + down_read(&dest->fs_info->subvol_sem);
> +
> + /*
> + * We want to reserve the absolute worst case amount of items. So if
> + * both inodes are subvols and we need to unlink them then that would
> + * require 4 item modifications, but if they are both normal inodes it
> + * would require 5 item modifications, so we'll assume their normal
> + * inodes. So 5 * 2 is 10, plus 2 for the new links, so 12 total items
> + * should cover the worst case number of items we'll modify.
> + */
> + trans = btrfs_start_transaction(root, 12);
> + if (IS_ERR(trans)) {
> + ret = PTR_ERR(trans);
> + goto out_notrans;
> + }
> +
> + /*
> + * We need to find a free sequence number both in the source and
> + * in the destination directory for the exchange.
> + */
> + ret = btrfs_set_inode_index(new_dir, &old_idx);
> + if (ret)
> + goto out_fail;
> + ret = btrfs_set_inode_index(old_dir, &new_idx);
> + if (ret)
> + goto out_fail;
> +
> + BTRFS_I(old_inode)->dir_index = 0ULL;
> + BTRFS_I(new_inode)->dir_index = 0ULL;
> +
> + /* Reference for the source. */
> + if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
> + /* force full log commit if subvolume involved. */
> + btrfs_set_log_full_commit(root->fs_info, trans);
> + } else {
> + ret = btrfs_insert_inode_ref(trans, dest,
> + new_dentry->d_name.name,
> + new_dentry->d_name.len,
> + old_ino,
> + btrfs_ino(new_dir), old_idx);
> + if (ret)
> + goto out_fail;
> + btrfs_pin_log_trans(root);
> + }
> +
> + /* And now for the dest. */
> + if (unlikely(new_ino == BTRFS_FIRST_FREE_OBJECTID)) {
> + /* force full log commit if subvolume involved. */
> + btrfs_set_log_full_commit(dest->fs_info, trans);
> + } else {
> + ret = btrfs_insert_inode_ref(trans, root,
> + old_dentry->d_name.name,
> + old_dentry->d_name.len,
> + new_ino,
> + btrfs_ino(old_dir), new_idx);
> + if (ret)
> + goto out_fail;
> + btrfs_pin_log_trans(dest);
> + }
> +
> + /*
> + * Update i-node version and ctime/mtime.
> + */
> + inode_inc_iversion(old_dir);
> + inode_inc_iversion(new_dir);
> + inode_inc_iversion(old_inode);
> + inode_inc_iversion(new_inode);
> + old_dir->i_ctime = old_dir->i_mtime = ctime;
> + new_dir->i_ctime = new_dir->i_mtime = ctime;
> + old_inode->i_ctime = ctime;
> + new_inode->i_ctime = ctime;
> +
> + if (old_dentry->d_parent != new_dentry->d_parent) {
> + btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
> + btrfs_record_unlink_dir(trans, new_dir, new_inode, 1);
> + }
> +
> + /* src is a subvolume */
> + if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
> + root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
> + ret = btrfs_unlink_subvol(trans, root, old_dir,
> + root_objectid,
> + old_dentry->d_name.name,
> + old_dentry->d_name.len);
> + } else { /* src is a inode */
> + ret = __btrfs_unlink_inode(trans, root, old_dir,
> + old_dentry->d_inode,
> + old_dentry->d_name.name,
> + old_dentry->d_name.len);
> + if (!ret)
> + ret = btrfs_update_inode(trans, root, old_inode);
> + }
> + if (ret) {
> + btrfs_abort_transaction(trans, root, ret);
> + goto out_fail;
> + }
> +
> + /* dest is a subvolume */
> + if (unlikely(new_ino == BTRFS_FIRST_FREE_OBJECTID)) {
> + root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
> + ret = btrfs_unlink_subvol(trans, dest, new_dir,
> + root_objectid,
> + new_dentry->d_name.name,
> + new_dentry->d_name.len);
> + } else { /* dest is an inode */
> + ret = __btrfs_unlink_inode(trans, dest, new_dir,
> + new_dentry->d_inode,
> + new_dentry->d_name.name,
> + new_dentry->d_name.len);
> + if (!ret)
> + ret = btrfs_update_inode(trans, dest, new_inode);
> + }
> + if (ret) {
> + btrfs_abort_transaction(trans, root, ret);
> + goto out_fail;
> + }
> +
> + ret = btrfs_add_link(trans, new_dir, old_inode,
> + new_dentry->d_name.name,
> + new_dentry->d_name.len, 0, old_idx);
> + if (ret) {
> + btrfs_abort_transaction(trans, root, ret);
> + goto out_fail;
> + }
> +
> + ret = btrfs_add_link(trans, old_dir, new_inode,
> + old_dentry->d_name.name,
> + old_dentry->d_name.len, 0, new_idx);
> + if (ret) {
> + btrfs_abort_transaction(trans, root, ret);
> + goto out_fail;
> + }
> +
> + if (old_inode->i_nlink == 1)
> + BTRFS_I(old_inode)->dir_index = old_idx;
> + if (new_inode->i_nlink == 1)
> + BTRFS_I(new_inode)->dir_index = new_idx;
> +
> + if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
> + struct dentry *parent = new_dentry->d_parent;
> + btrfs_log_new_name(trans, old_inode, old_dir, parent);
> + btrfs_end_log_trans(root);
> + }
> + if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
> + struct dentry *parent = old_dentry->d_parent;
> + btrfs_log_new_name(trans, new_inode, new_dir, parent);
> + btrfs_end_log_trans(dest);
> + }
> +
> +out_fail:
> + btrfs_end_transaction(trans, root);
btrfs_end_transaction can return an error - for example it decided to
call btrfs_commit_transaction() or the transaction was aborted in the
meanwhile (I know several places in the codebase ignore its return
value, which is wrong).
> +out_notrans:
> + /* Racy window with snapshot create/destroy ioctl */
Comment can go away, it's a bit confusing as it seems to suggest this
is racy - but above when doing the down_read() calls it is said those
are done to avoid races.
> + if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
> + up_read(&dest->fs_info->subvol_sem);
> + if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
> + up_read(&root->fs_info->subvol_sem);
> + return 0;
return ret;
> +}
> +
> static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
> struct inode *new_dir, struct dentry *new_dentry)
> {
> @@ -9074,9 +9258,13 @@ static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
> struct inode *new_dir, struct dentry *new_dentry,
> unsigned int flags)
> {
> - if (flags & ~RENAME_NOREPLACE)
> + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
> return -EINVAL;
>
> + if (flags & RENAME_EXCHANGE)
> + return btrfs_cross_rename(old_dir, old_dentry, new_dir,
> + new_dentry);
Nitpick, naming the function as btrfs_rename_exchange instead would
make it much more clear about what it does
> +
> return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
> }
>
> --
> 2.3.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Filipe David Manana,
"Reasonable men adapt themselves to the world.
Unreasonable men adapt the world to themselves.
That's why all progress depends on unreasonable men."
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2015-04-02 10:05 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-02 3:56 [PATCH] Btrfs: implement RENAME_EXCHANGE semantic Davide Italiano
2015-04-02 3:56 ` [PATCH] Btrfs: RENAME_EXCHANGE semantic for renameat2() Davide Italiano
2015-04-02 10:05 ` Filipe David Manana
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.