From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-wi0-f178.google.com ([209.85.212.178]:48138 "EHLO mail-wi0-f178.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752643Ab2G1NtU (ORCPT ); Sat, 28 Jul 2012 09:49:20 -0400 Received: by wibhr14 with SMTP id hr14so492339wib.1 for ; Sat, 28 Jul 2012 06:49:18 -0700 (PDT) Message-ID: <5013EDD9.7080408@googlemail.com> Date: Sat, 28 Jul 2012 15:49:13 +0200 From: Alexander Block MIME-Version: 1.0 To: Arne Jansen CC: linux-btrfs@vger.kernel.org Subject: Re: [RFC PATCH 7/7] Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive (part 2) References: <1341409108-13567-1-git-send-email-ablock84@googlemail.com> <1341409108-13567-8-git-send-email-ablock84@googlemail.com> <500D328F.7050904@gmx.net> In-Reply-To: <500D328F.7050904@gmx.net> Content-Type: text/plain; charset=ISO-8859-1; format=flowed Sender: linux-btrfs-owner@vger.kernel.org List-ID: On 07/23/2012 01:16 PM, Arne Jansen wrote: > This is a first review run. I ask for more comments in several places. > Maybe these comments can help to dive deeper into a functional review > in a second run. > I'd really appreciate it if you could write a few pages about the > concepts how you decide what to send and when. > It seems there's still a lot of headroom for performance optimizations > cpu/seek-wise. I started to document stuff in http://btrfs.wiki.kernel.org/index.php/Btrfs_Send/Receive There is also a collection of optimizations that I have in mind for later. > All in all I really like this work. > > On 04.07.2012 15:38, Alexander Block wrote: >> This is the second part of the splitted BTRFS_IOC_SEND patch which >> contains the actual send logic. 
>> >> Signed-off-by: Alexander Block >> --- >> fs/btrfs/ioctl.c | 3 + >> fs/btrfs/send.c | 3246 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> fs/btrfs/send.h | 4 + >> 3 files changed, 3253 insertions(+) >> >> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c >> index 8d258cb..9173867 100644 >> --- a/fs/btrfs/ioctl.c >> +++ b/fs/btrfs/ioctl.c >> @@ -54,6 +54,7 @@ >> #include "inode-map.h" >> #include "backref.h" >> #include "rcu-string.h" >> +#include "send.h" >> >> /* Mask out flags that are inappropriate for the given type of inode. */ >> static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) >> @@ -3567,6 +3568,8 @@ long btrfs_ioctl(struct file *file, unsigned int >> return btrfs_ioctl_balance_progress(root, argp); >> case BTRFS_IOC_SET_RECEIVED_SUBVOL: >> return btrfs_ioctl_set_received_subvol(file, argp); >> + case BTRFS_IOC_SEND: >> + return btrfs_ioctl_send(file, argp); >> case BTRFS_IOC_GET_DEV_STATS: >> return btrfs_ioctl_get_dev_stats(root, argp, 0); >> case BTRFS_IOC_GET_AND_RESET_DEV_STATS: >> diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c >> index 47a2557..4d3fcfc 100644 >> --- a/fs/btrfs/send.c >> +++ b/fs/btrfs/send.c >> @@ -1007,3 +1007,3249 @@ out: >> return ret; >> } >> >> +struct backref_ctx { >> + struct send_ctx *sctx; >> + >> + /* number of total found references */ >> + u64 found; >> + >> + /* >> + * used for clones found in send_root. clones found behind cur_objectid >> + * and cur_offset are not considered as allowed clones. 
>> + */ >> + u64 cur_objectid; >> + u64 cur_offset; >> + >> + /* may be truncated in case it's the last extent in a file */ >> + u64 extent_len; >> + >> + /* Just to check for bugs in backref resolving */ >> + int found_in_send_root; >> +}; >> + >> +static int __clone_root_cmp_bsearch(const void *key, const void *elt) >> +{ >> + u64 root = (u64)key; >> + struct clone_root *cr = (struct clone_root *)elt; >> + >> + if (root< cr->root->objectid) >> + return -1; >> + if (root> cr->root->objectid) >> + return 1; >> + return 0; >> +} >> + >> +static int __clone_root_cmp_sort(const void *e1, const void *e2) >> +{ >> + struct clone_root *cr1 = (struct clone_root *)e1; >> + struct clone_root *cr2 = (struct clone_root *)e2; >> + >> + if (cr1->root->objectid< cr2->root->objectid) >> + return -1; >> + if (cr1->root->objectid> cr2->root->objectid) >> + return 1; >> + return 0; >> +} >> + >> +/* >> + * Called for every backref that is found for the current extent. > > Comment: results are collected in sctx->clone_roots->ino/offset/found_refs > >> + */ >> +static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) >> +{ >> + struct backref_ctx *bctx = ctx_; >> + struct clone_root *found; >> + int ret; >> + u64 i_size; >> + >> + /* First check if the root is in the list of accepted clone sources */ >> + found = bsearch((void *)root, bctx->sctx->clone_roots, >> + bctx->sctx->clone_roots_cnt, >> + sizeof(struct clone_root), >> + __clone_root_cmp_bsearch); >> + if (!found) >> + return 0; >> + >> + if (found->root == bctx->sctx->send_root&& >> + ino == bctx->cur_objectid&& >> + offset == bctx->cur_offset) { >> + bctx->found_in_send_root = 1; > > found_in_send_root_and_cur_ino_offset? I renamed it to found_itself. Hope that's more clear. > >> + } >> + >> + /* >> + * There are inodes that have extents that lie behind it's i_size. Don't > its Fixed. >> + * accept clones from these extents. 
>> + */ >> + ret = get_inode_info(found->root, ino,&i_size, NULL, NULL, NULL, NULL); >> + if (ret< 0) >> + return ret; >> + >> + if (offset + bctx->extent_len> i_size) >> + return 0; >> + >> + /* >> + * Make sure we don't consider clones from send_root that are >> + * behind the current inode/offset. >> + */ >> + if (found->root == bctx->sctx->send_root) { >> + /* >> + * TODO for the moment we don't accept clones from the inode >> + * that is currently send. We may change this when >> + * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same >> + * file. >> + */ >> + if (ino>= bctx->cur_objectid) >> + return 0; >> + /*if (ino> ctx->cur_objectid) >> + return 0; >> + if (offset + ctx->extent_len> ctx->cur_offset) >> + return 0;*/ > > #if 0 ... #else ... #endif Fixed. > >> + >> + bctx->found++; >> + found->found_refs++; >> + found->ino = ino; >> + found->offset = offset; > > only the last ino is kept? > I removed that return path. Now the code below the if handles that too. >> + return 0; >> + } >> + >> + bctx->found++; >> + found->found_refs++; >> + if (ino< found->ino) { >> + found->ino = ino; >> + found->offset = offset; > > whereas here only the lowest ino is kept. Why? > I take the lowest because then we have the same file as clone source every time when extents got cloned multiple times. >> + } else if (found->ino == ino) { >> + /* >> + * same extent found more then once in the same file. >> + */ >> + if (found->offset> offset + bctx->extent_len) >> + found->offset = offset; > > This is unclear to me. Seems to mean something like > 'find the lowest offset', but not exactly. Some explaination > would be good. Hmm...I don't remember why it was needed. I remember that I added it later for some reason but can't see now why. > >> + } >> + >> + return 0; >> +} >> + >> +/* >> + * path must point to the extent item when called. >> + */ > > What is the purpose of this function? I probably will figure it out > when reading on, but a comment would be nice here. 
> Added a description of the function. >> +static int find_extent_clone(struct send_ctx *sctx, >> + struct btrfs_path *path, >> + u64 ino, u64 data_offset, >> + u64 ino_size, >> + struct clone_root **found) >> +{ >> + int ret; >> + int extent_type; >> + u64 logical; >> + u64 num_bytes; >> + u64 extent_item_pos; >> + struct btrfs_file_extent_item *fi; >> + struct extent_buffer *eb = path->nodes[0]; >> + struct backref_ctx backref_ctx; > > currently it's still small enough to keep in on stack, maybe a > comment in struct backref_ctx that it is kept on stack would be > nice. > To make sure, I've removed it from the stack and use kmalloc now. >> + struct clone_root *cur_clone_root; >> + struct btrfs_key found_key; >> + struct btrfs_path *tmp_path; >> + u32 i; >> + >> + tmp_path = alloc_path_for_send(); >> + if (!tmp_path) >> + return -ENOMEM; >> + >> + if (data_offset>= ino_size) { >> + /* >> + * There may be extents that lie behind the file's size. >> + * I at least had this in combination with snapshotting while >> + * writing large files. >> + */ >> + ret = 0; >> + goto out; >> + } >> + >> + fi = btrfs_item_ptr(eb, path->slots[0], >> + struct btrfs_file_extent_item); >> + extent_type = btrfs_file_extent_type(eb, fi); >> + if (extent_type == BTRFS_FILE_EXTENT_INLINE) { >> + ret = -ENOENT; >> + goto out; >> + } >> + >> + num_bytes = btrfs_file_extent_num_bytes(eb, fi); >> + logical = btrfs_file_extent_disk_bytenr(eb, fi); >> + if (logical == 0) { >> + ret = -ENOENT; >> + goto out; >> + } >> + logical += btrfs_file_extent_offset(eb, fi); >> + >> + ret = extent_from_logical(sctx->send_root->fs_info, >> + logical, tmp_path,&found_key); >> + btrfs_release_path(tmp_path); >> + >> + if (ret< 0) >> + goto out; >> + if (ret& BTRFS_EXTENT_FLAG_TREE_BLOCK) { >> + ret = -EIO; >> + goto out; >> + } >> + >> + /* >> + * Setup the clone roots. 
>> + */ >> + for (i = 0; i< sctx->clone_roots_cnt; i++) { >> + cur_clone_root = sctx->clone_roots + i; >> + cur_clone_root->ino = (u64)-1; >> + cur_clone_root->offset = 0; >> + cur_clone_root->found_refs = 0; >> + } >> + >> + backref_ctx.sctx = sctx; >> + backref_ctx.found = 0; >> + backref_ctx.cur_objectid = ino; >> + backref_ctx.cur_offset = data_offset; >> + backref_ctx.found_in_send_root = 0; >> + backref_ctx.extent_len = num_bytes; >> + >> + /* >> + * The last extent of a file may be too large due to page alignment. >> + * We need to adjust extent_len in this case so that the checks in >> + * __iterate_backrefs work. >> + */ >> + if (data_offset + num_bytes>= ino_size) >> + backref_ctx.extent_len = ino_size - data_offset; >> + >> + /* >> + * Now collect all backrefs. >> + */ >> + extent_item_pos = logical - found_key.objectid; >> + ret = iterate_extent_inodes(sctx->send_root->fs_info, >> + found_key.objectid, extent_item_pos, 1, >> + __iterate_backrefs,&backref_ctx); >> + if (ret< 0) >> + goto out; >> + >> + if (!backref_ctx.found_in_send_root) { >> + /* found a bug in backref code? */ >> + ret = -EIO; >> + printk(KERN_ERR "btrfs: ERROR did not find backref in " >> + "send_root. 
inode=%llu, offset=%llu, " >> + "logical=%llu\n", >> + ino, data_offset, logical); >> + goto out; >> + } >> + >> +verbose_printk(KERN_DEBUG "btrfs: find_extent_clone: data_offset=%llu, " >> + "ino=%llu, " >> + "num_bytes=%llu, logical=%llu\n", >> + data_offset, ino, num_bytes, logical); >> + >> + if (!backref_ctx.found) >> + verbose_printk("btrfs: no clones found\n"); >> + >> + cur_clone_root = NULL; >> + for (i = 0; i< sctx->clone_roots_cnt; i++) { >> + if (sctx->clone_roots[i].found_refs) { >> + if (!cur_clone_root) >> + cur_clone_root = sctx->clone_roots + i; >> + else if (sctx->clone_roots[i].root == sctx->send_root) >> + /* prefer clones from send_root over others */ >> + cur_clone_root = sctx->clone_roots + i; >> + break; > > If you break after the first found ref, you might miss the send_root. > Ay, that's true. Removed the break. >> + } >> + >> + } >> + >> + if (cur_clone_root) { >> + *found = cur_clone_root; >> + ret = 0; >> + } else { >> + ret = -ENOENT; >> + } >> + >> +out: >> + btrfs_free_path(tmp_path); >> + return ret; >> +} >> + >> +static int read_symlink(struct send_ctx *sctx, >> + struct btrfs_root *root, >> + u64 ino, >> + struct fs_path *dest) >> +{ >> + int ret; >> + struct btrfs_path *path; >> + struct btrfs_key key; >> + struct btrfs_file_extent_item *ei; >> + u8 type; >> + u8 compression; >> + unsigned long off; >> + int len; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + key.objectid = ino; >> + key.type = BTRFS_EXTENT_DATA_KEY; >> + key.offset = 0; >> + ret = btrfs_search_slot(NULL, root,&key, path, 0, 0); >> + if (ret< 0) >> + goto out; >> + BUG_ON(ret); >> + >> + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], >> + struct btrfs_file_extent_item); >> + type = btrfs_file_extent_type(path->nodes[0], ei); >> + compression = btrfs_file_extent_compression(path->nodes[0], ei); >> + BUG_ON(type != BTRFS_FILE_EXTENT_INLINE); >> + BUG_ON(compression); >> + >> + off = btrfs_file_extent_inline_start(ei); 
>> + len = btrfs_file_extent_inline_len(path->nodes[0], ei); >> + >> + ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); >> + if (ret< 0) >> + goto out; > > superfluous > >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +/* >> + * Helper function to generate a file name that is unique in the root of >> + * send_root and parent_root. This is used to generate names for orphan inodes. >> + */ >> +static int gen_unique_name(struct send_ctx *sctx, >> + u64 ino, u64 gen, >> + struct fs_path *dest) >> +{ >> + int ret = 0; >> + struct btrfs_path *path; >> + struct btrfs_dir_item *di; >> + char tmp[64]; >> + int len; >> + u64 idx = 0; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + while (1) { >> + len = snprintf(tmp, sizeof(tmp) - 1, "o%llu-%llu-%llu", >> + ino, gen, idx); > > wouldn't it be easier to just take a uuid? This would save you a lot > of code and especially the need to verify that the name is really > unique, saving seeks. The answer from Alex Lyakas is correct here. We generate unique names that must be the same for every call with the same ino/gen combination. 
> >> + if (len>= sizeof(tmp)) { >> + /* should really not happen */ >> + ret = -EOVERFLOW; >> + goto out; >> + } >> + >> + di = btrfs_lookup_dir_item(NULL, sctx->send_root, >> + path, BTRFS_FIRST_FREE_OBJECTID, >> + tmp, strlen(tmp), 0); >> + btrfs_release_path(path); >> + if (IS_ERR(di)) { >> + ret = PTR_ERR(di); >> + goto out; >> + } >> + if (di) { >> + /* not unique, try again */ >> + idx++; >> + continue; >> + } >> + >> + if (!sctx->parent_root) { >> + /* unique */ >> + ret = 0; >> + break; >> + } >> + >> + di = btrfs_lookup_dir_item(NULL, sctx->parent_root, >> + path, BTRFS_FIRST_FREE_OBJECTID, >> + tmp, strlen(tmp), 0); >> + btrfs_release_path(path); >> + if (IS_ERR(di)) { >> + ret = PTR_ERR(di); >> + goto out; >> + } >> + if (di) { >> + /* not unique, try again */ >> + idx++; >> + continue; >> + } >> + /* unique */ >> + break; >> + } >> + >> + ret = fs_path_add(dest, tmp, strlen(tmp)); >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +enum inode_state { >> + inode_state_no_change, >> + inode_state_will_create, >> + inode_state_did_create, >> + inode_state_will_delete, >> + inode_state_did_delete, >> +}; >> + >> +static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen) > > don't you want to return a enum inode_state instead of int? > The function also returns error codes. Is it in such cases still preferred to return an enum? 
>> +{ >> + int ret; >> + int left_ret; >> + int right_ret; >> + u64 left_gen; >> + u64 right_gen; >> + >> + ret = get_inode_info(sctx->send_root, ino, NULL,&left_gen, NULL, NULL, >> + NULL); >> + if (ret< 0&& ret != -ENOENT) >> + goto out; >> + left_ret = ret; >> + >> + if (!sctx->parent_root) { >> + right_ret = -ENOENT; >> + } else { >> + ret = get_inode_info(sctx->parent_root, ino, NULL,&right_gen, >> + NULL, NULL, NULL); >> + if (ret< 0&& ret != -ENOENT) >> + goto out; >> + right_ret = ret; >> + } >> + >> + if (!left_ret&& !right_ret) { >> + if (left_gen == gen&& right_gen == gen) > > Please also use {} here > Fixed. >> + ret = inode_state_no_change; >> + else if (left_gen == gen) { >> + if (ino< sctx->send_progress) >> + ret = inode_state_did_create; >> + else >> + ret = inode_state_will_create; >> + } else if (right_gen == gen) { >> + if (ino< sctx->send_progress) >> + ret = inode_state_did_delete; >> + else >> + ret = inode_state_will_delete; >> + } else { >> + ret = -ENOENT; >> + } >> + } else if (!left_ret) { >> + if (left_gen == gen) { >> + if (ino< sctx->send_progress) >> + ret = inode_state_did_create; >> + else >> + ret = inode_state_will_create; >> + } else { >> + ret = -ENOENT; >> + } >> + } else if (!right_ret) { >> + if (right_gen == gen) { >> + if (ino< sctx->send_progress) >> + ret = inode_state_did_delete; >> + else >> + ret = inode_state_will_delete; >> + } else { >> + ret = -ENOENT; >> + } >> + } else { >> + ret = -ENOENT; >> + } >> + >> +out: >> + return ret; >> +} >> + >> +static int is_inode_existent(struct send_ctx *sctx, u64 ino, u64 gen) >> +{ >> + int ret; >> + >> + ret = get_cur_inode_state(sctx, ino, gen); >> + if (ret< 0) >> + goto out; >> + >> + if (ret == inode_state_no_change || >> + ret == inode_state_did_create || >> + ret == inode_state_will_delete) >> + ret = 1; >> + else >> + ret = 0; >> + >> +out: >> + return ret; >> +} >> + >> +/* >> + * Helper function to lookup a dir item in a dir. 
>> + */ >> +static int lookup_dir_item_inode(struct btrfs_root *root, >> + u64 dir, const char *name, int name_len, >> + u64 *found_inode, >> + u8 *found_type) >> +{ >> + int ret = 0; >> + struct btrfs_dir_item *di; >> + struct btrfs_key key; >> + struct btrfs_path *path; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + di = btrfs_lookup_dir_item(NULL, root, path, >> + dir, name, name_len, 0); >> + if (!di) { >> + ret = -ENOENT; >> + goto out; >> + } >> + if (IS_ERR(di)) { >> + ret = PTR_ERR(di); >> + goto out; >> + } >> + btrfs_dir_item_key_to_cpu(path->nodes[0], di,&key); >> + *found_inode = key.objectid; >> + *found_type = btrfs_dir_type(path->nodes[0], di); >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +static int get_first_ref(struct send_ctx *sctx, > > The name does not reflect well what the function does. > It's more like get_first_parent_dir or get_first_inode_ref > I did not rename it as we have much more uses of xxx_ref function names which all would need to be renamed. I added a comment which should help to understand the purpose of this function. 
>> + struct btrfs_root *root, u64 ino, >> + u64 *dir, u64 *dir_gen, struct fs_path *name) >> +{ >> + int ret; >> + struct btrfs_key key; >> + struct btrfs_key found_key; >> + struct btrfs_path *path; >> + struct btrfs_inode_ref *iref; >> + int len; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + key.objectid = ino; >> + key.type = BTRFS_INODE_REF_KEY; >> + key.offset = 0; >> + >> + ret = btrfs_search_slot_for_read(root,&key, path, 1, 0); >> + if (ret< 0) >> + goto out; >> + if (!ret) >> + btrfs_item_key_to_cpu(path->nodes[0],&found_key, >> + path->slots[0]); >> + if (ret || found_key.objectid != key.objectid || >> + found_key.type != key.type) { >> + ret = -ENOENT; >> + goto out; >> + } >> + >> + iref = btrfs_item_ptr(path->nodes[0], path->slots[0], >> + struct btrfs_inode_ref); >> + len = btrfs_inode_ref_name_len(path->nodes[0], iref); >> + ret = fs_path_add_from_extent_buffer(name, path->nodes[0], >> + (unsigned long)(iref + 1), len); >> + if (ret< 0) >> + goto out; >> + btrfs_release_path(path); >> + >> + ret = get_inode_info(root, found_key.offset, NULL, dir_gen, NULL, NULL, >> + NULL); >> + if (ret< 0) >> + goto out; >> + >> + *dir = found_key.offset; >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +static int is_first_ref(struct send_ctx *sctx, >> + struct btrfs_root *root, >> + u64 ino, u64 dir, >> + const char *name, int name_len) >> +{ >> + int ret; >> + struct fs_path *tmp_name; >> + u64 tmp_dir; >> + u64 tmp_dir_gen; >> + >> + tmp_name = fs_path_alloc(sctx); >> + if (!tmp_name) >> + return -ENOMEM; >> + >> + ret = get_first_ref(sctx, root, ino,&tmp_dir,&tmp_dir_gen, tmp_name); >> + if (ret< 0) >> + goto out; >> + >> + if (name_len != fs_path_len(tmp_name)) { >> + ret = 0; >> + goto out; >> + } >> + >> + ret = memcmp(tmp_name->start, name, name_len); > > or just ret = !memcmp...? > Changed to !memcmp. 
>> + if (ret) >> + ret = 0; >> + else >> + ret = 1; >> + >> +out: >> + fs_path_free(sctx, tmp_name); >> + return ret; >> +} >> + >> +static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen, >> + const char *name, int name_len, >> + u64 *who_ino, u64 *who_gen) >> +{ >> + int ret = 0; >> + u64 other_inode = 0; >> + u8 other_type = 0; >> + >> + if (!sctx->parent_root) >> + goto out; >> + >> + ret = is_inode_existent(sctx, dir, dir_gen); >> + if (ret<= 0) >> + goto out; >> + >> + ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len, >> + &other_inode,&other_type); >> + if (ret< 0&& ret != -ENOENT) >> + goto out; >> + if (ret) { >> + ret = 0; >> + goto out; >> + } >> + >> + if (other_inode> sctx->send_progress) { > > I haven't really grasped what this function does (a comment would be > nice), but I have a feeling that renames might break things when the > parent is not a direct ancenstor. Maybe it gets clearer when I read > on ;) > Hmm in my tests it worked. We have problems when the snapshots have no relation at all, which were reported by Alex Lyakas and which I'll go through later. Added a comment. >> + ret = get_inode_info(sctx->parent_root, other_inode, NULL, >> + who_gen, NULL, NULL, NULL); >> + if (ret< 0) >> + goto out; >> + >> + ret = 1; >> + *who_ino = other_inode; >> + } else { >> + ret = 0; >> + } >> + >> +out: >> + return ret; >> +} >> + Added a comment here. 
>> +static int did_overwrite_ref(struct send_ctx *sctx, >> + u64 dir, u64 dir_gen, >> + u64 ino, u64 ino_gen, >> + const char *name, int name_len) >> +{ >> + int ret = 0; >> + u64 gen; >> + u64 ow_inode; >> + u8 other_type; >> + >> + if (!sctx->parent_root) >> + goto out; >> + >> + ret = is_inode_existent(sctx, dir, dir_gen); >> + if (ret<= 0) >> + goto out; >> + >> + /* check if the ref was overwritten by another ref */ >> + ret = lookup_dir_item_inode(sctx->send_root, dir, name, name_len, >> + &ow_inode,&other_type); >> + if (ret< 0&& ret != -ENOENT) >> + goto out; >> + if (ret) { >> + /* was never and will never be overwritten */ >> + ret = 0; >> + goto out; >> + } >> + >> + ret = get_inode_info(sctx->send_root, ow_inode, NULL,&gen, NULL, NULL, >> + NULL); >> + if (ret< 0) >> + goto out; >> + >> + if (ow_inode == ino&& gen == ino_gen) { >> + ret = 0; >> + goto out; >> + } >> + >> + /* we know that it is or will be overwritten. check this now */ >> + if (ow_inode< sctx->send_progress) >> + ret = 1; >> + else >> + ret = 0; >> + >> +out: >> + return ret; >> +} >> + Added a comment here. >> +static int did_overwrite_first_ref(struct send_ctx *sctx, u64 ino, u64 gen) >> +{ >> + int ret = 0; >> + struct fs_path *name = NULL; >> + u64 dir; >> + u64 dir_gen; >> + >> + if (!sctx->parent_root) >> + goto out; >> + >> + name = fs_path_alloc(sctx); >> + if (!name) >> + return -ENOMEM; >> + >> + ret = get_first_ref(sctx, sctx->parent_root, ino,&dir,&dir_gen, name); >> + if (ret< 0) >> + goto out; >> + >> + ret = did_overwrite_ref(sctx, dir, dir_gen, ino, gen, >> + name->start, fs_path_len(name)); > >> + if (ret< 0) >> + goto out; > > superfluous > Removed. 
>> + >> +out: >> + fs_path_free(sctx, name); >> + return ret; >> +} >> + >> +static int name_cache_insert(struct send_ctx *sctx, >> + struct name_cache_entry *nce) >> +{ >> + int ret = 0; >> + struct name_cache_entry **ncea; >> + >> + ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); > > attention: radix_trees take an unsigned long as index, and ino > is a u64. You're in trouble on 32 bit. > Fixed by using the lower 32bits only for 32bit kernels and an additional list to store clashes. >> + if (ncea) { >> + if (!ncea[0]) >> + ncea[0] = nce; >> + else if (!ncea[1]) >> + ncea[1] = nce; >> + else >> + BUG(); >> + } else { >> + ncea = kmalloc(sizeof(void *) * 2, GFP_NOFS); >> + if (!ncea) >> + return -ENOMEM; >> + >> + ncea[0] = nce; >> + ncea[1] = NULL; >> + ret = radix_tree_insert(&sctx->name_cache, nce->ino, ncea); >> + if (ret< 0) >> + return ret; >> + } >> + list_add_tail(&nce->list,&sctx->name_cache_list); >> + sctx->name_cache_size++; >> + >> + return ret; >> +} >> + >> +static void name_cache_delete(struct send_ctx *sctx, >> + struct name_cache_entry *nce) >> +{ >> + struct name_cache_entry **ncea; >> + >> + ncea = radix_tree_lookup(&sctx->name_cache, nce->ino); >> + BUG_ON(!ncea); >> + >> + if (ncea[0] == nce) >> + ncea[0] = NULL; >> + else if (ncea[1] == nce) >> + ncea[1] = NULL; >> + else >> + BUG(); >> + >> + if (!ncea[0]&& !ncea[1]) { >> + radix_tree_delete(&sctx->name_cache, nce->ino); >> + kfree(ncea); >> + } >> + >> + list_del(&nce->list); >> + >> + sctx->name_cache_size--; >> +} >> + >> +static struct name_cache_entry *name_cache_search(struct send_ctx *sctx, >> + u64 ino, u64 gen) >> +{ >> + struct name_cache_entry **ncea; >> + >> + ncea = radix_tree_lookup(&sctx->name_cache, ino); >> + if (!ncea) >> + return NULL; >> + >> + if (ncea[0]&& ncea[0]->gen == gen) >> + return ncea[0]; >> + else if (ncea[1]&& ncea[1]->gen == gen) >> + return ncea[1]; >> + return NULL; >> +} >> + >> +static void name_cache_used(struct send_ctx *sctx, struct 
name_cache_entry *nce) >> +{ >> + list_del(&nce->list); >> + list_add_tail(&nce->list,&sctx->name_cache_list); >> +} >> + >> +static void name_cache_clean_unused(struct send_ctx *sctx) >> +{ >> + struct name_cache_entry *nce; >> + >> + if (sctx->name_cache_size< SEND_CTX_NAME_CACHE_CLEAN_SIZE) >> + return; > > superfluous, the while condition below is enough. > Please note that the if and the while use different constants. I want to trigger cleanup only after some time and then clean up multiple entries at once. >> + >> + while (sctx->name_cache_size> SEND_CTX_MAX_NAME_CACHE_SIZE) { >> + nce = list_entry(sctx->name_cache_list.next, >> + struct name_cache_entry, list); >> + name_cache_delete(sctx, nce); >> + kfree(nce); >> + } >> +} >> + >> +static void name_cache_free(struct send_ctx *sctx) >> +{ >> + struct name_cache_entry *nce; >> + struct name_cache_entry *tmp; >> + >> + list_for_each_entry_safe(nce, tmp,&sctx->name_cache_list, list) { > > it's easier to just always delete the head until the list is empty. > Saves you the tmp-var. > Changed it to while(!list_empty... 
>> + name_cache_delete(sctx, nce); >> + } >> +} >> + >> +static int __get_cur_name_and_parent(struct send_ctx *sctx, >> + u64 ino, u64 gen, >> + u64 *parent_ino, >> + u64 *parent_gen, >> + struct fs_path *dest) >> +{ >> + int ret; >> + int nce_ret; >> + struct btrfs_path *path = NULL; >> + struct name_cache_entry *nce = NULL; >> + >> + nce = name_cache_search(sctx, ino, gen); >> + if (nce) { >> + if (ino< sctx->send_progress&& nce->need_later_update) { >> + name_cache_delete(sctx, nce); >> + kfree(nce); >> + nce = NULL; >> + } else { >> + name_cache_used(sctx, nce); >> + *parent_ino = nce->parent_ino; >> + *parent_gen = nce->parent_gen; >> + ret = fs_path_add(dest, nce->name, nce->name_len); >> + if (ret< 0) >> + goto out; >> + ret = nce->ret; >> + goto out; >> + } >> + } >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + ret = is_inode_existent(sctx, ino, gen); >> + if (ret< 0) >> + goto out; >> + >> + if (!ret) { >> + ret = gen_unique_name(sctx, ino, gen, dest); >> + if (ret< 0) >> + goto out; >> + ret = 1; >> + goto out_cache; >> + } >> + >> + if (ino< sctx->send_progress) >> + ret = get_first_ref(sctx, sctx->send_root, ino, >> + parent_ino, parent_gen, dest); >> + else >> + ret = get_first_ref(sctx, sctx->parent_root, ino, >> + parent_ino, parent_gen, dest); >> + if (ret< 0) >> + goto out; >> + >> + ret = did_overwrite_ref(sctx, *parent_ino, *parent_gen, ino, gen, >> + dest->start, dest->end - dest->start); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + fs_path_reset(dest); >> + ret = gen_unique_name(sctx, ino, gen, dest); >> + if (ret< 0) >> + goto out; >> + ret = 1; >> + } >> + >> +out_cache: >> + nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); >> + if (!nce) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + nce->ino = ino; >> + nce->gen = gen; >> + nce->parent_ino = *parent_ino; >> + nce->parent_gen = *parent_gen; >> + nce->name_len = fs_path_len(dest); >> + nce->ret = ret; > > This is a bit too 
magic for me. ret == 1 iff it's a unique_name? Added more comments. 1 means get_cur_path needs to stop and that we have an orphan inode. > >> + strcpy(nce->name, dest->start); >> + memset(&nce->use_list, 0, sizeof(nce->use_list)); > > use_list is unused, anyway, it's a strange way to initialize a > list_head. There's the INIT_LIST_HEAD macro. use_list is removed now. > >> + >> + if (ino< sctx->send_progress) >> + nce->need_later_update = 0; >> + else >> + nce->need_later_update = 1; >> + >> + nce_ret = name_cache_insert(sctx, nce); >> + if (nce_ret< 0) >> + ret = nce_ret; >> + name_cache_clean_unused(sctx); >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +/* >> + * Magic happens here. This function returns the first ref to an inode as it >> + * would look like while receiving the stream at this point in time. >> + * We walk the path up to the root. For every inode in between, we check if it >> + * was already processed/sent. If yes, we continue with the parent as found >> + * in send_root. If not, we continue with the parent as found in parent_root. >> + * If we encounter an inode that was deleted at this point in time, we use the >> + * inodes "orphan" name instead of the real name and stop. Same with new inodes >> + * that were not created yet and overwritten inodes/refs. >> + * >> + * When do we have have orphan inodes: >> + * 1. When an inode is freshly created and thus no valid refs are available yet >> + * 2. When a directory lost all it's refs (deleted) but still has dir items >> + * inside which were not processed yet (pending for move/delete). If anyone >> + * tried to get the path to the dir items, it would get a path inside that >> + * orphan directory. >> + * 3. When an inode is moved around or gets new links, it may overwrite the ref >> + * of an unprocessed inode. If in that case the first ref would be >> + * overwritten, the overwritten inode gets "orphanized". 
Later when we >> + * process this overwritten inode, it is restored at a new place by moving >> + * the orphan inode. >> + * >> + * sctx->send_progress tells this function at which point in time receiving >> + * would be. >> + */ > > Thanks for the comment :) > >> +static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, >> + struct fs_path *dest) >> +{ >> + int ret = 0; >> + struct fs_path *name = NULL; >> + u64 parent_inode = 0; >> + u64 parent_gen = 0; >> + int stop = 0; >> + >> + name = fs_path_alloc(sctx); >> + if (!name) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + dest->reversed = 1; >> + fs_path_reset(dest); >> + >> + while (!stop&& ino != BTRFS_FIRST_FREE_OBJECTID) { >> + fs_path_reset(name); >> + >> + ret = __get_cur_name_and_parent(sctx, ino, gen, >> + &parent_inode,&parent_gen, name); >> + if (ret< 0) >> + goto out; >> + if (ret) >> + stop = 1; >> + >> + ret = fs_path_add_path(dest, name); >> + if (ret< 0) >> + goto out; >> + >> + ino = parent_inode; >> + gen = parent_gen; >> + } >> + >> +out: >> + fs_path_free(sctx, name); >> + if (!ret) >> + fs_path_unreverse(dest); >> + return ret; >> +} >> + >> +/* >> + * Called for regular files when sending extents data. Opens a struct file >> + * to read from the file. 
>> + */ >> +static int open_cur_inode_file(struct send_ctx *sctx) >> +{ >> + int ret = 0; >> + struct btrfs_key key; >> + struct vfsmount *mnt; >> + struct inode *inode; >> + struct dentry *dentry; >> + struct file *filp; >> + int new = 0; >> + >> + if (sctx->cur_inode_filp) >> + goto out; >> + >> + key.objectid = sctx->cur_ino; >> + key.type = BTRFS_INODE_ITEM_KEY; >> + key.offset = 0; >> + >> + inode = btrfs_iget(sctx->send_root->fs_info->sb,&key, sctx->send_root, >> + &new); >> + if (IS_ERR(inode)) { >> + ret = PTR_ERR(inode); >> + goto out; >> + } >> + >> + dentry = d_obtain_alias(inode); >> + inode = NULL; >> + if (IS_ERR(dentry)) { >> + ret = PTR_ERR(dentry); >> + goto out; >> + } >> + >> + mnt = mntget(sctx->mnt); >> + filp = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE, current_cred()); >> + dentry = NULL; >> + mnt = NULL; > > It would be good if this part could be reviewed by someone with > deep vfs knowledge. Maybe you can compile those parts into a > separate patch and send it to the appropriate ppl for review. > Linus had to merge parts of this function and did not complain. I will probably still send a new mail regarding this and the other vfs parts. 
>> + if (IS_ERR(filp)) { >> + ret = PTR_ERR(filp); >> + goto out; >> + } >> + sctx->cur_inode_filp = filp; >> + >> +out: >> + /* >> + * no xxxput required here as every vfs op >> + * does it by itself on failure >> + */ >> + return ret; >> +} >> + >> +/* >> + * Closes the struct file that was created in open_cur_inode_file >> + */ >> +static int close_cur_inode_file(struct send_ctx *sctx) >> +{ >> + int ret = 0; >> + >> + if (!sctx->cur_inode_filp) >> + goto out; >> + >> + ret = filp_close(sctx->cur_inode_filp, NULL); >> + sctx->cur_inode_filp = NULL; >> + >> +out: >> + return ret; >> +} >> + >> +/* >> + * Sends a BTRFS_SEND_C_SUBVOL command/item to userspace >> + */ >> +static int send_subvol_begin(struct send_ctx *sctx) >> +{ >> + int ret; >> + struct btrfs_root *send_root = sctx->send_root; >> + struct btrfs_root *parent_root = sctx->parent_root; >> + struct btrfs_path *path; >> + struct btrfs_key key; >> + struct btrfs_root_ref *ref; >> + struct extent_buffer *leaf; >> + char *name = NULL; >> + int namelen; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); >> + if (!name) { >> + btrfs_free_path(path); >> + return -ENOMEM; >> + } >> + >> + key.objectid = send_root->objectid; >> + key.type = BTRFS_ROOT_BACKREF_KEY; >> + key.offset = 0; >> + >> + ret = btrfs_search_slot_for_read(send_root->fs_info->tree_root, >> + &key, path, 1, 0); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = -ENOENT; >> + goto out; >> + } >> + >> + leaf = path->nodes[0]; >> + btrfs_item_key_to_cpu(leaf,&key, path->slots[0]); >> + if (key.type != BTRFS_ROOT_BACKREF_KEY || >> + key.objectid != send_root->objectid) { >> + ret = -ENOENT; >> + goto out; >> + } > > It looks like we could use a helper for finding the first entry > with a specific objectid+key... > Hmm yepp, I have a lot of places where things like this happen. Will do that later. 
>> + ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); >> + namelen = btrfs_root_ref_name_len(leaf, ref); >> + read_extent_buffer(leaf, name, (unsigned long)(ref + 1), namelen); >> + btrfs_release_path(path); >> + >> + if (ret< 0) >> + goto out; > > How can ret be< 0 here? Whoops, a leftover. Removed. > >> + >> + if (parent_root) { >> + ret = begin_cmd(sctx, BTRFS_SEND_C_SNAPSHOT); >> + if (ret< 0) >> + goto out; >> + } else { >> + ret = begin_cmd(sctx, BTRFS_SEND_C_SUBVOL); >> + if (ret< 0) >> + goto out; >> + } >> + >> + TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); > > It's called PATH, but it seems to be only the last path component. > What about subvols that are anchored deeper in the dir tree? > Sounds like btrfs_root_ref_name_len does not contain the full path? If not, I need to handle that. >> + TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, >> + sctx->send_root->root_item.uuid); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, >> + sctx->send_root->root_item.ctransid); >> + if (parent_root) { > > The name of the parent is not sent? > Nope. We can't use it for anything. And when we later allow receiving to an arbitrary path, we can't count on the parent path/name but only on the uuid of the parent.
>> + TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, >> + sctx->parent_root->root_item.uuid); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, >> + sctx->parent_root->root_item.ctransid); >> + } >> + >> + ret = send_cmd(sctx); >> + >> +tlv_put_failure: >> +out: >> + btrfs_free_path(path); >> + kfree(name); >> + return ret; >> +} >> + >> +static int send_truncate(struct send_ctx *sctx, u64 ino, u64 gen, u64 size) >> +{ >> + int ret = 0; >> + struct fs_path *p; >> + >> +verbose_printk("btrfs: send_truncate %llu size=%llu\n", ino, size); >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + ret = begin_cmd(sctx, BTRFS_SEND_C_TRUNCATE); >> + if (ret< 0) >> + goto out; >> + >> + ret = get_cur_path(sctx, ino, gen, p); >> + if (ret< 0) >> + goto out; >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, size); >> + >> + ret = send_cmd(sctx); >> + >> +tlv_put_failure: >> +out: >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +static int send_chmod(struct send_ctx *sctx, u64 ino, u64 gen, u64 mode) >> +{ >> + int ret = 0; >> + struct fs_path *p; >> + >> +verbose_printk("btrfs: send_chmod %llu mode=%llu\n", ino, mode); >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + ret = begin_cmd(sctx, BTRFS_SEND_C_CHMOD); >> + if (ret< 0) >> + goto out; >> + >> + ret = get_cur_path(sctx, ino, gen, p); >> + if (ret< 0) >> + goto out; >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode& 07777); > > four 7? 
> >> + >> + ret = send_cmd(sctx); >> + >> +tlv_put_failure: >> +out: >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +static int send_chown(struct send_ctx *sctx, u64 ino, u64 gen, u64 uid, u64 gid) >> +{ >> + int ret = 0; >> + struct fs_path *p; >> + >> +verbose_printk("btrfs: send_chown %llu uid=%llu, gid=%llu\n", ino, uid, gid); >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + ret = begin_cmd(sctx, BTRFS_SEND_C_CHOWN); >> + if (ret< 0) >> + goto out; >> + >> + ret = get_cur_path(sctx, ino, gen, p); >> + if (ret< 0) >> + goto out; >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_UID, uid); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_GID, gid); >> + >> + ret = send_cmd(sctx); >> + >> +tlv_put_failure: >> +out: >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +static int send_utimes(struct send_ctx *sctx, u64 ino, u64 gen) >> +{ >> + int ret = 0; >> + struct fs_path *p = NULL; >> + struct btrfs_inode_item *ii; >> + struct btrfs_path *path = NULL; >> + struct extent_buffer *eb; >> + struct btrfs_key key; >> + int slot; >> + >> +verbose_printk("btrfs: send_utimes %llu\n", ino); >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + path = alloc_path_for_send(); >> + if (!path) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + key.objectid = ino; >> + key.type = BTRFS_INODE_ITEM_KEY; >> + key.offset = 0; >> + ret = btrfs_search_slot(NULL, sctx->send_root,&key, path, 0, 0); >> + if (ret< 0) >> + goto out; > > you don't check for existence. I guess you know it exists, otherwise > you wouldn't end up here... > Yepp, calling this function (and other send_xxx functions) is only allowed on send_root and with existing inodes. 
>> + >> + eb = path->nodes[0]; >> + slot = path->slots[0]; >> + ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); >> + >> + ret = begin_cmd(sctx, BTRFS_SEND_C_UTIMES); >> + if (ret< 0) >> + goto out; >> + >> + ret = get_cur_path(sctx, ino, gen, p); >> + if (ret< 0) >> + goto out; >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); >> + TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_ATIME, eb, >> + btrfs_inode_atime(ii)); >> + TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_MTIME, eb, >> + btrfs_inode_mtime(ii)); >> + TLV_PUT_BTRFS_TIMESPEC(sctx, BTRFS_SEND_A_CTIME, eb, >> + btrfs_inode_ctime(ii)); >> + /* TODO otime? */ > > yes, please :) > Can't do that for now. We need to wait for the otime patches to land upstream first. I changed the comment to make this clearer and also added a TODO to https://btrfs.wiki.kernel.org/index.php/Btrfs_Send/Receive >> + >> + ret = send_cmd(sctx); >> + >> +tlv_put_failure: >> +out: >> + fs_path_free(sctx, p); >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +/* >> + * Sends a BTRFS_SEND_C_MKXXX or SYMLINK command to user space. We don't have >> + * a valid path yet because we did not process the refs yet. So, the inode >> + * is created as orphan.
>> + */ >> +static int send_create_inode(struct send_ctx *sctx, struct btrfs_path *path, >> + struct btrfs_key *key) >> +{ >> + int ret = 0; >> + struct extent_buffer *eb = path->nodes[0]; >> + struct btrfs_inode_item *ii; >> + struct fs_path *p; >> + int slot = path->slots[0]; >> + int cmd; >> + u64 mode; >> + >> +verbose_printk("btrfs: send_create_inode %llu\n", sctx->cur_ino); >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + ii = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); >> + mode = btrfs_inode_mode(eb, ii); >> + >> + if (S_ISREG(mode)) >> + cmd = BTRFS_SEND_C_MKFILE; >> + else if (S_ISDIR(mode)) >> + cmd = BTRFS_SEND_C_MKDIR; >> + else if (S_ISLNK(mode)) >> + cmd = BTRFS_SEND_C_SYMLINK; >> + else if (S_ISCHR(mode) || S_ISBLK(mode)) >> + cmd = BTRFS_SEND_C_MKNOD; >> + else if (S_ISFIFO(mode)) >> + cmd = BTRFS_SEND_C_MKFIFO; >> + else if (S_ISSOCK(mode)) >> + cmd = BTRFS_SEND_C_MKSOCK; >> + else { > > normally you'd put {} in all cases if you need it for one. > Fixed that. 
>> + printk(KERN_WARNING "btrfs: unexpected inode type %o", >> + (int)(mode& S_IFMT)); >> + ret = -ENOTSUPP; >> + goto out; >> + } >> + >> + ret = begin_cmd(sctx, cmd); >> + if (ret< 0) >> + goto out; >> + >> + ret = gen_unique_name(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); >> + if (ret< 0) >> + goto out; >> + >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); >> + >> + if (S_ISLNK(mode)) { >> + fs_path_reset(p); >> + ret = read_symlink(sctx, sctx->send_root, sctx->cur_ino, p); >> + if (ret< 0) >> + goto out; >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p); >> + } else if (S_ISCHR(mode) || S_ISBLK(mode) || >> + S_ISFIFO(mode) || S_ISSOCK(mode)) { >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, btrfs_inode_rdev(eb, ii)); >> + } >> + >> + ret = send_cmd(sctx); >> + if (ret< 0) >> + goto out; >> + >> + >> +tlv_put_failure: >> +out: >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +struct recorded_ref { >> + struct list_head list; >> + char *dir_path; >> + char *name; >> + struct fs_path *full_path; >> + u64 dir; >> + u64 dir_gen; >> + int dir_path_len; >> + int name_len; >> +}; >> + >> +/* >> + * We need to process new refs before deleted refs, but compare_tree gives us >> + * everything mixed. So we first record all refs and later process them. >> + * This function is a helper to record one ref. 
>> + */ >> +static int record_ref(struct list_head *head, u64 dir, >> + u64 dir_gen, struct fs_path *path) >> +{ >> + struct recorded_ref *ref; >> + char *tmp; >> + >> + ref = kmalloc(sizeof(*ref), GFP_NOFS); >> + if (!ref) >> + return -ENOMEM; >> + >> + ref->dir = dir; >> + ref->dir_gen = dir_gen; >> + ref->full_path = path; >> + >> + tmp = strrchr(ref->full_path->start, '/'); >> + if (!tmp) { >> + ref->name_len = ref->full_path->end - ref->full_path->start; >> + ref->name = ref->full_path->start; >> + ref->dir_path_len = 0; >> + ref->dir_path = ref->full_path->start; >> + } else { >> + tmp++; >> + ref->name_len = ref->full_path->end - tmp; >> + ref->name = tmp; >> + ref->dir_path = ref->full_path->start; >> + ref->dir_path_len = ref->full_path->end - >> + ref->full_path->start - 1 - ref->name_len; >> + } >> + >> + list_add_tail(&ref->list, head); >> + return 0; >> +} >> + >> +static void __free_recorded_refs(struct send_ctx *sctx, struct list_head *head) >> +{ >> + struct recorded_ref *cur; >> + struct recorded_ref *tmp; >> + >> + list_for_each_entry_safe(cur, tmp, head, list) { >> + fs_path_free(sctx, cur->full_path); >> + kfree(cur); >> + } >> + INIT_LIST_HEAD(head); > > This is a bit non-obvious. You use the _safe-macro as if you're > going to delete each entry, but then you don't delete it and > instead just reset the head. I'd prefer a while(!list_empty())- > list_del-loop here. > Changed to use while(!list_empty... >> +} >> + >> +static void free_recorded_refs(struct send_ctx *sctx) >> +{ >> + __free_recorded_refs(sctx,&sctx->new_refs); >> + __free_recorded_refs(sctx,&sctx->deleted_refs); >> +} >> + >> +/* >> + * Renames/moves a file/dir to it's orphan name. Used when the first > its > >> + * ref of an unprocessed inode gets overwritten and for all non empty >> + * directories. 
>> + */ >> +static int orphanize_inode(struct send_ctx *sctx, u64 ino, u64 gen, >> + struct fs_path *path) >> +{ >> + int ret; >> + struct fs_path *orphan; >> + >> + orphan = fs_path_alloc(sctx); >> + if (!orphan) >> + return -ENOMEM; >> + >> + ret = gen_unique_name(sctx, ino, gen, orphan); >> + if (ret< 0) >> + goto out; >> + >> + ret = send_rename(sctx, path, orphan); >> + >> +out: >> + fs_path_free(sctx, orphan); >> + return ret; >> +} >> + >> +/* >> + * Returns 1 if a directory can be removed at this point in time. >> + * We check this by iterating all dir items and checking if the inode behind >> + * the dir item was already processed. >> + */ >> +static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) >> +{ >> + int ret = 0; >> + struct btrfs_root *root = sctx->parent_root; >> + struct btrfs_path *path; >> + struct btrfs_key key; >> + struct btrfs_key found_key; >> + struct btrfs_key loc; >> + struct btrfs_dir_item *di; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + key.objectid = dir; >> + key.type = BTRFS_DIR_INDEX_KEY; >> + key.offset = 0; >> + >> + while (1) { >> + ret = btrfs_search_slot_for_read(root,&key, path, 1, 0); >> + if (ret< 0) >> + goto out; >> + if (!ret) { >> + btrfs_item_key_to_cpu(path->nodes[0],&found_key, >> + path->slots[0]); >> + } >> + if (ret || found_key.objectid != key.objectid || >> + found_key.type != key.type) { >> + break; >> + } > > another case for the above mentioned helper... > >> + >> + di = btrfs_item_ptr(path->nodes[0], path->slots[0], >> + struct btrfs_dir_item); >> + btrfs_dir_item_key_to_cpu(path->nodes[0], di,&loc); >> + >> + if (loc.objectid> send_progress) { >> + ret = 0; >> + goto out; >> + } >> + >> + btrfs_release_path(path); >> + key.offset = found_key.offset + 1; >> + } >> + >> + ret = 1; >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +/* >> + * This does all the move/link/unlink/rmdir magic. 
>> + */ >> +static int process_recorded_refs(struct send_ctx *sctx) >> +{ >> + int ret = 0; >> + struct recorded_ref *cur; >> + struct ulist *check_dirs = NULL; >> + struct ulist_iterator uit; >> + struct ulist_node *un; >> + struct fs_path *valid_path = NULL; >> + u64 ow_inode; >> + u64 ow_gen; >> + int did_overwrite = 0; >> + int is_orphan = 0; >> + >> +verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); >> + >> + valid_path = fs_path_alloc(sctx); >> + if (!valid_path) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + check_dirs = ulist_alloc(GFP_NOFS); >> + if (!check_dirs) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + /* >> + * First, check if the first ref of the current inode was overwritten >> + * before. If yes, we know that the current inode was already orphanized >> + * and thus use the orphan name. If not, we can use get_cur_path to >> + * get the path of the first ref as it would like while receiving at >> + * this point in time. >> + * New inodes are always orphan at the beginning, so force to use the >> + * orphan name in this case. >> + * The first ref is stored in valid_path and will be updated if it >> + * gets moved around. >> + */ >> + if (!sctx->cur_inode_new) { >> + ret = did_overwrite_first_ref(sctx, sctx->cur_ino, >> + sctx->cur_inode_gen); >> + if (ret< 0) >> + goto out; >> + if (ret) >> + did_overwrite = 1; >> + } >> + if (sctx->cur_inode_new || did_overwrite) { >> + ret = gen_unique_name(sctx, sctx->cur_ino, >> + sctx->cur_inode_gen, valid_path); >> + if (ret< 0) >> + goto out; >> + is_orphan = 1; >> + } else { >> + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, >> + valid_path); >> + if (ret< 0) >> + goto out; >> + } >> + >> + list_for_each_entry(cur,&sctx->new_refs, list) { >> + /* >> + * Check if this new ref would overwrite the first ref of >> + * another unprocessed inode. If yes, orphanize the >> + * overwritten inode. 
If we find an overwritten ref that is >> + * not the first ref, simply unlink it. >> + */ >> + ret = will_overwrite_ref(sctx, cur->dir, cur->dir_gen, >> + cur->name, cur->name_len, >> + &ow_inode,&ow_gen); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = is_first_ref(sctx, sctx->parent_root, >> + ow_inode, cur->dir, cur->name, >> + cur->name_len); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = orphanize_inode(sctx, ow_inode, ow_gen, >> + cur->full_path); >> + if (ret< 0) >> + goto out; >> + } else { >> + ret = send_unlink(sctx, cur->full_path); >> + if (ret< 0) >> + goto out; >> + } >> + } >> + >> + /* >> + * link/move the ref to the new place. If we have an orphan >> + * inode, move it and update valid_path. If not, link or move >> + * it depending on the inode mode. >> + */ >> + if (is_orphan) { >> + ret = send_rename(sctx, valid_path, cur->full_path); >> + if (ret< 0) >> + goto out; >> + is_orphan = 0; >> + ret = fs_path_copy(valid_path, cur->full_path); >> + if (ret< 0) >> + goto out; >> + } else { >> + if (S_ISDIR(sctx->cur_inode_mode)) { > > why not save a level of indentation here by using? > The if does not exist anymore due to a recent patch. >> + /* >> + * Dirs can't be linked, so move it. For moved >> + * dirs, we always have one new and one deleted >> + * ref. The deleted ref is ignored later. >> + */ >> + ret = send_rename(sctx, valid_path, >> + cur->full_path); >> + if (ret< 0) >> + goto out; >> + ret = fs_path_copy(valid_path, cur->full_path); >> + if (ret< 0) >> + goto out; >> + } else { >> + ret = send_link(sctx, valid_path, >> + cur->full_path); >> + if (ret< 0) >> + goto out; >> + } >> + } >> + ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, > > careful, aux is only an unsigned long, meant to be as large as a pointer. > Will make aux 64bit. 
>> + GFP_NOFS); >> + if (ret< 0) >> + goto out; >> + } >> + >> + if (S_ISDIR(sctx->cur_inode_mode)&& sctx->cur_inode_deleted) { >> + /* >> + * Check if we can already rmdir the directory. If not, >> + * orphanize it. For every dir item inside that gets deleted >> + * later, we do this check again and rmdir it then if possible. >> + * See the use of check_dirs for more details. >> + */ >> + ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = send_rmdir(sctx, valid_path); >> + if (ret< 0) >> + goto out; >> + } else if (!is_orphan) { >> + ret = orphanize_inode(sctx, sctx->cur_ino, >> + sctx->cur_inode_gen, valid_path); >> + if (ret< 0) >> + goto out; >> + is_orphan = 1; >> + } >> + >> + list_for_each_entry(cur,&sctx->deleted_refs, list) { >> + ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, >> + GFP_NOFS); >> + if (ret< 0) >> + goto out; >> + } >> + } else if (!S_ISDIR(sctx->cur_inode_mode)) { >> + /* >> + * We have a non dir inode. Go through all deleted refs and >> + * unlink them if they were not already overwritten by other >> + * inodes. >> + */ >> + list_for_each_entry(cur,&sctx->deleted_refs, list) { >> + ret = did_overwrite_ref(sctx, cur->dir, cur->dir_gen, >> + sctx->cur_ino, sctx->cur_inode_gen, >> + cur->name, cur->name_len); >> + if (ret< 0) >> + goto out; >> + if (!ret) { >> + ret = send_unlink(sctx, cur->full_path); >> + if (ret< 0) >> + goto out; >> + } >> + ret = ulist_add(check_dirs, cur->dir, cur->dir_gen, >> + GFP_NOFS); >> + if (ret< 0) >> + goto out; >> + } >> + >> + /* >> + * If the inode is still orphan, unlink the orphan. This may >> + * happen when a previous inode did overwrite the first ref >> + * of this inode and no new refs were added for the current >> + * inode. >> + */ >> + if (is_orphan) { >> + ret = send_unlink(sctx, valid_path); >> + if (ret< 0) >> + goto out; >> + } >> + } >> + >> + /* >> + * We did collect all parent dirs where cur_inode was once located. 
We >> + * now go through all these dirs and check if they are pending for >> + * deletion and if it's finally possible to perform the rmdir now. >> + * We also update the inode stats of the parent dirs here. >> + */ >> + ULIST_ITER_INIT(&uit); >> + while ((un = ulist_next(check_dirs,&uit))) { >> + if (un->val> sctx->cur_ino) >> + continue; >> + >> + ret = get_cur_inode_state(sctx, un->val, un->aux); >> + if (ret< 0) >> + goto out; >> + >> + if (ret == inode_state_did_create || >> + ret == inode_state_no_change) { >> + /* TODO delayed utimes */ >> + ret = send_utimes(sctx, un->val, un->aux); >> + if (ret< 0) >> + goto out; >> + } else if (ret == inode_state_did_delete) { >> + ret = can_rmdir(sctx, un->val, sctx->cur_ino); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = get_cur_path(sctx, un->val, un->aux, >> + valid_path); >> + if (ret< 0) >> + goto out; >> + ret = send_rmdir(sctx, valid_path); >> + if (ret< 0) >> + goto out; >> + } >> + } >> + } >> + >> + /* >> + * Current inode is now at it's new position, so we must increase > its >> + * send_progress >> + */ >> + sctx->send_progress = sctx->cur_ino + 1; > > is this the right place for it, or should be done at the calling > site? > You're right, the caller should update send_progress. Especially as currently send_progress gets updated too early in the cur_inode_new_gen case (as reported by Alex Lyakas). 
>> + >> + ret = 0; >> + >> +out: >> + free_recorded_refs(sctx); >> + ulist_free(check_dirs); >> + fs_path_free(sctx, valid_path); >> + return ret; >> +} >> + >> +static int __record_new_ref(int num, u64 dir, int index, >> + struct fs_path *name, >> + void *ctx) >> +{ >> + int ret = 0; >> + struct send_ctx *sctx = ctx; >> + struct fs_path *p; >> + u64 gen; >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + ret = get_inode_info(sctx->send_root, dir, NULL,&gen, NULL, NULL, >> + NULL); >> + if (ret< 0) >> + goto out; >> + >> + ret = get_cur_path(sctx, dir, gen, p); >> + if (ret< 0) >> + goto out; >> + ret = fs_path_add_path(p, name); >> + if (ret< 0) >> + goto out; >> + >> + ret = record_ref(&sctx->new_refs, dir, gen, p); >> + >> +out: >> + if (ret) >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +static int __record_deleted_ref(int num, u64 dir, int index, >> + struct fs_path *name, >> + void *ctx) >> +{ >> + int ret = 0; >> + struct send_ctx *sctx = ctx; >> + struct fs_path *p; >> + u64 gen; >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + ret = get_inode_info(sctx->parent_root, dir, NULL,&gen, NULL, NULL, >> + NULL); >> + if (ret< 0) >> + goto out; >> + >> + ret = get_cur_path(sctx, dir, gen, p); >> + if (ret< 0) >> + goto out; >> + ret = fs_path_add_path(p, name); >> + if (ret< 0) >> + goto out; >> + >> + ret = record_ref(&sctx->deleted_refs, dir, gen, p); >> + >> +out: >> + if (ret) >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +static int record_new_ref(struct send_ctx *sctx) >> +{ >> + int ret; >> + >> + ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, >> + sctx->cmp_key, 0, __record_new_ref, sctx); >> + >> + return ret; >> +} >> + >> +static int record_deleted_ref(struct send_ctx *sctx) >> +{ >> + int ret; >> + >> + ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, >> + sctx->cmp_key, 0, __record_deleted_ref, sctx); >> + return ret; >> +} >> + >> 
+struct find_ref_ctx { >> + u64 dir; >> + struct fs_path *name; >> + int found_idx; >> +}; >> + >> +static int __find_iref(int num, u64 dir, int index, >> + struct fs_path *name, >> + void *ctx_) >> +{ >> + struct find_ref_ctx *ctx = ctx_; >> + >> + if (dir == ctx->dir&& fs_path_len(name) == fs_path_len(ctx->name)&& >> + strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) { >> + ctx->found_idx = num; >> + return 1; >> + } >> + return 0; >> +} >> + >> +static int find_iref(struct send_ctx *sctx, >> + struct btrfs_root *root, >> + struct btrfs_path *path, >> + struct btrfs_key *key, >> + u64 dir, struct fs_path *name) >> +{ >> + int ret; >> + struct find_ref_ctx ctx; >> + >> + ctx.dir = dir; >> + ctx.name = name; >> + ctx.found_idx = -1; >> + >> + ret = iterate_inode_ref(sctx, root, path, key, 0, __find_iref,&ctx); >> + if (ret< 0) >> + return ret; >> + >> + if (ctx.found_idx == -1) >> + return -ENOENT; >> + >> + return ctx.found_idx; >> +} >> + >> +static int __record_changed_new_ref(int num, u64 dir, int index, >> + struct fs_path *name, >> + void *ctx) >> +{ >> + int ret; >> + struct send_ctx *sctx = ctx; >> + >> + ret = find_iref(sctx, sctx->parent_root, sctx->right_path, >> + sctx->cmp_key, dir, name); >> + if (ret == -ENOENT) >> + ret = __record_new_ref(num, dir, index, name, sctx); >> + else if (ret> 0) >> + ret = 0; >> + >> + return ret; >> +} >> + >> +static int __record_changed_deleted_ref(int num, u64 dir, int index, >> + struct fs_path *name, >> + void *ctx) >> +{ >> + int ret; >> + struct send_ctx *sctx = ctx; >> + >> + ret = find_iref(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, >> + dir, name); >> + if (ret == -ENOENT) >> + ret = __record_deleted_ref(num, dir, index, name, sctx); >> + else if (ret> 0) >> + ret = 0; >> + >> + return ret; >> +} >> + >> +static int record_changed_ref(struct send_ctx *sctx) >> +{ >> + int ret = 0; >> + >> + ret = iterate_inode_ref(sctx, sctx->send_root, sctx->left_path, >> + sctx->cmp_key, 0, 
__record_changed_new_ref, sctx); >> + if (ret< 0) >> + goto out; >> + ret = iterate_inode_ref(sctx, sctx->parent_root, sctx->right_path, >> + sctx->cmp_key, 0, __record_changed_deleted_ref, sctx); >> + >> +out: >> + return ret; >> +} >> + >> +/* >> + * Record and process all refs at once. Needed when an inode changes the >> + * generation number, which means that it was deleted and recreated. >> + */ >> +static int process_all_refs(struct send_ctx *sctx, >> + enum btrfs_compare_tree_result cmd) >> +{ >> + int ret; >> + struct btrfs_root *root; >> + struct btrfs_path *path; >> + struct btrfs_key key; >> + struct btrfs_key found_key; >> + struct extent_buffer *eb; >> + int slot; >> + iterate_inode_ref_t cb; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + if (cmd == BTRFS_COMPARE_TREE_NEW) { >> + root = sctx->send_root; >> + cb = __record_new_ref; >> + } else if (cmd == BTRFS_COMPARE_TREE_DELETED) { >> + root = sctx->parent_root; >> + cb = __record_deleted_ref; >> + } else { >> + BUG(); >> + } >> + >> + key.objectid = sctx->cmp_key->objectid; >> + key.type = BTRFS_INODE_REF_KEY; >> + key.offset = 0; >> + while (1) { >> + ret = btrfs_search_slot_for_read(root,&key, path, 1, 0); >> + if (ret< 0) { >> + btrfs_release_path(path); > > not needed Removed. > >> + goto out; >> + } >> + if (ret) { >> + btrfs_release_path(path); > > ditto Was needed here as we have a call to process_recorded_refs after the break. But I moved the release out of the loop so that we don't have that duplicated for the next break. > >> + break; >> + } >> + >> + eb = path->nodes[0]; >> + slot = path->slots[0]; >> + btrfs_item_key_to_cpu(eb,&found_key, slot); >> + >> + if (found_key.objectid != key.objectid || >> + found_key.type != key.type) { >> + btrfs_release_path(path); > > and here See above. 
> >> + break; >> + } > > helper :) > >> + >> + ret = iterate_inode_ref(sctx, sctx->parent_root, path, >> + &found_key, 0, cb, sctx); >> + btrfs_release_path(path); >> + if (ret< 0) >> + goto out; >> + >> + key.offset = found_key.offset + 1; >> + } >> + >> + ret = process_recorded_refs(sctx); >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +static int send_set_xattr(struct send_ctx *sctx, >> + struct fs_path *path, >> + const char *name, int name_len, >> + const char *data, int data_len) >> +{ >> + int ret = 0; >> + >> + ret = begin_cmd(sctx, BTRFS_SEND_C_SET_XATTR); >> + if (ret< 0) >> + goto out; >> + >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); >> + TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); >> + TLV_PUT(sctx, BTRFS_SEND_A_XATTR_DATA, data, data_len); >> + >> + ret = send_cmd(sctx); >> + >> +tlv_put_failure: >> +out: >> + return ret; >> +} >> + >> +static int send_remove_xattr(struct send_ctx *sctx, >> + struct fs_path *path, >> + const char *name, int name_len) >> +{ >> + int ret = 0; >> + >> + ret = begin_cmd(sctx, BTRFS_SEND_C_REMOVE_XATTR); >> + if (ret< 0) >> + goto out; >> + >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path); >> + TLV_PUT_STRING(sctx, BTRFS_SEND_A_XATTR_NAME, name, name_len); >> + >> + ret = send_cmd(sctx); >> + >> +tlv_put_failure: >> +out: >> + return ret; >> +} >> + >> +static int __process_new_xattr(int num, const char *name, int name_len, >> + const char *data, int data_len, >> + u8 type, void *ctx) >> +{ >> + int ret; >> + struct send_ctx *sctx = ctx; >> + struct fs_path *p; >> + posix_acl_xattr_header dummy_acl; >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + /* >> + * This hack is needed because empty acl's are stored as zero byte >> + * data in xattrs. Problem with that is, that receiving these zero byte >> + * acl's will fail later. To fix this, we send a dummy acl list that >> + * only contains the version number and no entries. 
>> + */ >> + if (!strncmp(name, XATTR_NAME_POSIX_ACL_ACCESS, name_len) || >> + !strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, name_len)) { >> + if (data_len == 0) { >> + dummy_acl.a_version = >> + cpu_to_le32(POSIX_ACL_XATTR_VERSION); >> + data = (char *)&dummy_acl; >> + data_len = sizeof(dummy_acl); >> + } >> + } >> + >> + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); >> + if (ret< 0) >> + goto out; >> + >> + ret = send_set_xattr(sctx, p, name, name_len, data, data_len); >> + >> +out: >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +static int __process_deleted_xattr(int num, const char *name, int name_len, >> + const char *data, int data_len, >> + u8 type, void *ctx) >> +{ >> + int ret; >> + struct send_ctx *sctx = ctx; >> + struct fs_path *p; >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); >> + if (ret< 0) >> + goto out; >> + >> + ret = send_remove_xattr(sctx, p, name, name_len); >> + >> +out: >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +static int process_new_xattr(struct send_ctx *sctx) >> +{ >> + int ret = 0; >> + >> + ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path, >> + sctx->cmp_key, __process_new_xattr, sctx); >> + >> + return ret; >> +} >> + >> +static int process_deleted_xattr(struct send_ctx *sctx) >> +{ >> + int ret; >> + >> + ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, >> + sctx->cmp_key, __process_deleted_xattr, sctx); >> + >> + return ret; >> +} >> + >> +struct find_xattr_ctx { >> + const char *name; >> + int name_len; >> + int found_idx; >> + char *found_data; >> + int found_data_len; >> +}; >> + >> +static int __find_xattr(int num, const char *name, int name_len, >> + const char *data, int data_len, >> + u8 type, void *vctx) >> +{ >> + struct find_xattr_ctx *ctx = vctx; >> + >> + if (name_len == ctx->name_len&& >> + strncmp(name, ctx->name, name_len) == 0) { >> + 
ctx->found_idx = num; >> + ctx->found_data_len = data_len; >> + ctx->found_data = kmalloc(data_len, GFP_NOFS); >> + if (!ctx->found_data) >> + return -ENOMEM; >> + memcpy(ctx->found_data, data, data_len); >> + return 1; >> + } >> + return 0; >> +} >> + >> +static int find_xattr(struct send_ctx *sctx, >> + struct btrfs_root *root, >> + struct btrfs_path *path, >> + struct btrfs_key *key, >> + const char *name, int name_len, >> + char **data, int *data_len) >> +{ >> + int ret; >> + struct find_xattr_ctx ctx; >> + >> + ctx.name = name; >> + ctx.name_len = name_len; >> + ctx.found_idx = -1; >> + ctx.found_data = NULL; >> + ctx.found_data_len = 0; >> + >> + ret = iterate_dir_item(sctx, root, path, key, __find_xattr,&ctx); >> + if (ret< 0) >> + return ret; >> + >> + if (ctx.found_idx == -1) >> + return -ENOENT; >> + if (data) { >> + *data = ctx.found_data; >> + *data_len = ctx.found_data_len; >> + } else { >> + kfree(ctx.found_data); >> + } >> + return ctx.found_idx; >> +} >> + >> + >> +static int __process_changed_new_xattr(int num, const char *name, int name_len, >> + const char *data, int data_len, >> + u8 type, void *ctx) >> +{ >> + int ret; >> + struct send_ctx *sctx = ctx; >> + char *found_data = NULL; >> + int found_data_len = 0; >> + struct fs_path *p = NULL; >> + >> + ret = find_xattr(sctx, sctx->parent_root, sctx->right_path, >> + sctx->cmp_key, name, name_len,&found_data, >> + &found_data_len); >> + if (ret == -ENOENT) { >> + ret = __process_new_xattr(num, name, name_len, data, data_len, >> + type, ctx); >> + } else if (ret>= 0) { >> + if (data_len != found_data_len || >> + memcmp(data, found_data, data_len)) { >> + ret = __process_new_xattr(num, name, name_len, data, >> + data_len, type, ctx); >> + } else { >> + ret = 0; >> + } >> + } >> + >> + kfree(found_data); >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +static int __process_changed_deleted_xattr(int num, const char *name, >> + int name_len, >> + const char *data, int data_len, >> + u8 type, 
void *ctx) >> +{ >> + int ret; >> + struct send_ctx *sctx = ctx; >> + >> + ret = find_xattr(sctx, sctx->send_root, sctx->left_path, sctx->cmp_key, >> + name, name_len, NULL, NULL); >> + if (ret == -ENOENT) >> + ret = __process_deleted_xattr(num, name, name_len, data, >> + data_len, type, ctx); >> + else if (ret>= 0) >> + ret = 0; >> + >> + return ret; >> +} >> + >> +static int process_changed_xattr(struct send_ctx *sctx) >> +{ >> + int ret = 0; >> + >> + ret = iterate_dir_item(sctx, sctx->send_root, sctx->left_path, >> + sctx->cmp_key, __process_changed_new_xattr, sctx); >> + if (ret< 0) >> + goto out; >> + ret = iterate_dir_item(sctx, sctx->parent_root, sctx->right_path, >> + sctx->cmp_key, __process_changed_deleted_xattr, sctx); >> + >> +out: >> + return ret; >> +} >> + >> +static int process_all_new_xattrs(struct send_ctx *sctx) >> +{ >> + int ret; >> + struct btrfs_root *root; >> + struct btrfs_path *path; >> + struct btrfs_key key; >> + struct btrfs_key found_key; >> + struct extent_buffer *eb; >> + int slot; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + root = sctx->send_root; >> + >> + key.objectid = sctx->cmp_key->objectid; >> + key.type = BTRFS_XATTR_ITEM_KEY; >> + key.offset = 0; >> + while (1) { >> + ret = btrfs_search_slot_for_read(root,&key, path, 1, 0); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = 0; >> + goto out; >> + } >> + >> + eb = path->nodes[0]; >> + slot = path->slots[0]; >> + btrfs_item_key_to_cpu(eb,&found_key, slot); >> + >> + if (found_key.objectid != key.objectid || >> + found_key.type != key.type) { >> + ret = 0; >> + goto out; >> + } > > helper... 
> >> + >> + ret = iterate_dir_item(sctx, root, path,&found_key, >> + __process_new_xattr, sctx); >> + if (ret< 0) >> + goto out; >> + >> + btrfs_release_path(path); >> + key.offset = found_key.offset + 1; >> + } >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +/* >> + * Read some bytes from the current inode/file and send a write command to >> + * user space. >> + */ >> +static int send_write(struct send_ctx *sctx, u64 offset, u32 len) >> +{ >> + int ret = 0; >> + struct fs_path *p; >> + loff_t pos = offset; >> + int readed; >> + mm_segment_t old_fs; >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + /* >> + * vfs normally only accepts user space buffers for security reasons. >> + * we only read from the file and also only provide the read_buf buffer >> + * to vfs. As this buffer does not come from a user space call, it's >> + * ok to temporary allow kernel space buffers. >> + */ >> + old_fs = get_fs(); >> + set_fs(KERNEL_DS); >> + >> +verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len); >> + >> + ret = open_cur_inode_file(sctx); >> + if (ret< 0) >> + goto out; >> + >> + ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len,&pos); >> + if (ret< 0) >> + goto out; >> + readed = ret; > > num_read? > renamed. >> + if (!readed) >> + goto out; >> + >> + ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE); >> + if (ret< 0) >> + goto out; >> + >> + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); >> + if (ret< 0) >> + goto out; >> + >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); >> + TLV_PUT(sctx, BTRFS_SEND_A_DATA, sctx->read_buf, readed); >> + >> + ret = send_cmd(sctx); >> + >> +tlv_put_failure: >> +out: >> + fs_path_free(sctx, p); >> + set_fs(old_fs); >> + if (ret< 0) >> + return ret; >> + return readed; >> +} >> + >> +/* >> + * Send a clone command to user space. 
>> + */ >> +static int send_clone(struct send_ctx *sctx, >> + u64 offset, u32 len, >> + struct clone_root *clone_root) >> +{ >> + int ret = 0; >> + struct btrfs_root *clone_root2 = clone_root->root; > > a name from hell :) > Removed that one completely; I'm now using clone_root->root directly below. >> + struct fs_path *p; >> + u64 gen; >> + >> +verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, " >> + "clone_inode=%llu, clone_offset=%llu\n", offset, len, >> + clone_root->root->objectid, clone_root->ino, >> + clone_root->offset); >> + >> + p = fs_path_alloc(sctx); >> + if (!p) >> + return -ENOMEM; >> + >> + ret = begin_cmd(sctx, BTRFS_SEND_C_CLONE); >> + if (ret< 0) >> + goto out; >> + >> + ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); >> + if (ret< 0) >> + goto out; >> + >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_LEN, len); >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); >> + >> + if (clone_root2 == sctx->send_root) { >> + ret = get_inode_info(sctx->send_root, clone_root->ino, NULL, >> + &gen, NULL, NULL, NULL); >> + if (ret< 0) >> + goto out; >> + ret = get_cur_path(sctx, clone_root->ino, gen, p); >> + } else { >> + ret = get_inode_path(sctx, clone_root2, clone_root->ino, p); >> + } >> + if (ret< 0) >> + goto out; >> + >> + TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID, >> + clone_root2->root_item.uuid); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID, >> + clone_root2->root_item.ctransid); >> + TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p); >> + TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_OFFSET, >> + clone_root->offset); >> + >> + ret = send_cmd(sctx); >> + >> +tlv_put_failure: >> +out: >> + fs_path_free(sctx, p); >> + return ret; >> +} >> + >> +static int send_write_or_clone(struct send_ctx *sctx, >> + struct btrfs_path *path, >> + struct btrfs_key *key, >> + struct clone_root *clone_root) >> +{ >> + int ret = 0; >> + struct btrfs_file_extent_item *ei; >> + u64 offset = key->offset; >>
+ u64 pos = 0; >> + u64 len; >> + u32 l; >> + u8 type; >> + >> + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], >> + struct btrfs_file_extent_item); >> + type = btrfs_file_extent_type(path->nodes[0], ei); >> + if (type == BTRFS_FILE_EXTENT_INLINE) >> + len = btrfs_file_extent_inline_len(path->nodes[0], ei); >> + else >> + len = btrfs_file_extent_num_bytes(path->nodes[0], ei); > > BTRFS_FILE_EXTENT_PREALLOC? > Isn't num_bytes also valid for PREALLOC? >> + >> + if (offset + len> sctx->cur_inode_size) >> + len = sctx->cur_inode_size - offset; >> + if (len == 0) { >> + ret = 0; >> + goto out; >> + } >> + >> + if (!clone_root) { >> + while (pos< len) { >> + l = len - pos; >> + if (l> BTRFS_SEND_READ_SIZE) >> + l = BTRFS_SEND_READ_SIZE; >> + ret = send_write(sctx, pos + offset, l); >> + if (ret< 0) >> + goto out; >> + if (!ret) >> + break; >> + pos += ret; >> + } >> + ret = 0; >> + } else { >> + ret = send_clone(sctx, offset, len, clone_root); >> + } >> + >> +out: >> + return ret; >> +} >> + >> +static int is_extent_unchanged(struct send_ctx *sctx, >> + struct btrfs_path *left_path, >> + struct btrfs_key *ekey) >> +{ >> + int ret = 0; >> + struct btrfs_key key; >> + struct btrfs_path *path = NULL; >> + struct extent_buffer *eb; >> + int slot; >> + struct btrfs_key found_key; >> + struct btrfs_file_extent_item *ei; >> + u64 left_disknr; >> + u64 right_disknr; >> + u64 left_offset; >> + u64 right_offset; >> + u64 left_len; >> + u64 right_len; >> + u8 left_type; >> + u8 right_type; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + eb = left_path->nodes[0]; >> + slot = left_path->slots[0]; >> + >> + ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); >> + left_type = btrfs_file_extent_type(eb, ei); >> + left_disknr = btrfs_file_extent_disk_bytenr(eb, ei); >> + left_len = btrfs_file_extent_num_bytes(eb, ei); >> + left_offset = btrfs_file_extent_offset(eb, ei); >> + >> + if (left_type != BTRFS_FILE_EXTENT_REG) { >> + ret = 0; 
>> + goto out; >> + } >> + >> + key.objectid = ekey->objectid; >> + key.type = BTRFS_EXTENT_DATA_KEY; >> + key.offset = ekey->offset; >> + >> + while (1) { >> + ret = btrfs_search_slot_for_read(sctx->parent_root,&key, path, >> + 0, 0); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = 0; >> + goto out; >> + } >> + btrfs_item_key_to_cpu(path->nodes[0],&found_key, >> + path->slots[0]); >> + if (found_key.objectid != key.objectid || >> + found_key.type != key.type) { >> + ret = 0; >> + goto out; >> + } >> + > > helper... > >> + eb = path->nodes[0]; >> + slot = path->slots[0]; >> + >> + ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); >> + right_type = btrfs_file_extent_type(eb, ei); >> + right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); >> + right_len = btrfs_file_extent_num_bytes(eb, ei); >> + right_offset = btrfs_file_extent_offset(eb, ei); >> + btrfs_release_path(path); >> + >> + if (right_type != BTRFS_FILE_EXTENT_REG) { >> + ret = 0; >> + goto out; >> + } >> + >> + if (left_disknr != right_disknr) { >> + ret = 0; >> + goto out; >> + } >> + >> + key.offset = found_key.offset + right_len; >> + if (key.offset>= ekey->offset + left_len) { >> + ret = 1; >> + goto out; >> + } >> + } >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +static int process_extent(struct send_ctx *sctx, >> + struct btrfs_path *path, >> + struct btrfs_key *key) >> +{ >> + int ret = 0; >> + struct clone_root *found_clone = NULL; >> + >> + if (S_ISLNK(sctx->cur_inode_mode)) >> + return 0; >> + >> + if (sctx->parent_root&& !sctx->cur_inode_new) { >> + ret = is_extent_unchanged(sctx, path, key); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = 0; >> + goto out; >> + } >> + } >> + >> + ret = find_extent_clone(sctx, path, key->objectid, key->offset, >> + sctx->cur_inode_size,&found_clone); >> + if (ret != -ENOENT&& ret< 0) >> + goto out; >> + >> + ret = send_write_or_clone(sctx, path, key, found_clone); >> + >> +out: >> + return ret; >> +} 
>> + >> +static int process_all_extents(struct send_ctx *sctx) >> +{ >> + int ret; >> + struct btrfs_root *root; >> + struct btrfs_path *path; >> + struct btrfs_key key; >> + struct btrfs_key found_key; >> + struct extent_buffer *eb; >> + int slot; >> + >> + root = sctx->send_root; >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + key.objectid = sctx->cmp_key->objectid; >> + key.type = BTRFS_EXTENT_DATA_KEY; >> + key.offset = 0; >> + while (1) { >> + ret = btrfs_search_slot_for_read(root,&key, path, 1, 0); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = 0; >> + goto out; >> + } >> + >> + eb = path->nodes[0]; >> + slot = path->slots[0]; >> + btrfs_item_key_to_cpu(eb,&found_key, slot); >> + >> + if (found_key.objectid != key.objectid || >> + found_key.type != key.type) { >> + ret = 0; >> + goto out; >> + } >> + >> + ret = process_extent(sctx, path,&found_key); >> + if (ret< 0) >> + goto out; >> + >> + btrfs_release_path(path); >> + key.offset = found_key.offset + 1; >> + } >> + >> +out: >> + btrfs_free_path(path); >> + return ret; >> +} >> + >> +static int process_recorded_refs_if_needed(struct send_ctx *sctx, int at_end) >> +{ >> + int ret = 0; >> + >> + if (sctx->cur_ino == 0) >> + goto out; >> + if (!at_end&& sctx->cur_ino == sctx->cmp_key->objectid&& >> + sctx->cmp_key->type<= BTRFS_INODE_REF_KEY) >> + goto out; >> + if (list_empty(&sctx->new_refs)&& list_empty(&sctx->deleted_refs)) >> + goto out; >> + >> + ret = process_recorded_refs(sctx); >> + >> +out: >> + return ret; >> +} >> + >> +static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) >> +{ >> + int ret = 0; >> + u64 left_mode; >> + u64 left_uid; >> + u64 left_gid; >> + u64 right_mode; >> + u64 right_uid; >> + u64 right_gid; >> + int need_chmod = 0; >> + int need_chown = 0; >> + >> + ret = process_recorded_refs_if_needed(sctx, at_end); >> + if (ret< 0) >> + goto out; >> + >> + if (sctx->cur_ino == 0 || sctx->cur_inode_deleted) >> + goto out; >> + if 
(!at_end&& sctx->cmp_key->objectid == sctx->cur_ino) >> + goto out; >> + >> + ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL, >> + &left_mode,&left_uid,&left_gid); >> + if (ret< 0) >> + goto out; >> + >> + if (!S_ISLNK(sctx->cur_inode_mode)) { >> + if (!sctx->parent_root || sctx->cur_inode_new) { >> + need_chmod = 1; >> + need_chown = 1; >> + } else { >> + ret = get_inode_info(sctx->parent_root, sctx->cur_ino, >> + NULL, NULL,&right_mode,&right_uid, >> + &right_gid); >> + if (ret< 0) >> + goto out; >> + >> + if (left_uid != right_uid || left_gid != right_gid) >> + need_chown = 1; >> + if (left_mode != right_mode) >> + need_chmod = 1; >> + } >> + } >> + >> + if (S_ISREG(sctx->cur_inode_mode)) { >> + ret = send_truncate(sctx, sctx->cur_ino, sctx->cur_inode_gen, >> + sctx->cur_inode_size); >> + if (ret< 0) >> + goto out; >> + } >> + >> + if (need_chown) { >> + ret = send_chown(sctx, sctx->cur_ino, sctx->cur_inode_gen, >> + left_uid, left_gid); >> + if (ret< 0) >> + goto out; >> + } >> + if (need_chmod) { >> + ret = send_chmod(sctx, sctx->cur_ino, sctx->cur_inode_gen, >> + left_mode); >> + if (ret< 0) >> + goto out; >> + } >> + >> + /* >> + * Need to send that every time, no matter if it actually changed >> + * between the two trees as we have done changes to the inode before. 
>> + */ >> + ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); >> + if (ret< 0) >> + goto out; >> + >> +out: >> + return ret; >> +} >> + >> +static int changed_inode(struct send_ctx *sctx, >> + enum btrfs_compare_tree_result result) >> +{ >> + int ret = 0; >> + struct btrfs_key *key = sctx->cmp_key; >> + struct btrfs_inode_item *left_ii = NULL; >> + struct btrfs_inode_item *right_ii = NULL; >> + u64 left_gen = 0; >> + u64 right_gen = 0; >> + >> + ret = close_cur_inode_file(sctx); >> + if (ret< 0) >> + goto out; >> + >> + sctx->cur_ino = key->objectid; >> + sctx->cur_inode_new_gen = 0; >> + sctx->send_progress = sctx->cur_ino; >> + >> + if (result == BTRFS_COMPARE_TREE_NEW || >> + result == BTRFS_COMPARE_TREE_CHANGED) { >> + left_ii = btrfs_item_ptr(sctx->left_path->nodes[0], >> + sctx->left_path->slots[0], >> + struct btrfs_inode_item); >> + left_gen = btrfs_inode_generation(sctx->left_path->nodes[0], >> + left_ii); >> + } else { >> + right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], >> + sctx->right_path->slots[0], >> + struct btrfs_inode_item); >> + right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], >> + right_ii); >> + } >> + if (result == BTRFS_COMPARE_TREE_CHANGED) { >> + right_ii = btrfs_item_ptr(sctx->right_path->nodes[0], >> + sctx->right_path->slots[0], >> + struct btrfs_inode_item); >> + >> + right_gen = btrfs_inode_generation(sctx->right_path->nodes[0], >> + right_ii); >> + if (left_gen != right_gen) >> + sctx->cur_inode_new_gen = 1; >> + } >> + >> + if (result == BTRFS_COMPARE_TREE_NEW) { >> + sctx->cur_inode_gen = left_gen; >> + sctx->cur_inode_new = 1; >> + sctx->cur_inode_deleted = 0; >> + sctx->cur_inode_size = btrfs_inode_size( >> + sctx->left_path->nodes[0], left_ii); >> + sctx->cur_inode_mode = btrfs_inode_mode( >> + sctx->left_path->nodes[0], left_ii); >> + if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) >> + ret = send_create_inode(sctx, sctx->left_path, >> + sctx->cmp_key); >> + } else if (result == 
BTRFS_COMPARE_TREE_DELETED) { >> + sctx->cur_inode_gen = right_gen; >> + sctx->cur_inode_new = 0; >> + sctx->cur_inode_deleted = 1; >> + sctx->cur_inode_size = btrfs_inode_size( >> + sctx->right_path->nodes[0], right_ii); >> + sctx->cur_inode_mode = btrfs_inode_mode( >> + sctx->right_path->nodes[0], right_ii); >> + } else if (result == BTRFS_COMPARE_TREE_CHANGED) { >> + if (sctx->cur_inode_new_gen) { >> + sctx->cur_inode_gen = right_gen; >> + sctx->cur_inode_new = 0; >> + sctx->cur_inode_deleted = 1; >> + sctx->cur_inode_size = btrfs_inode_size( >> + sctx->right_path->nodes[0], right_ii); >> + sctx->cur_inode_mode = btrfs_inode_mode( >> + sctx->right_path->nodes[0], right_ii); >> + ret = process_all_refs(sctx, >> + BTRFS_COMPARE_TREE_DELETED); >> + if (ret< 0) >> + goto out; >> + >> + sctx->cur_inode_gen = left_gen; >> + sctx->cur_inode_new = 1; >> + sctx->cur_inode_deleted = 0; >> + sctx->cur_inode_size = btrfs_inode_size( >> + sctx->left_path->nodes[0], left_ii); >> + sctx->cur_inode_mode = btrfs_inode_mode( >> + sctx->left_path->nodes[0], left_ii); >> + ret = send_create_inode(sctx, sctx->left_path, >> + sctx->cmp_key); >> + if (ret< 0) >> + goto out; >> + >> + ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW); >> + if (ret< 0) >> + goto out; >> + ret = process_all_extents(sctx); >> + if (ret< 0) >> + goto out; >> + ret = process_all_new_xattrs(sctx); >> + if (ret< 0) >> + goto out; >> + } else { >> + sctx->cur_inode_gen = left_gen; >> + sctx->cur_inode_new = 0; >> + sctx->cur_inode_new_gen = 0; >> + sctx->cur_inode_deleted = 0; >> + sctx->cur_inode_size = btrfs_inode_size( >> + sctx->left_path->nodes[0], left_ii); >> + sctx->cur_inode_mode = btrfs_inode_mode( >> + sctx->left_path->nodes[0], left_ii); >> + } >> + } >> + >> +out: >> + return ret; >> +} >> + >> +static int changed_ref(struct send_ctx *sctx, >> + enum btrfs_compare_tree_result result) >> +{ >> + int ret = 0; >> + >> + BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); >> + >> + if 
(!sctx->cur_inode_new_gen&& >> + sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) { >> + if (result == BTRFS_COMPARE_TREE_NEW) >> + ret = record_new_ref(sctx); >> + else if (result == BTRFS_COMPARE_TREE_DELETED) >> + ret = record_deleted_ref(sctx); >> + else if (result == BTRFS_COMPARE_TREE_CHANGED) >> + ret = record_changed_ref(sctx); >> + } >> + >> + return ret; >> +} >> + >> +static int changed_xattr(struct send_ctx *sctx, >> + enum btrfs_compare_tree_result result) >> +{ >> + int ret = 0; >> + >> + BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); >> + >> + if (!sctx->cur_inode_new_gen&& !sctx->cur_inode_deleted) { >> + if (result == BTRFS_COMPARE_TREE_NEW) >> + ret = process_new_xattr(sctx); >> + else if (result == BTRFS_COMPARE_TREE_DELETED) >> + ret = process_deleted_xattr(sctx); >> + else if (result == BTRFS_COMPARE_TREE_CHANGED) >> + ret = process_changed_xattr(sctx); >> + } >> + >> + return ret; >> +} >> + >> +static int changed_extent(struct send_ctx *sctx, >> + enum btrfs_compare_tree_result result) >> +{ >> + int ret = 0; >> + >> + BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid); >> + >> + if (!sctx->cur_inode_new_gen&& !sctx->cur_inode_deleted) { >> + if (result != BTRFS_COMPARE_TREE_DELETED) >> + ret = process_extent(sctx, sctx->left_path, >> + sctx->cmp_key); >> + } >> + >> + return ret; >> +} >> + >> + >> +static int changed_cb(struct btrfs_root *left_root, >> + struct btrfs_root *right_root, >> + struct btrfs_path *left_path, >> + struct btrfs_path *right_path, >> + struct btrfs_key *key, >> + enum btrfs_compare_tree_result result, >> + void *ctx) >> +{ >> + int ret = 0; >> + struct send_ctx *sctx = ctx; >> + >> + sctx->left_path = left_path; >> + sctx->right_path = right_path; >> + sctx->cmp_key = key; >> + >> + ret = finish_inode_if_needed(sctx, 0); >> + if (ret< 0) >> + goto out; >> + >> + if (key->type == BTRFS_INODE_ITEM_KEY) >> + ret = changed_inode(sctx, result); >> + else if (key->type == BTRFS_INODE_REF_KEY) >> + ret = changed_ref(sctx, 
result); >> + else if (key->type == BTRFS_XATTR_ITEM_KEY) >> + ret = changed_xattr(sctx, result); >> + else if (key->type == BTRFS_EXTENT_DATA_KEY) >> + ret = changed_extent(sctx, result); >> + >> +out: >> + return ret; >> +} >> + >> +static int full_send_tree(struct send_ctx *sctx) >> +{ >> + int ret; >> + struct btrfs_trans_handle *trans = NULL; >> + struct btrfs_root *send_root = sctx->send_root; >> + struct btrfs_key key; >> + struct btrfs_key found_key; >> + struct btrfs_path *path; >> + struct extent_buffer *eb; >> + int slot; >> + u64 start_ctransid; >> + u64 ctransid; >> + >> + path = alloc_path_for_send(); >> + if (!path) >> + return -ENOMEM; >> + >> + spin_lock(&send_root->root_times_lock); >> + start_ctransid = btrfs_root_ctransid(&send_root->root_item); >> + spin_unlock(&send_root->root_times_lock); >> + >> + key.objectid = BTRFS_FIRST_FREE_OBJECTID; >> + key.type = BTRFS_INODE_ITEM_KEY; >> + key.offset = 0; >> + >> +join_trans: >> + /* >> + * We need to make sure the transaction does not get committed >> + * while we do anything on commit roots. Join a transaction to prevent >> + * this. >> + */ >> + trans = btrfs_join_transaction(send_root); >> + if (IS_ERR(trans)) { >> + ret = PTR_ERR(trans); >> + trans = NULL; >> + goto out; >> + } >> + >> + /* >> + * Make sure the tree has not changed >> + */ >> + spin_lock(&send_root->root_times_lock); >> + ctransid = btrfs_root_ctransid(&send_root->root_item); >> + spin_unlock(&send_root->root_times_lock); >> + >> + if (ctransid != start_ctransid) { >> + WARN(1, KERN_WARNING "btrfs: the root that you're trying to " >> + "send was modified in between. This is " >> + "probably a bug.\n"); > > What is the purpose of getting the ctransid outside the > transaction anyway? > Hmm I don't understand the question... 
>> + ret = -EIO; >> + goto out; >> + } >> + >> + ret = btrfs_search_slot_for_read(send_root,&key, path, 1, 0); >> + if (ret< 0) >> + goto out; >> + if (ret) >> + goto out_finish; >> + >> + while (1) { >> + /* >> + * When someone want to commit while we iterate, end the >> + * joined transaction and rejoin. >> + */ >> + if (btrfs_should_end_transaction(trans, send_root)) { >> + ret = btrfs_end_transaction(trans, send_root); >> + trans = NULL; >> + if (ret< 0) >> + goto out; >> + btrfs_release_path(path); >> + goto join_trans; >> + } >> + >> + eb = path->nodes[0]; >> + slot = path->slots[0]; >> + btrfs_item_key_to_cpu(eb,&found_key, slot); >> + >> + ret = changed_cb(send_root, NULL, path, NULL, >> + &found_key, BTRFS_COMPARE_TREE_NEW, sctx); >> + if (ret< 0) >> + goto out; >> + >> + key.objectid = found_key.objectid; >> + key.type = found_key.type; >> + key.offset = found_key.offset + 1; > > shouldn't this just be before the goto join_trans? > Hmm I don't think so. Am I missing something? 
>> + >> + ret = btrfs_next_item(send_root, path); >> + if (ret< 0) >> + goto out; >> + if (ret) { >> + ret = 0; >> + break; >> + } >> + } >> + >> +out_finish: >> + ret = finish_inode_if_needed(sctx, 1); >> + >> +out: >> + btrfs_free_path(path); >> + if (trans) { >> + if (!ret) >> + ret = btrfs_end_transaction(trans, send_root); >> + else >> + btrfs_end_transaction(trans, send_root); >> + } >> + return ret; >> +} >> + >> +static int send_subvol(struct send_ctx *sctx) >> +{ >> + int ret; >> + >> + ret = send_header(sctx); >> + if (ret< 0) >> + goto out; >> + >> + ret = send_subvol_begin(sctx); >> + if (ret< 0) >> + goto out; >> + >> + if (sctx->parent_root) { >> + ret = btrfs_compare_trees(sctx->send_root, sctx->parent_root, >> + changed_cb, sctx); >> + if (ret< 0) >> + goto out; >> + ret = finish_inode_if_needed(sctx, 1); >> + if (ret< 0) >> + goto out; >> + } else { >> + ret = full_send_tree(sctx); >> + if (ret< 0) >> + goto out; >> + } >> + >> +out: >> + if (!ret) >> + ret = close_cur_inode_file(sctx); >> + else >> + close_cur_inode_file(sctx); >> + >> + free_recorded_refs(sctx); >> + return ret; >> +} >> + >> +long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) >> +{ >> + int ret = 0; >> + struct btrfs_root *send_root; >> + struct btrfs_root *clone_root; >> + struct btrfs_fs_info *fs_info; >> + struct btrfs_ioctl_send_args *arg = NULL; >> + struct btrfs_key key; >> + struct file *filp = NULL; >> + struct send_ctx *sctx = NULL; >> + u32 i; >> + u64 *clone_sources_tmp = NULL; >> + >> + if (!capable(CAP_SYS_ADMIN)) >> + return -EPERM; >> + >> + send_root = BTRFS_I(fdentry(mnt_file)->d_inode)->root; >> + fs_info = send_root->fs_info; >> + >> + arg = memdup_user(arg_, sizeof(*arg)); >> + if (IS_ERR(arg)) { >> + ret = PTR_ERR(arg); >> + arg = NULL; >> + goto out; >> + } >> + >> + if (!access_ok(VERIFY_READ, arg->clone_sources, >> + sizeof(*arg->clone_sources * >> + arg->clone_sources_count))) { >> + ret = -EFAULT; >> + goto out; >> + } >> + >> + sctx = 
kzalloc(sizeof(struct send_ctx), GFP_NOFS); >> + if (!sctx) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + INIT_LIST_HEAD(&sctx->new_refs); >> + INIT_LIST_HEAD(&sctx->deleted_refs); >> + INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); >> + INIT_LIST_HEAD(&sctx->name_cache_list); >> + >> + sctx->send_filp = fget(arg->send_fd); >> + if (IS_ERR(sctx->send_filp)) { >> + ret = PTR_ERR(sctx->send_filp); >> + goto out; >> + } >> + >> + sctx->mnt = mnt_file->f_path.mnt; >> + >> + sctx->send_root = send_root; >> + sctx->clone_roots_cnt = arg->clone_sources_count; >> + >> + sctx->send_max_size = BTRFS_SEND_BUF_SIZE; >> + sctx->send_buf = vmalloc(sctx->send_max_size); >> + if (!sctx->send_buf) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + sctx->read_buf = vmalloc(BTRFS_SEND_READ_SIZE); >> + if (!sctx->read_buf) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + sctx->clone_roots = vzalloc(sizeof(struct clone_root) * >> + (arg->clone_sources_count + 1)); >> + if (!sctx->clone_roots) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + if (arg->clone_sources_count) { >> + clone_sources_tmp = vmalloc(arg->clone_sources_count * >> + sizeof(*arg->clone_sources)); >> + if (!clone_sources_tmp) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + >> + ret = copy_from_user(clone_sources_tmp, arg->clone_sources, >> + arg->clone_sources_count * >> + sizeof(*arg->clone_sources)); >> + if (ret) { >> + ret = -EFAULT; >> + goto out; >> + } >> + >> + for (i = 0; i< arg->clone_sources_count; i++) { >> + key.objectid = clone_sources_tmp[i]; >> + key.type = BTRFS_ROOT_ITEM_KEY; >> + key.offset = (u64)-1; >> + clone_root = btrfs_read_fs_root_no_name(fs_info,&key); >> + if (!clone_root) { >> + ret = -EINVAL; >> + goto out; >> + } >> + if (IS_ERR(clone_root)) { >> + ret = PTR_ERR(clone_root); >> + goto out; >> + } >> + sctx->clone_roots[i].root = clone_root; >> + } >> + vfree(clone_sources_tmp); >> + clone_sources_tmp = NULL; >> + } >> + >> + if (arg->parent_root) { >> + key.objectid 
= arg->parent_root; >> + key.type = BTRFS_ROOT_ITEM_KEY; >> + key.offset = (u64)-1; >> + sctx->parent_root = btrfs_read_fs_root_no_name(fs_info,&key); >> + if (!sctx->parent_root) { >> + ret = -EINVAL; >> + goto out; >> + } >> + } >> + >> + /* >> + * Clones from send_root are allowed, but only if the clone source >> + * is behind the current send position. This is checked while searching >> + * for possible clone sources. >> + */ >> + sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; >> + >> + /* We do a bsearch later */ >> + sort(sctx->clone_roots, sctx->clone_roots_cnt, >> + sizeof(*sctx->clone_roots), __clone_root_cmp_sort, >> + NULL); >> + >> + ret = send_subvol(sctx); >> + if (ret< 0) >> + goto out; >> + >> + ret = begin_cmd(sctx, BTRFS_SEND_C_END); >> + if (ret< 0) >> + goto out; >> + ret = send_cmd(sctx); >> + if (ret< 0) >> + goto out; >> + >> +out: >> + if (filp) >> + fput(filp); >> + kfree(arg); >> + vfree(clone_sources_tmp); >> + >> + if (sctx) { >> + if (sctx->send_filp) >> + fput(sctx->send_filp); >> + >> + vfree(sctx->clone_roots); >> + vfree(sctx->send_buf); >> + vfree(sctx->read_buf); >> + >> + name_cache_free(sctx); >> + >> + kfree(sctx); >> + } >> + >> + return ret; >> +} >> diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h >> index a4c23ee..53f8ee7 100644 >> --- a/fs/btrfs/send.h >> +++ b/fs/btrfs/send.h >> @@ -124,3 +124,7 @@ enum { >> __BTRFS_SEND_A_MAX, >> }; >> #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1) >> + >> +#ifdef __KERNEL__ >> +long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); >> +#endif > > -- > To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html >