All of lore.kernel.org
 help / color / mirror / Atom feed
From: Miao Xie <miaox@cn.fujitsu.com>
To: <fdmanana@gmail.com>
Cc: "linux-btrfs@vger.kernel.org" <linux-btrfs@vger.kernel.org>
Subject: Re: [PATCH 02/12] Btrfs: load checksum data once when submitting a direct read io
Date: Tue, 29 Jul 2014 09:56:14 +0800	[thread overview]
Message-ID: <53D6FF3E.8080404@cn.fujitsu.com> (raw)
In-Reply-To: <CAL3q7H5ihcw404e4dYpoJU-hcSZJaKe=mBi38srFqdSLv8DoRg@mail.gmail.com>

On Mon, 28 Jul 2014 18:24:47 +0100, Filipe David Manana wrote:
> On Sat, Jun 28, 2014 at 12:34 PM, Miao Xie <miaox@cn.fujitsu.com> wrote:
>> The current code loads checksum data several times when a whole direct
>> read io is split because of the raid stripe limit, which makes us search
>> the csum tree several times. This just wastes time and makes the
>> contention on the csum tree root more serious. This patch fixes the
>> problem by loading all of the checksum data at once.
>>
>> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
>> ---
>>  fs/btrfs/btrfs_inode.h |  1 -
>>  fs/btrfs/ctree.h       |  3 +--
>>  fs/btrfs/file-item.c   | 14 ++------------
>>  fs/btrfs/inode.c       | 40 ++++++++++++++++++++++------------------
>>  4 files changed, 25 insertions(+), 33 deletions(-)
>>
>> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
>> index 4794923..7e9f53b 100644
>> --- a/fs/btrfs/btrfs_inode.h
>> +++ b/fs/btrfs/btrfs_inode.h
>> @@ -263,7 +263,6 @@ struct btrfs_dio_private {
>>
>>         /* dio_bio came from fs/direct-io.c */
>>         struct bio *dio_bio;
>> -       u8 csum[0];
>>  };
>>
>>  /*
>> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
>> index be91397..40e9938 100644
>> --- a/fs/btrfs/ctree.h
>> +++ b/fs/btrfs/ctree.h
>> @@ -3739,8 +3739,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
>>  int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
>>                           struct bio *bio, u32 *dst);
>>  int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
>> -                             struct btrfs_dio_private *dip, struct bio *bio,
>> -                             u64 logical_offset);
>> +                             struct bio *bio, u64 logical_offset);
>>  int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
>>                              struct btrfs_root *root,
>>                              u64 objectid, u64 pos,
>> diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
>> index f46cfe4..cf1b94f 100644
>> --- a/fs/btrfs/file-item.c
>> +++ b/fs/btrfs/file-item.c
>> @@ -299,19 +299,9 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
>>  }
>>
>>  int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
>> -                             struct btrfs_dio_private *dip, struct bio *bio,
>> -                             u64 offset)
>> +                             struct bio *bio, u64 offset)
>>  {
>> -       int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
>> -       u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
>> -       int ret;
>> -
>> -       len >>= inode->i_sb->s_blocksize_bits;
>> -       len *= csum_size;
>> -
>> -       ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
>> -                                     (u32 *)(dip->csum + len), 1);
>> -       return ret;
>> +       return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
>>  }
>>
>>  int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
>> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
>> index a3f102f..969fb22 100644
>> --- a/fs/btrfs/inode.c
>> +++ b/fs/btrfs/inode.c
>> @@ -7081,7 +7081,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
>>         struct inode *inode = dip->inode;
>>         struct btrfs_root *root = BTRFS_I(inode)->root;
>>         struct bio *dio_bio;
>> -       u32 *csums = (u32 *)dip->csum;
>> +       struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
>> +       u32 *csums = (u32 *)io_bio->csum;
>>         u64 start;
>>         int i;
>>
>> @@ -7123,6 +7124,9 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
>>         if (err)
>>                 clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
>>         dio_end_io(dio_bio, err);
>> +
>> +       if (io_bio->end_io)
>> +               io_bio->end_io(io_bio, err);
>>         bio_put(bio);
>>  }
>>
>> @@ -7261,13 +7265,20 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
>>                 ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
>>                 if (ret)
>>                         goto err;
>> -       } else if (!skip_sum) {
>> -               ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
>> +       } else {
>> +               /*
>> +                * We have loaded all the csum data we need when we submit
>> +                * the first bio, so skip it.
>> +                */
>> +               if (dip->logical_offset != file_offset)
>> +                       goto map;
>> +
>> +               /* Load all csum data at once. */
>> +               ret = btrfs_lookup_bio_sums_dio(root, inode, dip->orig_bio,
>>                                                 file_offset);
>>                 if (ret)
>>                         goto err;
>>         }
>> -
>>  map:
>>         ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
>>  err:
>> @@ -7288,7 +7299,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
>>         u64 submit_len = 0;
>>         u64 map_length;
>>         int nr_pages = 0;
>> -       int ret = 0;
>> +       int ret;
>>         int async_submit = 0;
>>
>>         map_length = orig_bio->bi_iter.bi_size;
>> @@ -7392,30 +7403,20 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
>>         struct btrfs_root *root = BTRFS_I(inode)->root;
>>         struct btrfs_dio_private *dip;
>>         struct bio *io_bio;
>> +       struct btrfs_io_bio *btrfs_bio;
>>         int skip_sum;
>> -       int sum_len;
>>         int write = rw & REQ_WRITE;
>>         int ret = 0;
>> -       u16 csum_size;
>>
>>         skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
>>
>> -       io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
>> +       io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS | __GFP_ZERO);
> 
> Hi Miao,
> 
> With this change (adding the __GFP_ZERO flag), I ran once into the
> following warning while running xfstests (dunno exactly which test
> case triggered it, likely one of those that run fsstress):

Thanks for testing.
I'll fix it.

Miao

> 
> [ 3941.856860] ------------[ cut here ]------------
> [ 3941.856871] WARNING: CPU: 0 PID: 4154 at mm/mempool.c:205
> mempool_alloc+0xc8/0x1c0()
> [ 3941.856873] Modules linked in: btrfs xor raid6_pq binfmt_misc nfsd
> auth_rpcgss oid_registry nfs_acl nfs lockd fscache sunrpc i2c_piix4
> i2c_core pcspkr evbug psmouse serio_raw e1000 [
> last unloaded: btrfs]
> [ 3941.856886] CPU: 0 PID: 4154 Comm: xfs_io Tainted: G        W
> 3.16.0-rc6-fdm-btrfs-next-37+ #1
> [ 3941.856887] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
> [ 3941.856889]  0000000000000009 ffff8800d569f778 ffffffff8169a687
> 00000000000077b0
> [ 3941.856892]  0000000000000000 ffff8800d569f7b8 ffffffff8104fb4c
> 00000000ffffffff
> [ 3941.856894]  0000000000008050 0000000000000001 0000000000008050
> ffff88004f921918
> [ 3941.856896] Call Trace:
> [ 3941.856901]  [<ffffffff8169a687>] dump_stack+0x4e/0x68
> [ 3941.856904]  [<ffffffff8104fb4c>] warn_slowpath_common+0x8c/0xc0
> [ 3941.856905]  [<ffffffff8104fb9a>] warn_slowpath_null+0x1a/0x20
> [ 3941.856907]  [<ffffffff81151fc8>] mempool_alloc+0xc8/0x1c0
> [ 3941.856911]  [<ffffffff810129cf>] ? save_stack_trace+0x2f/0x50
> [ 3941.856918]  [<ffffffff8131331a>] bio_alloc_bioset+0x10a/0x1c0
> [ 3941.856921]  [<ffffffff81314c68>] bio_clone_bioset+0x88/0x310
> [ 3941.856923]  [<ffffffff81151a65>] ? mempool_alloc_slab+0x15/0x20
> [ 3941.856936]  [<ffffffffa0209385>] btrfs_bio_clone+0x15/0x20 [btrfs]
> [ 3941.856944]  [<ffffffffa01ed47f>] btrfs_submit_direct+0x4f/0x7b0 [btrfs]
> [ 3941.856948]  [<ffffffff811fc10a>] ? do_blockdev_direct_IO+0x17ea/0x1f60
> [ 3941.856952]  [<ffffffff810afb35>] ? mark_held_locks+0x75/0xa0
> [ 3941.856955]  [<ffffffff816a383f>] ? _raw_spin_unlock_irqrestore+0x3f/0x70
> [ 3941.856956]  [<ffffffff811fc13e>] do_blockdev_direct_IO+0x181e/0x1f60
> [ 3941.856965]  [<ffffffffa01f86d0>] ?
> btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
> [ 3941.856972]  [<ffffffffa01ed430>] ?
> btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
> [ 3941.856974]  [<ffffffff811fc8cc>] __blockdev_direct_IO+0x4c/0x50
> [ 3941.856981]  [<ffffffffa01f86d0>] ?
> btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
> [ 3941.856987]  [<ffffffffa01ed430>] ?
> btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
> [ 3941.856993]  [<ffffffffa01eb591>] btrfs_direct_IO+0x1a1/0x340 [btrfs]
> [ 3941.856999]  [<ffffffffa01f86d0>] ?
> btrfs_page_exists_in_range+0x2a0/0x2a0 [btrfs]
> [ 3941.857005]  [<ffffffffa01ed430>] ?
> btrfs_writepage_start_hook+0xf0/0xf0 [btrfs]
> [ 3941.857007]  [<ffffffff81150210>] generic_file_direct_write+0xb0/0x180
> [ 3941.857014]  [<ffffffffa01fc4a1>] btrfs_file_write_iter+0x411/0x560 [btrfs]
> [ 3941.857017]  [<ffffffff811ba541>] new_sync_write+0x81/0xb0
> [ 3941.857019]  [<ffffffff811bb342>] vfs_write+0xc2/0x1f0
> [ 3941.857020]  [<ffffffff811bba2a>] SyS_pwrite64+0x9a/0xb0
> [ 3941.857022]  [<ffffffff816a3d92>] system_call_fastpath+0x16/0x1b
> [ 3941.857024] ---[ end trace c1dfd29523250709 ]---
> 
> Thanks.
> 
> 
>>         if (!io_bio) {
>>                 ret = -ENOMEM;
>>                 goto free_ordered;
>>         }
>>
>> -       if (!skip_sum && !write) {
>> -               csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
>> -               sum_len = dio_bio->bi_iter.bi_size >>
>> -                       inode->i_sb->s_blocksize_bits;
>> -               sum_len *= csum_size;
>> -       } else {
>> -               sum_len = 0;
>> -       }
>> -
>> -       dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
>> +       dip = kmalloc(sizeof(*dip), GFP_NOFS);
>>         if (!dip) {
>>                 ret = -ENOMEM;
>>                 goto free_io_bio;
>> @@ -7441,6 +7442,9 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
>>         if (!ret)
>>                 return;
>>
>> +       btrfs_bio = btrfs_io_bio(io_bio);
>> +       if (btrfs_bio->end_io)
>> +               btrfs_bio->end_io(btrfs_bio, ret);
>>  free_io_bio:
>>         bio_put(io_bio);
>>
>> --
>> 1.9.3
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
> 


  reply	other threads:[~2014-07-29  1:54 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-28 11:34 [PATCH 00/12] Implement the data repair function for direct read Miao Xie
2014-06-28 11:34 ` [PATCH 01/12] Btrfs: fix put dio bio twice when we submit dio bio fail Miao Xie
2014-06-28 11:34 ` [PATCH 02/12] Btrfs: load checksum data once when submitting a direct read io Miao Xie
2014-07-28 17:24   ` Filipe David Manana
2014-07-29  1:56     ` Miao Xie [this message]
2014-06-28 11:34 ` [PATCH 03/12] Btrfs: cleanup similar code of the buffered data data check and dio read data check Miao Xie
2014-06-28 11:34 ` [PATCH 04/12] Btrfs: do file data check by sub-bio's self Miao Xie
2014-06-28 11:34 ` [PATCH 05/12] Btrfs: fix missing error handler if submiting re-read bio fails Miao Xie
2014-06-28 11:34 ` [PATCH 06/12] Btrfs: Cleanup unused variant and argument of IO failure handlers Miao Xie
2014-06-28 11:34 ` [PATCH 07/12] Btrfs: split bio_readpage_error into several functions Miao Xie
2014-06-28 11:34 ` [PATCH 08/12] Btrfs: modify repair_io_failure and make it suit direct io Miao Xie
2014-06-28 11:34 ` [PATCH 09/12] Btrfs: modify clean_io_failure " Miao Xie
2014-06-28 11:35 ` [PATCH 10/12] Btrfs: Set real mirror number for read operation on RAID0/5/6 Miao Xie
2014-06-28 11:35 ` [PATCH 11/12] Btrfs: implement repair function when direct read fails Miao Xie
2014-06-28 11:35 ` [PATCH 12/12] Btrfs: cleanup the read failure record after write or when the inode is freeing Miao Xie
2014-07-29  9:23 ` [PATCH v2 00/12] Implement the data repair function for direct read Miao Xie
2014-07-29  9:23   ` [PATCH v2 01/12] Btrfs: fix put dio bio twice when we submit dio bio fail Miao Xie
2014-07-29  9:24   ` [PATCH v2 02/12] Btrfs: load checksum data once when submitting a direct read io Miao Xie
2014-08-08  0:32     ` Filipe David Manana
2014-08-08  9:22       ` Miao Xie
2014-08-08  9:23       ` [PATCH v3 " Miao Xie
2014-07-29  9:24   ` [PATCH v2 03/12] Btrfs: cleanup similar code of the buffered data data check and dio read data check Miao Xie
2014-07-29  9:24   ` [PATCH v2 04/12] Btrfs: do file data check by sub-bio's self Miao Xie
2014-07-29  9:24   ` [PATCH v2 05/12] Btrfs: fix missing error handler if submiting re-read bio fails Miao Xie
2014-07-29  9:24   ` [PATCH v2 06/12] Btrfs: Cleanup unused variant and argument of IO failure handlers Miao Xie
2014-07-29  9:24   ` [PATCH v2 07/12] Btrfs: split bio_readpage_error into several functions Miao Xie
2014-07-29  9:24   ` [PATCH v2 08/12] Btrfs: modify repair_io_failure and make it suit direct io Miao Xie
2014-07-29  9:24   ` [PATCH v2 09/12] Btrfs: modify clean_io_failure " Miao Xie
2014-07-29  9:24   ` [PATCH v2 10/12] Btrfs: Set real mirror number for read operation on RAID0/5/6 Miao Xie
2014-07-29  9:24   ` [PATCH v2 11/12] Btrfs: implement repair function when direct read fails Miao Xie
2014-08-29 18:31     ` Chris Mason
2014-09-01  6:56       ` Miao Xie
2014-09-02 12:33         ` Liu Bo
2014-09-02 13:05           ` Chris Mason
2014-09-03  9:02             ` Miao Xie
2014-09-12 10:43             ` [PATCH v4 00/11] Implement the data repair function for direct read Miao Xie
2014-09-12 10:43               ` [PATCH v4 01/11] Btrfs: load checksum data once when submitting a direct read io Miao Xie
2014-09-12 10:43               ` [PATCH v4 02/11] Btrfs: cleanup similar code of the buffered data data check and dio read data check Miao Xie
2014-09-12 10:43               ` [PATCH v4 03/11] Btrfs: do file data check by sub-bio's self Miao Xie
2014-09-12 10:43               ` [PATCH v4 04/11] Btrfs: fix missing error handler if submiting re-read bio fails Miao Xie
2014-09-12 10:43               ` [PATCH v4 05/11] Btrfs: Cleanup unused variant and argument of IO failure handlers Miao Xie
2014-09-12 10:43               ` [PATCH v4 06/11] Btrfs: split bio_readpage_error into several functions Miao Xie
2014-09-12 10:44               ` [PATCH v4 07/11] Btrfs: modify repair_io_failure and make it suit direct io Miao Xie
2014-09-12 10:44               ` [PATCH v4 08/11] Btrfs: modify clean_io_failure " Miao Xie
2014-09-12 10:44               ` [PATCH v4 09/11] Btrfs: Set real mirror number for read operation on RAID0/5/6 Miao Xie
2014-09-12 10:44               ` [PATCH v4 10/11] Btrfs: implement repair function when direct read fails Miao Xie
2014-09-12 10:44               ` [PATCH v4 11/11] Btrfs: cleanup the read failure record after write or when the inode is freeing Miao Xie
2014-09-12 14:50               ` [PATCH v4 00/11] Implement the data repair function for direct read Chris Mason
2014-07-29  9:24   ` [PATCH v2 12/12] Btrfs: cleanup the read failure record after write or when the inode is freeing Miao Xie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53D6FF3E.8080404@cn.fujitsu.com \
    --to=miaox@cn.fujitsu.com \
    --cc=fdmanana@gmail.com \
    --cc=linux-btrfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.