* [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev
@ 2022-05-06 13:57 Liang Chen
2022-05-06 16:54 ` kernel test robot
2022-05-06 18:02 ` kernel test robot
0 siblings, 2 replies; 4+ messages in thread
From: Liang Chen @ 2022-05-06 13:57 UTC (permalink / raw)
To: linux-fsdevel; +Cc: hch, jmoyer, jack, lczerner, Liang Chen
From: Liang Chen <liangchen.linux@gmail.com>
As pointed out in commit 332391a, mixing buffered reads and asynchronous
direct writes risks ending up with a situation where stale data is left
in page cache while new data is already written to disk. The same problem
hits block dev fs too. A similar approach needs to be taken here.
Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
---
block/fops.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 92 insertions(+)
diff --git a/block/fops.c b/block/fops.c
index 9f2ecec406b0..8ab679814b9d 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -136,11 +136,56 @@ struct blkdev_dio {
size_t size;
atomic_t ref;
unsigned int flags;
+ struct work_struct complete_work;
struct bio bio ____cacheline_aligned_in_smp;
};
static struct bio_set blkdev_dio_pool;
+static void blkdev_aio_complete_work(struct work_struct *work)
+{
+ struct blkdev_dio *dio = container_of(work, struct blkdev_dio, complete_work);
+ struct kiocb *iocb = dio->iocb;
+ int err;
+ struct inode *inode = bdev_file_inode(iocb->ki_filp);
+ loff_t offset = iocb->ki_pos;
+ ssize_t ret;
+
+ WRITE_ONCE(iocb->private, NULL);
+
+ if (likely(!dio->bio.bi_status)) {
+ ret = dio->size;
+ iocb->ki_pos += ret;
+ } else {
+ ret = blk_status_to_errno(dio->bio.bi_status);
+ }
+
+ /*
+ * Try again to invalidate clean pages which might have been cached by
+ * non-direct readahead, or faulted in by get_user_pages() if the source
+ * of the write was an mmap'ed region of the file we're writing. Either
+ * one is a pretty crazy thing to do, so we don't support it 100%. If
+ * this invalidation fails, tough, the write still worked...
+ */
+ if (iocb->ki_flags & IOCB_WRITE && ret > 0 &&
+ inode->i_mapping->nrpages) {
+ err = invalidate_inode_pages2_range(inode->i_mapping,
+ offset >> PAGE_SHIFT,
+ (offset + ret - 1) >> PAGE_SHIFT);
+ if (err)
+ dio_warn_stale_pagecache(iocb->ki_filp);
+ }
+
+ iocb->ki_complete(iocb, ret);
+
+ /*
+ * For multi-bio dio dio->bio has an extra reference to ensure the
+ * dio stays around. In the other case, an extra reference is taken
+ * to make sure
+ */
+ bio_put(&dio->bio);
+}
+
static void blkdev_bio_end_io(struct bio *bio)
{
struct blkdev_dio *dio = bio->bi_private;
@@ -153,6 +198,14 @@ static void blkdev_bio_end_io(struct bio *bio)
if (!(dio->flags & DIO_IS_SYNC)) {
struct kiocb *iocb = dio->iocb;
ssize_t ret;
+ struct inode *inode = bdev_file_inode(iocb->ki_filp);
+
+ if (iocb->ki_flags & IOCB_WRITE){
+ INIT_WORK(&dio->complete_work, blkdev_aio_complete_work);
+ queue_work(inode->i_sb->s_dio_done_wq,
+ &dio->complete_work);
+ goto out;
+ }
WRITE_ONCE(iocb->private, NULL);
@@ -173,6 +226,7 @@ static void blkdev_bio_end_io(struct bio *bio)
}
}
+out:
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
@@ -284,6 +338,20 @@ static void blkdev_bio_end_io_async(struct bio *bio)
struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio);
struct kiocb *iocb = dio->iocb;
ssize_t ret;
+ struct inode *inode = bdev_file_inode(iocb->ki_filp);
+
+ if (iocb->ki_flags & IOCB_WRITE){
+ INIT_WORK(&dio->complete_work, blkdev_aio_complete_work);
+ /*
+ * Grab an extra reference to ensure the dio structure
+ * which the bio embeds in stays around for complete_work
+ * to access.
+ */
+ bio_get(bio);
+ queue_work(inode->i_sb->s_dio_done_wq,
+ &dio->complete_work);
+ goto out;
+ }
WRITE_ONCE(iocb->private, NULL);
@@ -296,6 +364,7 @@ static void blkdev_bio_end_io_async(struct bio *bio)
iocb->ki_complete(iocb, ret);
+out:
if (dio->flags & DIO_SHOULD_DIRTY) {
bio_check_pages_dirty(bio);
} else {
@@ -366,14 +435,37 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
return -EIOCBQUEUED;
}
+int blkdev_sb_init_dio_done_wq(struct super_block *sb)
+{
+ struct workqueue_struct *old;
+ struct workqueue_struct *wq = alloc_workqueue("dio/%s",
+ WQ_MEM_RECLAIM, 0,
+ sb->s_id);
+ if (!wq)
+ return -ENOMEM;
+ /*
+ * This has to be atomic as more DIOs can race to create the workqueue
+ */
+ old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
+ /* Someone created workqueue before us? Free ours... */
+ if (old)
+ destroy_workqueue(wq);
+ return 0;
+}
+
static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
unsigned int nr_pages;
+ struct inode *inode = bdev_file_inode(iocb->ki_filp);
if (!iov_iter_count(iter))
return 0;
nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
+
+ if(!inode->i_sb->s_dio_done_wq && blkdev_sb_init_dio_done_wq(inode->i_sb))
+ return -ENOMEM;
+
if (likely(nr_pages <= BIO_MAX_VECS)) {
if (is_sync_kiocb(iocb))
return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
--
2.31.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev
2022-05-06 13:57 [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev Liang Chen
@ 2022-05-06 16:54 ` kernel test robot
2022-05-06 18:02 ` kernel test robot
1 sibling, 0 replies; 4+ messages in thread
From: kernel test robot @ 2022-05-06 16:54 UTC (permalink / raw)
To: Liang Chen, linux-fsdevel
Cc: llvm, kbuild-all, hch, jmoyer, jack, lczerner, Liang Chen
Hi Liang,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on v5.18-rc5 next-20220506]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/intel-lab-lkp/linux/commits/Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: riscv-randconfig-r024-20220506 (https://download.01.org/0day-ci/archive/20220507/202205070030.wgFHZ8hk-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 5e004fb787698440a387750db7f8028e7cb14cfc)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# install riscv cross compiling tool for clang build
# apt-get install binutils-riscv64-linux-gnu
# https://github.com/intel-lab-lkp/linux/commit/2b2bda510e6c93a57581021c675b023d2f43e81e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
git checkout 2b2bda510e6c93a57581021c675b023d2f43e81e
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
>> block/fops.c:423:5: warning: no previous prototype for function 'blkdev_sb_init_dio_done_wq' [-Wmissing-prototypes]
int blkdev_sb_init_dio_done_wq(struct super_block *sb)
^
block/fops.c:423:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
int blkdev_sb_init_dio_done_wq(struct super_block *sb)
^
static
1 warning generated.
vim +/blkdev_sb_init_dio_done_wq +423 block/fops.c
422
> 423 int blkdev_sb_init_dio_done_wq(struct super_block *sb)
424 {
425 struct workqueue_struct *old;
426 struct workqueue_struct *wq = alloc_workqueue("dio/%s",
427 WQ_MEM_RECLAIM, 0,
428 sb->s_id);
429 if (!wq)
430 return -ENOMEM;
431 /*
432 * This has to be atomic as more DIOs can race to create the workqueue
433 */
434 old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
435 /* Someone created workqueue before us? Free ours... */
436 if (old)
437 destroy_workqueue(wq);
438 return 0;
439 }
440
--
0-DAY CI Kernel Test Service
https://01.org/lkp
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev
2022-05-06 13:57 [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev Liang Chen
2022-05-06 16:54 ` kernel test robot
@ 2022-05-06 18:02 ` kernel test robot
1 sibling, 0 replies; 4+ messages in thread
From: kernel test robot @ 2022-05-06 18:02 UTC (permalink / raw)
To: Liang Chen, linux-fsdevel
Cc: kbuild-all, hch, jmoyer, jack, lczerner, Liang Chen
Hi Liang,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on v5.18-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/intel-lab-lkp/linux/commits/Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: arc-randconfig-r043-20220506 (https://download.01.org/0day-ci/archive/20220507/202205070158.7Io6lIxQ-lkp@intel.com/config)
compiler: arc-elf-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/2b2bda510e6c93a57581021c675b023d2f43e81e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
git checkout 2b2bda510e6c93a57581021c675b023d2f43e81e
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=arc SHELL=/bin/bash
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
>> block/fops.c:423:5: warning: no previous prototype for 'blkdev_sb_init_dio_done_wq' [-Wmissing-prototypes]
423 | int blkdev_sb_init_dio_done_wq(struct super_block *sb)
| ^~~~~~~~~~~~~~~~~~~~~~~~~~
vim +/blkdev_sb_init_dio_done_wq +423 block/fops.c
422
> 423 int blkdev_sb_init_dio_done_wq(struct super_block *sb)
424 {
425 struct workqueue_struct *old;
426 struct workqueue_struct *wq = alloc_workqueue("dio/%s",
427 WQ_MEM_RECLAIM, 0,
428 sb->s_id);
429 if (!wq)
430 return -ENOMEM;
431 /*
432 * This has to be atomic as more DIOs can race to create the workqueue
433 */
434 old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
435 /* Someone created workqueue before us? Free ours... */
436 if (old)
437 destroy_workqueue(wq);
438 return 0;
439 }
440
--
0-DAY CI Kernel Test Service
https://01.org/lkp
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev
@ 2022-05-06 19:56 kernel test robot
0 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2022-05-06 19:56 UTC (permalink / raw)
To: kbuild
[-- Attachment #1: Type: text/plain, Size: 5294 bytes --]
CC: kbuild-all(a)lists.01.org
BCC: lkp(a)intel.com
In-Reply-To: <20220506135709.46872-1-lchen@localhost.localdomain>
References: <20220506135709.46872-1-lchen@localhost.localdomain>
TO: Liang Chen <liangchen.linux@gmail.com>
TO: linux-fsdevel(a)vger.kernel.org
CC: hch(a)infradead.org
CC: jmoyer(a)redhat.com
CC: jack(a)suse.cz
CC: lczerner(a)redhat.com
CC: Liang Chen <liangchen.linux@gmail.com>
Hi Liang,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on v5.18-rc5 next-20220506]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/intel-lab-lkp/linux/commits/Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
:::::: branch date: 6 hours ago
:::::: commit date: 6 hours ago
config: i386-randconfig-c001 (https://download.01.org/0day-ci/archive/20220507/202205070314.rXjpPg15-lkp(a)intel.com/config)
compiler: gcc-11 (Debian 11.2.0-20) 11.2.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Julia Lawall <julia.lawall@lip6.fr>
cocci warnings: (new ones prefixed by >>)
>> block/fops.c:177:5-24: atomic_dec_and_test variation before object free at line 185.
vim +177 block/fops.c
2b2bda510e6c93 Liang Chen 2022-05-06 168
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 169 static void blkdev_bio_end_io(struct bio *bio)
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 170 {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 171 struct blkdev_dio *dio = bio->bi_private;
09ce8744253a03 Jens Axboe 2021-10-14 172 bool should_dirty = dio->flags & DIO_SHOULD_DIRTY;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 173
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 174 if (bio->bi_status && !dio->bio.bi_status)
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 175 dio->bio.bi_status = bio->bi_status;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 176
e71aa913e26543 Pavel Begunkov 2021-10-27 @177 if (atomic_dec_and_test(&dio->ref)) {
09ce8744253a03 Jens Axboe 2021-10-14 178 if (!(dio->flags & DIO_IS_SYNC)) {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 179 struct kiocb *iocb = dio->iocb;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 180 ssize_t ret;
2b2bda510e6c93 Liang Chen 2022-05-06 181 struct inode *inode = bdev_file_inode(iocb->ki_filp);
2b2bda510e6c93 Liang Chen 2022-05-06 182
2b2bda510e6c93 Liang Chen 2022-05-06 183 if (iocb->ki_flags & IOCB_WRITE){
2b2bda510e6c93 Liang Chen 2022-05-06 184 INIT_WORK(&dio->complete_work, blkdev_aio_complete_work);
2b2bda510e6c93 Liang Chen 2022-05-06 @185 queue_work(inode->i_sb->s_dio_done_wq,
2b2bda510e6c93 Liang Chen 2022-05-06 186 &dio->complete_work);
2b2bda510e6c93 Liang Chen 2022-05-06 187 goto out;
2b2bda510e6c93 Liang Chen 2022-05-06 188 }
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 189
3e08773c3841e9 Christoph Hellwig 2021-10-12 190 WRITE_ONCE(iocb->private, NULL);
3e08773c3841e9 Christoph Hellwig 2021-10-12 191
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 192 if (likely(!dio->bio.bi_status)) {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 193 ret = dio->size;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 194 iocb->ki_pos += ret;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 195 } else {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 196 ret = blk_status_to_errno(dio->bio.bi_status);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 197 }
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 198
6b19b766e8f077 Jens Axboe 2021-10-21 199 dio->iocb->ki_complete(iocb, ret);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 200 bio_put(&dio->bio);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 201 } else {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 202 struct task_struct *waiter = dio->waiter;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 203
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 204 WRITE_ONCE(dio->waiter, NULL);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 205 blk_wake_io_task(waiter);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 206 }
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 207 }
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 208
2b2bda510e6c93 Liang Chen 2022-05-06 209 out:
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 210 if (should_dirty) {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 211 bio_check_pages_dirty(bio);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 212 } else {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 213 bio_release_pages(bio, false);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 214 bio_put(bio);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 215 }
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 216 }
cd82cca7ebfe9c Christoph Hellwig 2021-09-07 217
--
0-DAY CI Kernel Test Service
https://01.org/lkp
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-05-06 19:56 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-06 13:57 [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev Liang Chen
2022-05-06 16:54 ` kernel test robot
2022-05-06 18:02 ` kernel test robot
2022-05-06 19:56 kernel test robot
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.