All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev
@ 2022-05-06 13:57 Liang Chen
  2022-05-06 16:54 ` kernel test robot
  2022-05-06 18:02 ` kernel test robot
  0 siblings, 2 replies; 4+ messages in thread
From: Liang Chen @ 2022-05-06 13:57 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: hch, jmoyer, jack, lczerner, Liang Chen

From: Liang Chen <liangchen.linux@gmail.com>

As pointed out in commit 332391a, mixing buffered reads and asynchronous
direct writes risks ending up with a situation where stale data is left
in page cache while new data is already written to disk. The same problem
hits block dev fs too. A similar approach needs to be taken here.

Signed-off-by: Liang Chen <liangchen.linux@gmail.com>
---
 block/fops.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/block/fops.c b/block/fops.c
index 9f2ecec406b0..8ab679814b9d 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -136,11 +136,56 @@ struct blkdev_dio {
 	size_t			size;
 	atomic_t		ref;
 	unsigned int		flags;
+	struct work_struct	complete_work;
 	struct bio		bio ____cacheline_aligned_in_smp;
 };
 
 static struct bio_set blkdev_dio_pool;
 
+static void blkdev_aio_complete_work(struct work_struct *work)
+{
+	struct blkdev_dio *dio = container_of(work, struct blkdev_dio, complete_work);
+	struct kiocb *iocb = dio->iocb;
+	int err;
+	struct inode *inode = bdev_file_inode(iocb->ki_filp);
+	loff_t offset = iocb->ki_pos;
+	ssize_t ret;
+
+	WRITE_ONCE(iocb->private, NULL);
+
+	if (likely(!dio->bio.bi_status)) {
+		ret = dio->size;
+		iocb->ki_pos += ret;
+	} else {
+		ret = blk_status_to_errno(dio->bio.bi_status);
+	}
+
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing.  Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
+	 * this invalidation fails, tough, the write still worked...
+	 */
+	if (iocb->ki_flags & IOCB_WRITE && ret > 0 &&
+	    inode->i_mapping->nrpages) {
+		err = invalidate_inode_pages2_range(inode->i_mapping,
+				offset >> PAGE_SHIFT,
+				(offset + ret - 1) >> PAGE_SHIFT);
+		if (err)
+			dio_warn_stale_pagecache(iocb->ki_filp);
+	}
+
+	iocb->ki_complete(iocb, ret);
+
+	/*
+	 * For multi-bio dio dio->bio has an extra reference to ensure the
+	 * dio stays around. In the other case, an extra reference is taken
+	 * to make sure 
+	 */
+	bio_put(&dio->bio);
+}
+
 static void blkdev_bio_end_io(struct bio *bio)
 {
 	struct blkdev_dio *dio = bio->bi_private;
@@ -153,6 +198,14 @@ static void blkdev_bio_end_io(struct bio *bio)
 		if (!(dio->flags & DIO_IS_SYNC)) {
 			struct kiocb *iocb = dio->iocb;
 			ssize_t ret;
+			struct inode *inode = bdev_file_inode(iocb->ki_filp);
+
+			if (iocb->ki_flags & IOCB_WRITE){
+				INIT_WORK(&dio->complete_work, blkdev_aio_complete_work);
+				queue_work(inode->i_sb->s_dio_done_wq,
+						&dio->complete_work);
+				goto out;
+			}
 
 			WRITE_ONCE(iocb->private, NULL);
 
@@ -173,6 +226,7 @@ static void blkdev_bio_end_io(struct bio *bio)
 		}
 	}
 
+out:
 	if (should_dirty) {
 		bio_check_pages_dirty(bio);
 	} else {
@@ -284,6 +338,20 @@ static void blkdev_bio_end_io_async(struct bio *bio)
 	struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio);
 	struct kiocb *iocb = dio->iocb;
 	ssize_t ret;
+	struct inode *inode = bdev_file_inode(iocb->ki_filp);
+
+	if (iocb->ki_flags & IOCB_WRITE){
+		INIT_WORK(&dio->complete_work, blkdev_aio_complete_work);
+		/*
+		 * Grab an extra reference to ensure the dio structure
+		 * which the bio embeds in stays around for complete_work
+		 * to access.
+		 */
+		bio_get(bio);
+		queue_work(inode->i_sb->s_dio_done_wq,
+				&dio->complete_work);
+		goto out;
+	}
 
 	WRITE_ONCE(iocb->private, NULL);
 
@@ -296,6 +364,7 @@ static void blkdev_bio_end_io_async(struct bio *bio)
 
 	iocb->ki_complete(iocb, ret);
 
+out:
 	if (dio->flags & DIO_SHOULD_DIRTY) {
 		bio_check_pages_dirty(bio);
 	} else {
@@ -366,14 +435,37 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	return -EIOCBQUEUED;
 }
 
+int blkdev_sb_init_dio_done_wq(struct super_block *sb)
+{
+	struct workqueue_struct *old;
+	struct workqueue_struct *wq = alloc_workqueue("dio/%s",
+						     WQ_MEM_RECLAIM, 0,
+						     sb->s_id);
+	if (!wq)
+	       return -ENOMEM;
+	/*
+	 * This has to be atomic as more DIOs can race to create the workqueue
+	 */
+	old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
+	/* Someone created workqueue before us? Free ours... */
+	if (old)
+		destroy_workqueue(wq);
+       return 0;
+}
+
 static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
 	unsigned int nr_pages;
+	struct inode *inode = bdev_file_inode(iocb->ki_filp);
 
 	if (!iov_iter_count(iter))
 		return 0;
 
 	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
+
+	if(!inode->i_sb->s_dio_done_wq && blkdev_sb_init_dio_done_wq(inode->i_sb))
+		return -ENOMEM;
+
 	if (likely(nr_pages <= BIO_MAX_VECS)) {
 		if (is_sync_kiocb(iocb))
 			return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev
  2022-05-06 13:57 [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev Liang Chen
@ 2022-05-06 16:54 ` kernel test robot
  2022-05-06 18:02 ` kernel test robot
  1 sibling, 0 replies; 4+ messages in thread
From: kernel test robot @ 2022-05-06 16:54 UTC (permalink / raw)
  To: Liang Chen, linux-fsdevel
  Cc: llvm, kbuild-all, hch, jmoyer, jack, lczerner, Liang Chen

Hi Liang,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on v5.18-rc5 next-20220506]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: riscv-randconfig-r024-20220506 (https://download.01.org/0day-ci/archive/20220507/202205070030.wgFHZ8hk-lkp@intel.com/config)
compiler: clang version 15.0.0 (https://github.com/llvm/llvm-project 5e004fb787698440a387750db7f8028e7cb14cfc)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/2b2bda510e6c93a57581021c675b023d2f43e81e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
        git checkout 2b2bda510e6c93a57581021c675b023d2f43e81e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> block/fops.c:423:5: warning: no previous prototype for function 'blkdev_sb_init_dio_done_wq' [-Wmissing-prototypes]
   int blkdev_sb_init_dio_done_wq(struct super_block *sb)
       ^
   block/fops.c:423:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int blkdev_sb_init_dio_done_wq(struct super_block *sb)
   ^
   static 
   1 warning generated.


vim +/blkdev_sb_init_dio_done_wq +423 block/fops.c

   422	
 > 423	int blkdev_sb_init_dio_done_wq(struct super_block *sb)
   424	{
   425		struct workqueue_struct *old;
   426		struct workqueue_struct *wq = alloc_workqueue("dio/%s",
   427							     WQ_MEM_RECLAIM, 0,
   428							     sb->s_id);
   429		if (!wq)
   430		       return -ENOMEM;
   431		/*
   432		 * This has to be atomic as more DIOs can race to create the workqueue
   433		 */
   434		old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
   435		/* Someone created workqueue before us? Free ours... */
   436		if (old)
   437			destroy_workqueue(wq);
   438	       return 0;
   439	}
   440	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev
  2022-05-06 13:57 [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev Liang Chen
  2022-05-06 16:54 ` kernel test robot
@ 2022-05-06 18:02 ` kernel test robot
  1 sibling, 0 replies; 4+ messages in thread
From: kernel test robot @ 2022-05-06 18:02 UTC (permalink / raw)
  To: Liang Chen, linux-fsdevel
  Cc: kbuild-all, hch, jmoyer, jack, lczerner, Liang Chen

Hi Liang,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on v5.18-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
config: arc-randconfig-r043-20220506 (https://download.01.org/0day-ci/archive/20220507/202205070158.7Io6lIxQ-lkp@intel.com/config)
compiler: arc-elf-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/2b2bda510e6c93a57581021c675b023d2f43e81e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
        git checkout 2b2bda510e6c93a57581021c675b023d2f43e81e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=arc SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> block/fops.c:423:5: warning: no previous prototype for 'blkdev_sb_init_dio_done_wq' [-Wmissing-prototypes]
     423 | int blkdev_sb_init_dio_done_wq(struct super_block *sb)
         |     ^~~~~~~~~~~~~~~~~~~~~~~~~~


vim +/blkdev_sb_init_dio_done_wq +423 block/fops.c

   422	
 > 423	int blkdev_sb_init_dio_done_wq(struct super_block *sb)
   424	{
   425		struct workqueue_struct *old;
   426		struct workqueue_struct *wq = alloc_workqueue("dio/%s",
   427							     WQ_MEM_RECLAIM, 0,
   428							     sb->s_id);
   429		if (!wq)
   430		       return -ENOMEM;
   431		/*
   432		 * This has to be atomic as more DIOs can race to create the workqueue
   433		 */
   434		old = cmpxchg(&sb->s_dio_done_wq, NULL, wq);
   435		/* Someone created workqueue before us? Free ours... */
   436		if (old)
   437			destroy_workqueue(wq);
   438	       return 0;
   439	}
   440	

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev
@ 2022-05-06 19:56 kernel test robot
  0 siblings, 0 replies; 4+ messages in thread
From: kernel test robot @ 2022-05-06 19:56 UTC (permalink / raw)
  To: kbuild

[-- Attachment #1: Type: text/plain, Size: 5294 bytes --]

CC: kbuild-all(a)lists.01.org
BCC: lkp(a)intel.com
In-Reply-To: <20220506135709.46872-1-lchen@localhost.localdomain>
References: <20220506135709.46872-1-lchen@localhost.localdomain>
TO: Liang Chen <liangchen.linux@gmail.com>
TO: linux-fsdevel(a)vger.kernel.org
CC: hch(a)infradead.org
CC: jmoyer(a)redhat.com
CC: jack(a)suse.cz
CC: lczerner(a)redhat.com
CC: Liang Chen <liangchen.linux@gmail.com>

Hi Liang,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on axboe-block/for-next]
[also build test WARNING on v5.18-rc5 next-20220506]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/intel-lab-lkp/linux/commits/Liang-Chen/fs-Fix-page-cache-inconsistency-when-mixing-buffered-and-AIO-DIO-for-bdev/20220506-215958
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
:::::: branch date: 6 hours ago
:::::: commit date: 6 hours ago
config: i386-randconfig-c001 (https://download.01.org/0day-ci/archive/20220507/202205070314.rXjpPg15-lkp(a)intel.com/config)
compiler: gcc-11 (Debian 11.2.0-20) 11.2.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Julia Lawall <julia.lawall@lip6.fr>


cocci warnings: (new ones prefixed by >>)
>> block/fops.c:177:5-24: atomic_dec_and_test variation before object free at line 185.

vim +177 block/fops.c

2b2bda510e6c93 Liang Chen        2022-05-06  168  
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  169  static void blkdev_bio_end_io(struct bio *bio)
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  170  {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  171  	struct blkdev_dio *dio = bio->bi_private;
09ce8744253a03 Jens Axboe        2021-10-14  172  	bool should_dirty = dio->flags & DIO_SHOULD_DIRTY;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  173  
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  174  	if (bio->bi_status && !dio->bio.bi_status)
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  175  		dio->bio.bi_status = bio->bi_status;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  176  
e71aa913e26543 Pavel Begunkov    2021-10-27 @177  	if (atomic_dec_and_test(&dio->ref)) {
09ce8744253a03 Jens Axboe        2021-10-14  178  		if (!(dio->flags & DIO_IS_SYNC)) {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  179  			struct kiocb *iocb = dio->iocb;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  180  			ssize_t ret;
2b2bda510e6c93 Liang Chen        2022-05-06  181  			struct inode *inode = bdev_file_inode(iocb->ki_filp);
2b2bda510e6c93 Liang Chen        2022-05-06  182  
2b2bda510e6c93 Liang Chen        2022-05-06  183  			if (iocb->ki_flags & IOCB_WRITE){
2b2bda510e6c93 Liang Chen        2022-05-06  184  				INIT_WORK(&dio->complete_work, blkdev_aio_complete_work);
2b2bda510e6c93 Liang Chen        2022-05-06 @185  				queue_work(inode->i_sb->s_dio_done_wq,
2b2bda510e6c93 Liang Chen        2022-05-06  186  						&dio->complete_work);
2b2bda510e6c93 Liang Chen        2022-05-06  187  				goto out;
2b2bda510e6c93 Liang Chen        2022-05-06  188  			}
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  189  
3e08773c3841e9 Christoph Hellwig 2021-10-12  190  			WRITE_ONCE(iocb->private, NULL);
3e08773c3841e9 Christoph Hellwig 2021-10-12  191  
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  192  			if (likely(!dio->bio.bi_status)) {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  193  				ret = dio->size;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  194  				iocb->ki_pos += ret;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  195  			} else {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  196  				ret = blk_status_to_errno(dio->bio.bi_status);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  197  			}
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  198  
6b19b766e8f077 Jens Axboe        2021-10-21  199  			dio->iocb->ki_complete(iocb, ret);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  200  			bio_put(&dio->bio);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  201  		} else {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  202  			struct task_struct *waiter = dio->waiter;
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  203  
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  204  			WRITE_ONCE(dio->waiter, NULL);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  205  			blk_wake_io_task(waiter);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  206  		}
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  207  	}
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  208  
2b2bda510e6c93 Liang Chen        2022-05-06  209  out:
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  210  	if (should_dirty) {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  211  		bio_check_pages_dirty(bio);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  212  	} else {
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  213  		bio_release_pages(bio, false);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  214  		bio_put(bio);
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  215  	}
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  216  }
cd82cca7ebfe9c Christoph Hellwig 2021-09-07  217  

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-05-06 19:56 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-06 13:57 [PATCH] fs: Fix page cache inconsistency when mixing buffered and AIO DIO for bdev Liang Chen
2022-05-06 16:54 ` kernel test robot
2022-05-06 18:02 ` kernel test robot
2022-05-06 19:56 kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.