All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vishal Verma <vishal.l.verma@intel.com>
To: linux-nvdimm@lists.01.org
Cc: Jens Axboe <axboe@fb.com>, Jan Kara <jack@suse.cz>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dave Chinner <david@fromorbit.com>,
	xfs@oss.sgi.com, linux-block@vger.kernel.org, linux-mm@kvack.org,
	Matthew Wilcox <matthew.r.wilcox@intel.com>,
	linux-fsdevel@vger.kernel.org, linux-ext4@vger.kernel.org,
	Al Viro <viro@zeniv.linux.org.uk>
Subject: [PATCH 5/5] dax: handle media errors in dax_do_io
Date: Thu, 24 Mar 2016 17:17:30 -0600	[thread overview]
Message-ID: <1458861450-17705-6-git-send-email-vishal.l.verma@intel.com> (raw)
In-Reply-To: <1458861450-17705-1-git-send-email-vishal.l.verma@intel.com>

dax_do_io (called for read() or write() for a dax file system) may fail
in the presence of bad blocks or media errors. Since we expect that a
write should clear media errors on nvdimms, make dax_do_io fall back to
the direct_IO path, which will send down a bio to the driver, which can
then attempt to clear the error.

Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@fb.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 fs/block_dev.c      |  5 +++--
 fs/dax.c            | 34 ++++++++++++++++++++++++++++++++--
 fs/ext2/inode.c     |  5 +++--
 fs/ext4/indirect.c  | 11 +++++++----
 fs/ext4/inode.c     |  5 +++--
 fs/xfs/xfs_aops.c   |  7 ++++---
 include/linux/dax.h |  6 +++++-
 7 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9c0765b..f3873ab 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -168,8 +168,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	struct inode *inode = bdev_file_inode(file);
 
 	if (IS_DAX(inode))
-		return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
-				NULL, DIO_SKIP_DIO_COUNT);
+		return dax_do_io(iocb, inode, I_BDEV(inode), iter, offset,
+				blkdev_get_block, blkdev_get_block,
+				NULL, NULL, DIO_SKIP_DIO_COUNT);
 	return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
 				    blkdev_get_block, NULL, NULL,
 				    DIO_SKIP_DIO_COUNT);
diff --git a/fs/dax.c b/fs/dax.c
index a30481e..b90c8e9 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -208,7 +208,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 }
 
 /**
- * dax_do_io - Perform I/O to a DAX file
+ * __dax_do_io - Perform I/O to a DAX file
  * @iocb: The control block for this I/O
  * @inode: The file which the I/O is directed at
  * @iter: The addresses to do I/O from or to
@@ -224,7 +224,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
  * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
  * is in progress.
  */
-ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+ssize_t __dax_do_io(struct kiocb *iocb, struct inode *inode,
 		  struct iov_iter *iter, loff_t pos, get_block_t get_block,
 		  dio_iodone_t end_io, int flags)
 {
@@ -262,8 +262,38 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
  out:
 	return retval;
 }
+EXPORT_SYMBOL_GPL(__dax_do_io);
+
+/*
+ * This is a library function for use by file systems. If a DAX write fails
+ * with -EIO (e.g. because of a media error), it retries via direct_IO.
+ */
+ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+		  struct block_device *bdev, struct iov_iter *iter, loff_t pos,
+		  get_block_t dax_get_block, get_block_t dio_get_block,
+		  dio_iodone_t end_io, dio_submit_t submit_io, int flags)
+{
+	ssize_t retval;
+
+	retval = __dax_do_io(iocb, inode, iter, pos, dax_get_block, end_io,
+				flags);
+	if (iov_iter_rw(iter) == WRITE && retval == -EIO) {
+		/*
+		 * __dax_do_io may have failed a write due to a bad block.
+		 * Retry with direct_io, and if the direct_IO also fails,
+		 * return -EIO as that was the original error that led us
+		 * down the direct_IO path.
+		 */
+		retval = __blockdev_direct_IO(iocb, inode, bdev, iter, pos,
+				dio_get_block, end_io, submit_io, flags);
+		if (retval < 0)
+			return -EIO;
+	}
+	return retval;
+}
 EXPORT_SYMBOL_GPL(dax_do_io);
 
+
 /*
  * The user has performed a load from a hole in the file.  Allocating
  * a new page in the file would cause excessive storage usage for
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 824f249..8a307cf 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -862,8 +862,9 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	ssize_t ret;
 
 	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL,
-				DIO_LOCKING);
+		ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter,
+				offset, ext2_get_block, ext2_get_block,
+				NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES);
 	else
 		ret = blockdev_direct_IO(iocb, inode, iter, offset,
 					 ext2_get_block);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 355ef9c..4b087b7 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -692,8 +692,9 @@ retry:
 			goto locked;
 		}
 		if (IS_DAX(inode))
-			ret = dax_do_io(iocb, inode, iter, offset,
-					ext4_get_block, NULL, 0);
+			ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter,
+					offset, ext4_get_block, ext4_get_block,
+					NULL, NULL, 0);
 		else
 			ret = __blockdev_direct_IO(iocb, inode,
 						   inode->i_sb->s_bdev, iter,
@@ -703,8 +704,10 @@ retry:
 	} else {
 locked:
 		if (IS_DAX(inode))
-			ret = dax_do_io(iocb, inode, iter, offset,
-					ext4_get_block, NULL, DIO_LOCKING);
+			ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter,
+					offset, ext4_get_block, ext4_get_block,
+					NULL, NULL, DIO_LOCKING |
+					DIO_SKIP_HOLES);
 		else
 			ret = blockdev_direct_IO(iocb, inode, iter, offset,
 						 ext4_get_block);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index aee960b..4220dac 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3315,8 +3315,9 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
 #endif
 	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, offset, get_block_func,
-				ext4_end_io_dio, dio_flags);
+		ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter, offset,
+				get_block_func, get_block_func,
+				ext4_end_io_dio, NULL, dio_flags);
 	else
 		ret = __blockdev_direct_IO(iocb, inode,
 					   inode->i_sb->s_bdev, iter, offset,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a9ebabfe..dc4e088 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1682,11 +1682,12 @@ xfs_vm_do_dio(
 					 void		*private),
 	int			flags)
 {
-	struct block_device	*bdev;
+	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
 
 	if (IS_DAX(inode))
-		return dax_do_io(iocb, inode, iter, offset,
-				 xfs_get_blocks_direct, endio, 0);
+		return dax_do_io(iocb, inode, bdev, iter, offset,
+				 xfs_get_blocks_direct, xfs_get_blocks_direct,
+				 endio, NULL, flags);
 
 	bdev = xfs_find_bdev_for_inode(inode);
 	return  __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 933198a..6981076 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -5,8 +5,12 @@
 #include <linux/mm.h>
 #include <asm/pgtable.h>
 
-ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
+ssize_t __dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
 		  get_block_t, dio_iodone_t, int flags);
+ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+		  struct block_device *bdev, struct iov_iter *iter, loff_t pos,
+		  get_block_t dax_get_block, get_block_t dio_get_block,
+		  dio_iodone_t end_io, dio_submit_t submit_io, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
-- 
2.5.5

_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm

WARNING: multiple messages have this Message-ID (diff)
From: Vishal Verma <vishal.l.verma@intel.com>
To: linux-nvdimm@lists.01.org
Cc: Vishal Verma <vishal.l.verma@intel.com>,
	linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
	xfs@oss.sgi.com, linux-ext4@vger.kernel.org, linux-mm@kvack.org,
	Matthew Wilcox <matthew.r.wilcox@intel.com>,
	Ross Zwisler <ross.zwisler@linux.intel.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Chinner <david@fromorbit.com>, Jan Kara <jack@suse.cz>,
	Jens Axboe <axboe@fb.com>, Al Viro <viro@zeniv.linux.org.uk>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [PATCH 5/5] dax: handle media errors in dax_do_io
Date: Thu, 24 Mar 2016 17:17:30 -0600	[thread overview]
Message-ID: <1458861450-17705-6-git-send-email-vishal.l.verma@intel.com> (raw)
In-Reply-To: <1458861450-17705-1-git-send-email-vishal.l.verma@intel.com>

dax_do_io (called for read() or write() for a dax file system) may fail
in the presence of bad blocks or media errors. Since we expect that a
write should clear media errors on nvdimms, make dax_do_io fall back to
the direct_IO path, which will send down a bio to the driver, which can
then attempt to clear the error.

Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@fb.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 fs/block_dev.c      |  5 +++--
 fs/dax.c            | 34 ++++++++++++++++++++++++++++++++--
 fs/ext2/inode.c     |  5 +++--
 fs/ext4/indirect.c  | 11 +++++++----
 fs/ext4/inode.c     |  5 +++--
 fs/xfs/xfs_aops.c   |  7 ++++---
 include/linux/dax.h |  6 +++++-
 7 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9c0765b..f3873ab 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -168,8 +168,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	struct inode *inode = bdev_file_inode(file);
 
 	if (IS_DAX(inode))
-		return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
-				NULL, DIO_SKIP_DIO_COUNT);
+		return dax_do_io(iocb, inode, I_BDEV(inode), iter, offset,
+				blkdev_get_block, blkdev_get_block,
+				NULL, NULL, DIO_SKIP_DIO_COUNT);
 	return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
 				    blkdev_get_block, NULL, NULL,
 				    DIO_SKIP_DIO_COUNT);
diff --git a/fs/dax.c b/fs/dax.c
index a30481e..b90c8e9 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -208,7 +208,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 }
 
 /**
- * dax_do_io - Perform I/O to a DAX file
+ * __dax_do_io - Perform I/O to a DAX file
  * @iocb: The control block for this I/O
  * @inode: The file which the I/O is directed at
  * @iter: The addresses to do I/O from or to
@@ -224,7 +224,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
  * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
  * is in progress.
  */
-ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+ssize_t __dax_do_io(struct kiocb *iocb, struct inode *inode,
 		  struct iov_iter *iter, loff_t pos, get_block_t get_block,
 		  dio_iodone_t end_io, int flags)
 {
@@ -262,8 +262,38 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
  out:
 	return retval;
 }
+EXPORT_SYMBOL_GPL(__dax_do_io);
+
+/*
+ * This is a library function for use by file systems. If a DAX write fails
+ * with -EIO (e.g. because of a media error), it retries via direct_IO.
+ */
+ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+		  struct block_device *bdev, struct iov_iter *iter, loff_t pos,
+		  get_block_t dax_get_block, get_block_t dio_get_block,
+		  dio_iodone_t end_io, dio_submit_t submit_io, int flags)
+{
+	ssize_t retval;
+
+	retval = __dax_do_io(iocb, inode, iter, pos, dax_get_block, end_io,
+				flags);
+	if (iov_iter_rw(iter) == WRITE && retval == -EIO) {
+		/*
+		 * __dax_do_io may have failed a write due to a bad block.
+		 * Retry with direct_io, and if the direct_IO also fails,
+		 * return -EIO as that was the original error that led us
+		 * down the direct_IO path.
+		 */
+		retval = __blockdev_direct_IO(iocb, inode, bdev, iter, pos,
+				dio_get_block, end_io, submit_io, flags);
+		if (retval < 0)
+			return -EIO;
+	}
+	return retval;
+}
 EXPORT_SYMBOL_GPL(dax_do_io);
 
+
 /*
  * The user has performed a load from a hole in the file.  Allocating
  * a new page in the file would cause excessive storage usage for
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 824f249..8a307cf 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -862,8 +862,9 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	ssize_t ret;
 
 	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL,
-				DIO_LOCKING);
+		ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter,
+				offset, ext2_get_block, ext2_get_block,
+				NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES);
 	else
 		ret = blockdev_direct_IO(iocb, inode, iter, offset,
 					 ext2_get_block);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 355ef9c..4b087b7 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -692,8 +692,9 @@ retry:
 			goto locked;
 		}
 		if (IS_DAX(inode))
-			ret = dax_do_io(iocb, inode, iter, offset,
-					ext4_get_block, NULL, 0);
+			ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter,
+					offset, ext4_get_block, ext4_get_block,
+					NULL, NULL, 0);
 		else
 			ret = __blockdev_direct_IO(iocb, inode,
 						   inode->i_sb->s_bdev, iter,
@@ -703,8 +704,10 @@ retry:
 	} else {
 locked:
 		if (IS_DAX(inode))
-			ret = dax_do_io(iocb, inode, iter, offset,
-					ext4_get_block, NULL, DIO_LOCKING);
+			ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter,
+					offset, ext4_get_block, ext4_get_block,
+					NULL, NULL, DIO_LOCKING |
+					DIO_SKIP_HOLES);
 		else
 			ret = blockdev_direct_IO(iocb, inode, iter, offset,
 						 ext4_get_block);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index aee960b..4220dac 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3315,8 +3315,9 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
 #endif
 	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, offset, get_block_func,
-				ext4_end_io_dio, dio_flags);
+		ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter, offset,
+				get_block_func, get_block_func,
+				ext4_end_io_dio, NULL, dio_flags);
 	else
 		ret = __blockdev_direct_IO(iocb, inode,
 					   inode->i_sb->s_bdev, iter, offset,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a9ebabfe..dc4e088 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1682,11 +1682,12 @@ xfs_vm_do_dio(
 					 void		*private),
 	int			flags)
 {
-	struct block_device	*bdev;
+	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
 
 	if (IS_DAX(inode))
-		return dax_do_io(iocb, inode, iter, offset,
-				 xfs_get_blocks_direct, endio, 0);
+		return dax_do_io(iocb, inode, bdev, iter, offset,
+				 xfs_get_blocks_direct, xfs_get_blocks_direct,
+				 endio, NULL, flags);
 
 	bdev = xfs_find_bdev_for_inode(inode);
 	return  __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 933198a..6981076 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -5,8 +5,12 @@
 #include <linux/mm.h>
 #include <asm/pgtable.h>
 
-ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
+ssize_t __dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
 		  get_block_t, dio_iodone_t, int flags);
+ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+		  struct block_device *bdev, struct iov_iter *iter, loff_t pos,
+		  get_block_t dax_get_block, get_block_t dio_get_block,
+		  dio_iodone_t end_io, dio_submit_t submit_io, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
-- 
2.5.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)
From: Vishal Verma <vishal.l.verma@intel.com>
To: linux-nvdimm@lists.01.org
Cc: Jens Axboe <axboe@fb.com>, Jan Kara <jack@suse.cz>,
	Andrew Morton <akpm@linux-foundation.org>,
	Vishal Verma <vishal.l.verma@intel.com>,
	xfs@oss.sgi.com, linux-block@vger.kernel.org, linux-mm@kvack.org,
	Matthew Wilcox <matthew.r.wilcox@intel.com>,
	linux-fsdevel@vger.kernel.org,
	Ross Zwisler <ross.zwisler@linux.intel.com>,
	linux-ext4@vger.kernel.org,
	Dan Williams <dan.j.williams@intel.com>,
	Al Viro <viro@zeniv.linux.org.uk>
Subject: [PATCH 5/5] dax: handle media errors in dax_do_io
Date: Thu, 24 Mar 2016 17:17:30 -0600	[thread overview]
Message-ID: <1458861450-17705-6-git-send-email-vishal.l.verma@intel.com> (raw)
In-Reply-To: <1458861450-17705-1-git-send-email-vishal.l.verma@intel.com>

dax_do_io (called for read() or write() for a dax file system) may fail
in the presence of bad blocks or media errors. Since we expect that a
write should clear media errors on nvdimms, make dax_do_io fall back to
the direct_IO path, which will send down a bio to the driver, which can
then attempt to clear the error.

Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@fb.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 fs/block_dev.c      |  5 +++--
 fs/dax.c            | 34 ++++++++++++++++++++++++++++++++--
 fs/ext2/inode.c     |  5 +++--
 fs/ext4/indirect.c  | 11 +++++++----
 fs/ext4/inode.c     |  5 +++--
 fs/xfs/xfs_aops.c   |  7 ++++---
 include/linux/dax.h |  6 +++++-
 7 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9c0765b..f3873ab 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -168,8 +168,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	struct inode *inode = bdev_file_inode(file);
 
 	if (IS_DAX(inode))
-		return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
-				NULL, DIO_SKIP_DIO_COUNT);
+		return dax_do_io(iocb, inode, I_BDEV(inode), iter, offset,
+				blkdev_get_block, blkdev_get_block,
+				NULL, NULL, DIO_SKIP_DIO_COUNT);
 	return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
 				    blkdev_get_block, NULL, NULL,
 				    DIO_SKIP_DIO_COUNT);
diff --git a/fs/dax.c b/fs/dax.c
index a30481e..b90c8e9 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -208,7 +208,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
 }
 
 /**
- * dax_do_io - Perform I/O to a DAX file
+ * __dax_do_io - Perform I/O to a DAX file
  * @iocb: The control block for this I/O
  * @inode: The file which the I/O is directed at
  * @iter: The addresses to do I/O from or to
@@ -224,7 +224,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
  * As with do_blockdev_direct_IO(), we increment i_dio_count while the I/O
  * is in progress.
  */
-ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+ssize_t __dax_do_io(struct kiocb *iocb, struct inode *inode,
 		  struct iov_iter *iter, loff_t pos, get_block_t get_block,
 		  dio_iodone_t end_io, int flags)
 {
@@ -262,8 +262,38 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
  out:
 	return retval;
 }
+EXPORT_SYMBOL_GPL(__dax_do_io);
+
+/*
+ * This is a library function for use by file systems. If a DAX write fails
+ * with -EIO (e.g. because of a media error), it retries via direct_IO.
+ */
+ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+		  struct block_device *bdev, struct iov_iter *iter, loff_t pos,
+		  get_block_t dax_get_block, get_block_t dio_get_block,
+		  dio_iodone_t end_io, dio_submit_t submit_io, int flags)
+{
+	ssize_t retval;
+
+	retval = __dax_do_io(iocb, inode, iter, pos, dax_get_block, end_io,
+				flags);
+	if (iov_iter_rw(iter) == WRITE && retval == -EIO) {
+		/*
+		 * __dax_do_io may have failed a write due to a bad block.
+		 * Retry with direct_io, and if the direct_IO also fails,
+		 * return -EIO as that was the original error that led us
+		 * down the direct_IO path.
+		 */
+		retval = __blockdev_direct_IO(iocb, inode, bdev, iter, pos,
+				dio_get_block, end_io, submit_io, flags);
+		if (retval < 0)
+			return -EIO;
+	}
+	return retval;
+}
 EXPORT_SYMBOL_GPL(dax_do_io);
 
+
 /*
  * The user has performed a load from a hole in the file.  Allocating
  * a new page in the file would cause excessive storage usage for
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 824f249..8a307cf 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -862,8 +862,9 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	ssize_t ret;
 
 	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, offset, ext2_get_block, NULL,
-				DIO_LOCKING);
+		ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter,
+				offset, ext2_get_block, ext2_get_block,
+				NULL, NULL, DIO_LOCKING | DIO_SKIP_HOLES);
 	else
 		ret = blockdev_direct_IO(iocb, inode, iter, offset,
 					 ext2_get_block);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 355ef9c..4b087b7 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -692,8 +692,9 @@ retry:
 			goto locked;
 		}
 		if (IS_DAX(inode))
-			ret = dax_do_io(iocb, inode, iter, offset,
-					ext4_get_block, NULL, 0);
+			ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter,
+					offset, ext4_get_block, ext4_get_block,
+					NULL, NULL, 0);
 		else
 			ret = __blockdev_direct_IO(iocb, inode,
 						   inode->i_sb->s_bdev, iter,
@@ -703,8 +704,10 @@ retry:
 	} else {
 locked:
 		if (IS_DAX(inode))
-			ret = dax_do_io(iocb, inode, iter, offset,
-					ext4_get_block, NULL, DIO_LOCKING);
+			ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter,
+					offset, ext4_get_block, ext4_get_block,
+					NULL, NULL, DIO_LOCKING |
+					DIO_SKIP_HOLES);
 		else
 			ret = blockdev_direct_IO(iocb, inode, iter, offset,
 						 ext4_get_block);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index aee960b..4220dac 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3315,8 +3315,9 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
 #endif
 	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, offset, get_block_func,
-				ext4_end_io_dio, dio_flags);
+		ret = dax_do_io(iocb, inode, inode->i_sb->s_bdev, iter, offset,
+				get_block_func, get_block_func,
+				ext4_end_io_dio, NULL, dio_flags);
 	else
 		ret = __blockdev_direct_IO(iocb, inode,
 					   inode->i_sb->s_bdev, iter, offset,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a9ebabfe..dc4e088 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1682,11 +1682,12 @@ xfs_vm_do_dio(
 					 void		*private),
 	int			flags)
 {
-	struct block_device	*bdev;
+	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
 
 	if (IS_DAX(inode))
-		return dax_do_io(iocb, inode, iter, offset,
-				 xfs_get_blocks_direct, endio, 0);
+		return dax_do_io(iocb, inode, bdev, iter, offset,
+				 xfs_get_blocks_direct, xfs_get_blocks_direct,
+				 endio, NULL, flags);
 
 	bdev = xfs_find_bdev_for_inode(inode);
 	return  __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 933198a..6981076 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -5,8 +5,12 @@
 #include <linux/mm.h>
 #include <asm/pgtable.h>
 
-ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
+ssize_t __dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
 		  get_block_t, dio_iodone_t, int flags);
+ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
+		  struct block_device *bdev, struct iov_iter *iter, loff_t pos,
+		  get_block_t dax_get_block, get_block_t dio_get_block,
+		  dio_iodone_t end_io, dio_submit_t submit_io, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
-- 
2.5.5

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

  parent reply	other threads:[~2016-03-24 23:18 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-24 23:17 [PATCH 0/5] dax: handling of media errors Vishal Verma
2016-03-24 23:17 ` Vishal Verma
2016-03-24 23:17 ` Vishal Verma
2016-03-24 23:17 ` Vishal Verma
2016-03-24 23:17 ` [PATCH 1/5] block, dax: pass blk_dax_ctl through to drivers Vishal Verma
2016-03-24 23:17   ` Vishal Verma
2016-03-24 23:17 ` [PATCH 2/5] dax: fallback from pmd to pte on error Vishal Verma
2016-03-24 23:17   ` Vishal Verma
2016-03-24 23:17   ` Vishal Verma
2016-03-24 23:17 ` [PATCH 3/5] dax: enable dax in the presence of known media errors (badblocks) Vishal Verma
2016-03-24 23:17   ` Vishal Verma
2016-03-24 23:17   ` Vishal Verma
2016-03-24 23:23   ` Verma, Vishal L
2016-03-24 23:23     ` Verma, Vishal L
2016-03-24 23:23     ` Verma, Vishal L
2016-03-24 23:17 ` [PATCH 4/5] dax: use sb_issue_zeroout instead of calling dax_clear_sectors Vishal Verma
2016-03-24 23:17   ` Vishal Verma
2016-03-24 23:17   ` Vishal Verma
2016-03-25 10:44   ` Christoph Hellwig
2016-03-25 10:44     ` Christoph Hellwig
2016-03-25 21:01     ` Verma, Vishal L
2016-03-25 21:01       ` Verma, Vishal L
2016-03-25 18:47   ` Dan Williams
2016-03-25 18:47     ` Dan Williams
2016-03-25 18:47     ` Dan Williams
2016-03-25 21:03     ` Verma, Vishal L
2016-03-25 21:03       ` Verma, Vishal L
2016-03-25 21:03       ` Verma, Vishal L
2016-03-25 21:20       ` Dan Williams
2016-03-25 21:20         ` Dan Williams
2016-03-25 21:20         ` Dan Williams
2016-03-28 20:01         ` Verma, Vishal L
2016-03-28 20:01           ` Verma, Vishal L
2016-03-28 20:01           ` Verma, Vishal L
2016-03-28 23:34           ` Dan Williams
2016-03-28 23:34             ` Dan Williams
2016-03-28 23:34             ` Dan Williams
2016-03-28 23:34             ` Dan Williams
2016-03-29 18:57             ` Verma, Vishal L
2016-03-29 18:57               ` Verma, Vishal L
2016-03-29 18:57               ` Verma, Vishal L
2016-03-29 18:57               ` Verma, Vishal L
2016-03-29 19:37               ` Dan Williams
2016-03-29 19:37                 ` Dan Williams
2016-03-29 19:37                 ` Dan Williams
2016-03-29 19:37                 ` Dan Williams
2016-03-30  7:49               ` Jan Kara
2016-03-30  7:49                 ` Jan Kara
2016-03-30  7:49                 ` Jan Kara
2016-03-30  7:49                 ` Jan Kara
2016-03-30  7:49                 ` Jan Kara
2016-04-01 19:17                 ` Verma, Vishal L
2016-04-01 19:17                   ` Verma, Vishal L
2016-04-01 19:17                   ` Verma, Vishal L
2016-04-04 12:09                   ` Jan Kara
2016-04-04 12:09                     ` Jan Kara
2016-04-04 12:09                     ` Jan Kara
2016-04-04 12:09                     ` Jan Kara
2016-04-04 12:09                     ` Jan Kara
2016-03-24 23:17 ` Vishal Verma [this message]
2016-03-24 23:17   ` [PATCH 5/5] dax: handle media errors in dax_do_io Vishal Verma
2016-03-24 23:17   ` Vishal Verma
2016-03-25 10:45   ` Christoph Hellwig
2016-03-25 10:45     ` Christoph Hellwig
2016-03-25 10:45     ` Christoph Hellwig
2016-03-25 20:59     ` Verma, Vishal L
2016-03-25 20:59       ` Verma, Vishal L
2016-03-25 21:42       ` Dan Williams
2016-03-25 21:42         ` Dan Williams
2016-03-25 22:36         ` Verma, Vishal L
2016-03-25 22:36           ` Verma, Vishal L
2016-03-25 22:36           ` Verma, Vishal L
2016-03-26 16:53         ` hch
2016-03-26 16:53           ` hch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1458861450-17705-6-git-send-email-vishal.l.verma@intel.com \
    --to=vishal.l.verma@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=axboe@fb.com \
    --cc=david@fromorbit.com \
    --cc=jack@suse.cz \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=matthew.r.wilcox@intel.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.