All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
To: linux-fsdevel@vger.kernel.org, Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Matthew Wilcox <willy@linux.intel.com>
Subject: [PATCH 3/5] block: Add support for DAX on block devices
Date: Mon, 29 Jun 2015 16:02:30 -0400	[thread overview]
Message-ID: <1435608152-6982-4-git-send-email-matthew.r.wilcox@intel.com> (raw)
In-Reply-To: <1435608152-6982-1-git-send-email-matthew.r.wilcox@intel.com>

From: Matthew Wilcox <willy@linux.intel.com>

Without this patch, accesses to a file on a filesystem on a block device
could be done without the page cache, but accessing the block device
itself would always go through the page cache.

Now reads and writes to a block device that is capable of DAX will always
bypass the page cache.  Loads and stores to an mmapped block device will
bypass the page cache only if the user specified O_DIRECT; otherwise the
mapping still goes through the page cache.  This opt-in from the user is
necessary because DAX mappings are currently incompatible with RDMA and
with O_DIRECT I/O to non-DAX files.

Include support for the DIO_SKIP_DIO_COUNT flag in DAX, which is only
used by the block device driver.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
 fs/block_dev.c | 38 ++++++++++++++++++++++++++++++++++++--
 fs/dax.c       |  6 ++++--
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index f04c873..e3fab8c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -152,6 +152,9 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 
+	if (IS_DAX(inode))
+		return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
+				 NULL, DIO_SKIP_DIO_COUNT);
 	return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
 				    blkdev_get_block, NULL, NULL,
 				    DIO_SKIP_DIO_COUNT);
@@ -333,7 +336,37 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 	mutex_unlock(&bd_inode->i_mutex);
 	return retval;
 }
-	
+
+#ifdef CONFIG_FS_DAX
+static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return dax_fault(vma, vmf, blkdev_get_block);
+}
+
+static int blkdev_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return dax_mkwrite(vma, vmf, blkdev_get_block);
+}
+
+static const struct vm_operations_struct blkdev_dax_vm_ops = {
+	.fault		= blkdev_dax_fault,
+	.page_mkwrite	= blkdev_dax_mkwrite,
+};
+
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if ((IS_DAX(file->f_mapping->host)) && (file->f_flags & O_DIRECT)) {
+		file_accessed(file);
+		vma->vm_ops = &blkdev_dax_vm_ops;
+		vma->vm_flags |= VM_MIXEDMAP;
+		return 0;
+	}
+	return generic_file_mmap(file, vma);
+}
+#else
+#define blkdev_mmap	generic_file_mmap
+#endif
+
 int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
 	struct inode *bd_inode = filp->f_mapping->host;
@@ -1170,6 +1203,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 		bdev->bd_disk = disk;
 		bdev->bd_queue = disk->queue;
 		bdev->bd_contains = bdev;
+		bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
 		if (!partno) {
 			ret = -ENXIO;
 			bdev->bd_part = disk_get_part(disk, partno);
@@ -1670,7 +1704,7 @@ const struct file_operations def_blk_fops = {
 	.llseek		= block_llseek,
 	.read_iter	= blkdev_read_iter,
 	.write_iter	= blkdev_write_iter,
-	.mmap		= generic_file_mmap,
+	.mmap		= blkdev_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
diff --git a/fs/dax.c b/fs/dax.c
index 159f796..37a0c48 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -209,7 +209,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
 	}
 
 	/* Protects against truncate */
-	inode_dio_begin(inode);
+	if (!(flags & DIO_SKIP_DIO_COUNT))
+		inode_dio_begin(inode);
 
 	retval = dax_io(inode, iter, pos, end, get_block, &bh);
 
@@ -219,7 +220,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
 	if ((retval > 0) && end_io)
 		end_io(iocb, pos, retval, bh.b_private);
 
-	inode_dio_end(inode);
+	if (!(flags & DIO_SKIP_DIO_COUNT))
+		inode_dio_end(inode);
  out:
 	return retval;
 }
-- 
2.1.4


  parent reply	other threads:[~2015-06-29 20:02 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-29 20:02 [PATCH 0/5] DAX updates for 4.2 Matthew Wilcox
2015-06-29 20:02 ` [PATCH 1/5] dax: Add block size note to documentation Matthew Wilcox
2015-06-29 20:02 ` [PATCH 2/5] dax: Use copy_from_iter_nocache Matthew Wilcox
2015-06-29 20:02 ` Matthew Wilcox [this message]
2015-06-30 11:19   ` [PATCH 3/5] block: Add support for DAX on block devices Christoph Hellwig
2015-06-30 19:56     ` Matthew Wilcox
2015-07-01  7:19       ` Christoph Hellwig
2015-06-29 20:02 ` [PATCH 4/5] ext4: Use ext4_get_block_write() for DAX Matthew Wilcox
2015-06-29 20:02 ` [PATCH 5/5] vfs: Allow truncate, chmod and chown to be interrupted by fatal signals Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1435608152-6982-4-git-send-email-matthew.r.wilcox@intel.com \
    --to=matthew.r.wilcox@intel.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.