linux-api.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
To: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Cc: linux-xfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>,
	linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org,
	linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Christoph Hellwig <hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org,
	linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-ext4-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: [PATCH 16/17] ext4: Support for synchronous DAX faults
Date: Tue, 24 Oct 2017 17:24:13 +0200	[thread overview]
Message-ID: <20171024152415.22864-17-jack@suse.cz> (raw)
In-Reply-To: <20171024152415.22864-1-jack-AlSwsSmVLrQ@public.gmane.org>

We return IOMAP_F_DIRTY flag from ext4_iomap_begin() when asked to
prepare blocks for writing and the inode has some uncommitted metadata
changes. In the fault handler ext4_dax_fault() we then detect this case
(through VM_FAULT_NEEDDSYNC return value) and call helper
dax_finish_sync_fault() to flush metadata changes and insert page table
entry. Note that this will also dirty corresponding radix tree entry
which is what we want - fsync(2) will still provide data integrity
guarantees for applications not using userspace flushing. And
applications using userspace flushing can avoid calling fsync(2) and
thus avoid the performance overhead.

Reviewed-by: Ross Zwisler <ross.zwisler-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Signed-off-by: Jan Kara <jack-AlSwsSmVLrQ@public.gmane.org>
---
 fs/ext4/file.c       | 15 ++++++++++++++-
 fs/ext4/inode.c      | 15 +++++++++++++++
 fs/jbd2/journal.c    | 17 +++++++++++++++++
 include/linux/jbd2.h |  1 +
 4 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 208adfc3e673..08a1d1a33a90 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -26,6 +26,7 @@
 #include <linux/quotaops.h>
 #include <linux/pagevec.h>
 #include <linux/uio.h>
+#include <linux/mman.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -295,6 +296,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
 	 */
 	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
 		(vmf->vma->vm_flags & VM_SHARED);
+	pfn_t pfn;
 
 	if (write) {
 		sb_start_pagefault(sb);
@@ -310,9 +312,12 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
 	} else {
 		down_read(&EXT4_I(inode)->i_mmap_sem);
 	}
-	result = dax_iomap_fault(vmf, pe_size, NULL, &ext4_iomap_ops);
+	result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops);
 	if (write) {
 		ext4_journal_stop(handle);
+		/* Handling synchronous page fault? */
+		if (result & VM_FAULT_NEEDDSYNC)
+			result = dax_finish_sync_fault(vmf, pe_size, pfn);
 		up_read(&EXT4_I(inode)->i_mmap_sem);
 		sb_end_pagefault(sb);
 	} else {
@@ -350,6 +355,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
 	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
 		return -EIO;
 
+	/*
+	 * We don't support synchronous mappings for non-DAX files. At least
+	 * until someone comes with a sensible use case.
+	 */
+	if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
+		return -EOPNOTSUPP;
+
 	file_accessed(file);
 	if (IS_DAX(file_inode(file))) {
 		vma->vm_ops = &ext4_dax_vm_ops;
@@ -719,6 +731,7 @@ const struct file_operations ext4_file_operations = {
 	.compat_ioctl	= ext4_compat_ioctl,
 #endif
 	.mmap		= ext4_file_mmap,
+	.mmap_supported_flags = MAP_SYNC,
 	.open		= ext4_file_open,
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 31db875bc7a1..13a198924a0f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3394,6 +3394,19 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 }
 
 #ifdef CONFIG_FS_DAX
+static bool ext4_inode_datasync_dirty(struct inode *inode)
+{
+	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+
+	if (journal)
+		return !jbd2_transaction_committed(journal,
+					EXT4_I(inode)->i_datasync_tid);
+	/* Any metadata buffers to write? */
+	if (!list_empty(&inode->i_mapping->private_list))
+		return true;
+	return inode->i_state & I_DIRTY_DATASYNC;
+}
+
 static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 			    unsigned flags, struct iomap *iomap)
 {
@@ -3466,6 +3479,8 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	}
 
 	iomap->flags = 0;
+	if ((flags & IOMAP_WRITE) && ext4_inode_datasync_dirty(inode))
+		iomap->flags |= IOMAP_F_DIRTY;
 	iomap->bdev = inode->i_sb->s_bdev;
 	iomap->dax_dev = sbi->s_daxdev;
 	iomap->offset = first_block << blkbits;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 7d5ef3bf3f3e..fa8cde498b4b 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -738,6 +738,23 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
 	return err;
 }
 
+/* Return 1 when transaction with given tid has already committed. */
+int jbd2_transaction_committed(journal_t *journal, tid_t tid)
+{
+	int ret = 1;
+
+	read_lock(&journal->j_state_lock);
+	if (journal->j_running_transaction &&
+	    journal->j_running_transaction->t_tid == tid)
+		ret = 0;
+	if (journal->j_committing_transaction &&
+	    journal->j_committing_transaction->t_tid == tid)
+		ret = 0;
+	read_unlock(&journal->j_state_lock);
+	return ret;
+}
+EXPORT_SYMBOL(jbd2_transaction_committed);
+
 /*
  * When this function returns the transaction corresponding to tid
  * will be completed.  If the transaction has currently running, start
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 606b6bce3a5b..296d1e0ea87b 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1367,6 +1367,7 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid);
 int __jbd2_log_start_commit(journal_t *journal, tid_t tid);
 int jbd2_journal_start_commit(journal_t *journal, tid_t *tid);
 int jbd2_log_wait_commit(journal_t *journal, tid_t tid);
+int jbd2_transaction_committed(journal_t *journal, tid_t tid);
 int jbd2_complete_transaction(journal_t *journal, tid_t tid);
 int jbd2_log_do_checkpoint(journal_t *journal);
 int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
-- 
2.12.3

  parent reply	other threads:[~2017-10-24 15:24 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-24 15:23 [PATCH 0/17 v5] dax, ext4, xfs: Synchronous page faults Jan Kara
     [not found] ` <20171024152415.22864-1-jack-AlSwsSmVLrQ@public.gmane.org>
2017-10-24 15:23   ` [PATCH 01/17] mm: introduce MAP_SHARED_VALIDATE, a mechanism to safely define new mmap flags Jan Kara
     [not found]     ` <20171024152415.22864-2-jack-AlSwsSmVLrQ@public.gmane.org>
2017-10-24 21:21       ` Ross Zwisler
2017-10-24 15:23   ` [PATCH 02/17] mm: Remove VM_FAULT_HWPOISON_LARGE_MASK Jan Kara
2017-10-24 15:24   ` [PATCH 03/17] dax: Simplify arguments of dax_insert_mapping() Jan Kara
2017-10-24 15:24   ` [PATCH 05/17] dax: Create local variable for VMA in dax_iomap_pte_fault() Jan Kara
2017-10-24 15:24   ` [PATCH 06/17] dax: Create local variable for vmf->flags & FAULT_FLAG_WRITE test Jan Kara
2017-10-24 15:24   ` [PATCH 07/17] dax: Inline dax_insert_mapping() into the callsite Jan Kara
2017-10-24 15:24   ` [PATCH 08/17] dax: Inline dax_pmd_insert_mapping() " Jan Kara
2017-10-24 15:24   ` [PATCH 09/17] dax: Fix comment describing dax_iomap_fault() Jan Kara
2017-10-24 15:24   ` [PATCH 10/17] dax: Allow dax_iomap_fault() to return pfn Jan Kara
2017-10-24 15:24   ` [PATCH 12/17] mm: Define MAP_SYNC and VM_SYNC flags Jan Kara
2017-10-24 15:24   ` [PATCH 13/17] dax, iomap: Add support for synchronous faults Jan Kara
2017-10-24 15:24   ` [PATCH 15/17] ext4: Simplify error handling in ext4_dax_huge_fault() Jan Kara
2017-10-24 15:24   ` Jan Kara [this message]
2017-10-24 15:24   ` [PATCH] mmap.2: Add description of MAP_SHARED_VALIDATE and MAP_SYNC Jan Kara
2017-10-24 21:10     ` Ross Zwisler
2017-10-26 13:24       ` Jan Kara
     [not found]         ` <20171026132402.GB31161-4I4JzKEfoa/jFM9bn6wA6Q@public.gmane.org>
2017-10-26 18:22           ` Ross Zwisler
2017-10-24 15:24 ` [PATCH 04/17] dax: Factor out getting of pfn out of iomap Jan Kara
2017-10-24 15:24 ` [PATCH 11/17] dax: Allow tuning whether dax_insert_mapping_entry() dirties entry Jan Kara
2017-10-24 15:24 ` [PATCH 14/17] dax: Implement dax_finish_sync_fault() Jan Kara
2017-10-24 15:24 ` [PATCH 17/17] xfs: support for synchronous DAX faults Jan Kara
2017-10-24 21:29   ` Ross Zwisler
2017-10-24 22:23   ` Dave Chinner
2017-10-26 15:48     ` Jan Kara
     [not found]       ` <20171026154804.GF31161-4I4JzKEfoa/jFM9bn6wA6Q@public.gmane.org>
2017-10-26 21:16         ` Dave Chinner
2017-10-27 10:08           ` Jan Kara
     [not found]             ` <20171027100834.GH31161-4I4JzKEfoa/jFM9bn6wA6Q@public.gmane.org>
2017-10-31 15:19               ` Jan Kara
     [not found]                 ` <20171031151907.GB26128-4I4JzKEfoa/jFM9bn6wA6Q@public.gmane.org>
2017-10-31 21:50                   ` Dan Williams
     [not found]                     ` <CAPcyv4gBVyZ8KhB2s1M0BdhC1=HAean6Mb6oV7zsJBx0-t3bhw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-11-01  3:47                       ` Ross Zwisler
2017-10-27  6:43       ` Christoph Hellwig
     [not found]         ` <20171027064301.GC22931-jcswGhMUV9g@public.gmane.org>
2017-10-27  9:13           ` Jan Kara
  -- strict thread matches above, loose matches on Subject: below --
2017-10-19 12:57 [PATCH 0/17 v4] dax, ext4, xfs: Synchronous page faults Jan Kara
     [not found] ` <20171019125817.11580-1-jack-AlSwsSmVLrQ@public.gmane.org>
2017-10-19 12:58   ` [PATCH 16/17] ext4: Support for synchronous DAX faults Jan Kara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171024152415.22864-17-jack@suse.cz \
    --to=jack-alswssmvlrq@public.gmane.org \
    --cc=dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=hch-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org \
    --cc=linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-ext4-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-fsdevel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org \
    --cc=linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw@public.gmane.org \
    --cc=linux-xfs-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).