From: Jan Kara <jack@suse.cz>
To: linux-fsdevel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>, Jan Kara <jack@suse.cz>,
	linux-nvdimm@lists.01.org, Dave Chinner <david@fromorbit.com>,
	linux-xfs@vger.kernel.org, Andy Lutomirski <luto@kernel.org>,
	linux-ext4@vger.kernel.org
Subject: [PATCH 5/7] dax, iomap: Add support for synchronous faults
Date: Thu, 27 Jul 2017 15:12:43 +0200
Message-ID: <20170727131245.28279-6-jack@suse.cz>
In-Reply-To: <20170727131245.28279-1-jack@suse.cz>

Add a flag to the iomap interface informing the caller that the inode
needs fdatasync(2) for the returned extent to become persistent, and
use it in the DAX fault code so that we map such extents read-only. We
propagate the information that the page table entry has been inserted
write-protected out of dax_iomap_fault() with a new VM_FAULT_RO flag.
The filesystem fault handler is then responsible for calling
fdatasync(2) and updating the page tables to map the pfns read-write.
dax_iomap_fault() also takes care of updating vmf->orig_pte to match
the PTE that was inserted, so that we can safely recheck that the PTE
did not change while write-enabling it.
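
As an illustration of the filesystem side, an ->iomap_begin()
implementation would report the new flag whenever the extent it
returns is not yet on stable storage. The snippet below is only a
sketch (the function and the 'new' condition are hypothetical; the
real wiring for ext4 is done in patch 7/7 of this series):

	/*
	 * Sketch only: report that the mapped extent still needs
	 * fdatasync(2) before its allocation is persistent.
	 */
	static int example_iomap_begin(struct inode *inode, loff_t offset,
				       loff_t length, unsigned flags,
				       struct iomap *iomap)
	{
		bool new = false;

		/*
		 * ... map or allocate blocks for the range, setting
		 * 'new' when blocks were freshly allocated and their
		 * metadata has not been committed yet ...
		 */
		if (new)
			iomap->flags |= IOMAP_F_NEW | IOMAP_F_NEEDDSYNC;
		return 0;
	}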

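On the consuming side, the flow this enables in a filesystem fault
handler looks roughly as follows. This is a sketch under the
assumption that dax_iomap_fault() takes the 'sync' argument added in
patch 2/7 and that dax_pfn_mkwrite() from patch 6/7 performs the
RO->RW upgrade using the PFN stashed in vmf->orig_pte; the exact
signatures and range handling are illustrative:

	static int example_dax_fault(struct vm_fault *vmf,
				     enum page_entry_size pe_size)
	{
		struct file *file = vmf->vma->vm_file;
		loff_t start = (loff_t)vmf->pgoff << PAGE_SHIFT;
		int result;

		result = dax_iomap_fault(vmf, pe_size,
					 IS_SYNC(file_inode(file)));
		if (result & VM_FAULT_RO) {
			/*
			 * The entry was installed write-protected because
			 * the extent carried IOMAP_F_NEEDDSYNC. Commit the
			 * metadata first (for a PMD fault the range would
			 * cover PMD_SIZE), then map the pfn read-write.
			 */
			if (vfs_fsync_range(file, start,
					    start + PAGE_SIZE - 1, 1))
				return VM_FAULT_SIGBUS;
			result = dax_pfn_mkwrite(vmf, pe_size);
		}
		return result;
	}

The invariant this preserves is that userspace never holds a writeable
DAX mapping of blocks whose allocation metadata has not yet reached
stable storage.
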
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/dax.c              | 42 +++++++++++++++++++++++++++++++++++-------
 include/linux/iomap.h |  2 ++
 include/linux/mm.h    |  2 ++
 3 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 9658975b926a..8a6cf158c691 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -829,7 +829,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
 }
 
 static int dax_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
-			      loff_t pos, void *entry)
+			      loff_t pos, void *entry, bool force_ro)
 {
 	const sector_t sector = dax_iomap_sector(iomap, pos);
 	struct vm_area_struct *vma = vmf->vma;
@@ -858,7 +858,7 @@ static int dax_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
 		return PTR_ERR(ret);
 
 	trace_dax_insert_mapping(mapping->host, vmf, ret);
-	if (vmf->flags & FAULT_FLAG_WRITE)
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !force_ro)
 		rc = vm_insert_mixed_mkwrite(vma, vaddr, pfn);
 	else
 		rc = vm_insert_mixed(vma, vaddr, pfn);
@@ -870,6 +870,14 @@ static int dax_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
 	vmf_ret = dax_fault_return(rc);
 	if (iomap->flags & IOMAP_F_NEW)
 		vmf_ret |= VM_FAULT_MAJOR;
+	if (!rc && (vmf->flags & FAULT_FLAG_WRITE) && force_ro) {
+		vmf_ret |= VM_FAULT_RO;
+		/*
+		 * Hack: Store PFN here so that we can pass it to
+		 * vm_insert_mixed_mkwrite() when changing PTE to RW.
+		 */
+		vmf->orig_pte = pfn_t_pte(pfn, vma->vm_page_prot);
+	}
 	return vmf_ret;
 }
 
@@ -1092,6 +1100,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, bool sync,
 	int error;
 	int vmf_ret = 0;
 	void *entry;
+	bool force_ro;
 
 	trace_dax_pte_fault(inode, vmf, vmf_ret);
 	/*
@@ -1167,13 +1176,15 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, bool sync,
 		goto finish_iomap;
 	}
 
+	force_ro = (vmf->flags & FAULT_FLAG_WRITE) && sync &&
+			(iomap.flags & IOMAP_F_NEEDDSYNC);
 	switch (iomap.type) {
 	case IOMAP_MAPPED:
 		if (iomap.flags & IOMAP_F_NEW) {
 			count_vm_event(PGMAJFAULT);
 			count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 		}
-		vmf_ret = dax_insert_mapping(vmf, &iomap, pos, entry);
+		vmf_ret = dax_insert_mapping(vmf, &iomap, pos, entry, force_ro);
 		goto finish_iomap;
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
@@ -1219,7 +1230,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, bool sync,
 #define PG_PMD_COLOUR	((PMD_SIZE >> PAGE_SHIFT) - 1)
 
 static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
-		loff_t pos, void *entry)
+		loff_t pos, void *entry, bool force_ro)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	const sector_t sector = dax_iomap_sector(iomap, pos);
@@ -1232,6 +1243,7 @@ static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
 	pgoff_t pgoff;
 	pfn_t pfn;
 	int id;
+	int result;
 
 	if (bdev_dax_pgoff(bdev, sector, size, &pgoff) != 0)
 		goto fallback;
@@ -1256,8 +1268,19 @@ static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
 		goto fallback;
 
 	trace_dax_pmd_insert_mapping(inode, vmf, length, pfn, ret);
-	return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
-			pfn, vmf->flags & FAULT_FLAG_WRITE);
+	result = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
+			pfn, (vmf->flags & FAULT_FLAG_WRITE) && !force_ro);
+	/* Did we insert RO PMD despite the fault being a write one? */
+	if (!(result & VM_FAULT_ERROR) && (vmf->flags & FAULT_FLAG_WRITE) &&
+	    force_ro) {
+		result |= VM_FAULT_RO;
+		/*
+		 * Hack: Store PFN here so that we can pass it to
+		 * vmf_insert_pfn_pmd() when changing PMD to RW.
+		 */
+		vmf->orig_pte = pfn_t_pte(pfn, vmf->vma->vm_page_prot);
+	}
+	return result;
 
 unlock_fallback:
 	dax_read_unlock(id);
@@ -1320,6 +1343,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, bool sync,
 	void *entry;
 	loff_t pos;
 	int error;
+	bool force_ro;
 
 	/*
 	 * Check whether offset isn't beyond end of file now. Caller is
@@ -1385,9 +1409,13 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, bool sync,
 	if (iomap.offset + iomap.length < pos + PMD_SIZE)
 		goto finish_iomap;
 
+	force_ro = (vmf->flags & FAULT_FLAG_WRITE) && sync &&
+			(iomap.flags & IOMAP_F_NEEDDSYNC);
+
 	switch (iomap.type) {
 	case IOMAP_MAPPED:
-		result = dax_pmd_insert_mapping(vmf, &iomap, pos, entry);
+		result = dax_pmd_insert_mapping(vmf, &iomap, pos, entry,
+						force_ro);
 		break;
 	case IOMAP_UNWRITTEN:
 	case IOMAP_HOLE:
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index f64dc6ce5161..957463602f6e 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -22,6 +22,8 @@ struct vm_fault;
  * Flags for all iomap mappings:
  */
 #define IOMAP_F_NEW	0x01	/* blocks have been newly allocated */
+#define IOMAP_F_NEEDDSYNC	0x02	/* inode needs fdatasync for storage to
+					 * become persistent */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
diff --git a/include/linux/mm.h b/include/linux/mm.h
index fa036093e76c..5085647d9f2f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1142,6 +1142,8 @@ static inline void clear_page_pfmemalloc(struct page *page)
 #define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
 #define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
 #define VM_FAULT_DONE_COW   0x1000	/* ->fault has fully handled COW */
+#define VM_FAULT_RO	0x2000		/* Write fault was handled just by
+					 * inserting RO page table entry for DAX */
 
 #define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \
 			 VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \
-- 
2.12.3
