All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Cc: Matthew Wilcox <willy@linux.intel.com>
Subject: [PATCH 05/11] ext4: Start transaction before calling into DAX
Date: Tue,  4 Aug 2015 15:57:59 -0400	[thread overview]
Message-ID: <1438718285-21168-6-git-send-email-matthew.r.wilcox@intel.com> (raw)
In-Reply-To: <1438718285-21168-1-git-send-email-matthew.r.wilcox@intel.com>

From: Matthew Wilcox <willy@linux.intel.com>

Jan Kara pointed out that in the case where we are writing to a hole,
we can end up with a lock inversion between the page lock and the
journal lock.  We can avoid this by starting the transaction in ext4
before calling into DAX.  The journal lock nests inside the superblock
pagefault lock, so we have to duplicate that code from dax_fault, like
XFS does.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
 fs/ext4/file.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d5219e4..113837e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -207,14 +207,63 @@ static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
 
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	return dax_fault(vma, vmf, ext4_get_block_dax, ext4_end_io_unwritten);
+	int result;
+	handle_t *handle = NULL;
+	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+	if (write) {
+		sb_start_pagefault(sb);
+		file_update_time(vma->vm_file);
+		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+						EXT4_DATA_TRANS_BLOCKS(sb));
+	}
+
+	if (IS_ERR(handle))
+		result = VM_FAULT_SIGBUS;
+	else
+		result = __dax_fault(vma, vmf, ext4_get_block_dax,
+						ext4_end_io_unwritten);
+
+	if (write) {
+		if (!IS_ERR(handle))
+			ext4_journal_stop(handle);
+		sb_end_pagefault(sb);
+	}
+
+	return result;
 }
 
 static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 						pmd_t *pmd, unsigned int flags)
 {
-	return dax_pmd_fault(vma, addr, pmd, flags, ext4_get_block_dax,
-				ext4_end_io_unwritten);
+	int result;
+	handle_t *handle = NULL;
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
+	bool write = flags & FAULT_FLAG_WRITE;
+
+	if (write) {
+		sb_start_pagefault(sb);
+		file_update_time(vma->vm_file);
+		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+				ext4_chunk_trans_blocks(inode,
+							PMD_SIZE / PAGE_SIZE));
+	}
+
+	if (IS_ERR(handle))
+		result = VM_FAULT_SIGBUS;
+	else
+		result = __dax_pmd_fault(vma, addr, pmd, flags,
+				ext4_get_block_dax, ext4_end_io_unwritten);
+
+	if (write) {
+		if (!IS_ERR(handle))
+			ext4_journal_stop(handle);
+		sb_end_pagefault(sb);
+	}
+
+	return result;
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-- 
2.1.4


WARNING: multiple messages have this Message-ID (diff)
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org
Cc: Matthew Wilcox <willy@linux.intel.com>
Subject: [PATCH 05/11] ext4: Start transaction before calling into DAX
Date: Tue,  4 Aug 2015 15:57:59 -0400	[thread overview]
Message-ID: <1438718285-21168-6-git-send-email-matthew.r.wilcox@intel.com> (raw)
In-Reply-To: <1438718285-21168-1-git-send-email-matthew.r.wilcox@intel.com>

From: Matthew Wilcox <willy@linux.intel.com>

Jan Kara pointed out that in the case where we are writing to a hole,
we can end up with a lock inversion between the page lock and the
journal lock.  We can avoid this by starting the transaction in ext4
before calling into DAX.  The journal lock nests inside the superblock
pagefault lock, so we have to duplicate that code from dax_fault, like
XFS does.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
---
 fs/ext4/file.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d5219e4..113837e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -207,14 +207,63 @@ static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
 
 static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	return dax_fault(vma, vmf, ext4_get_block_dax, ext4_end_io_unwritten);
+	int result;
+	handle_t *handle = NULL;
+	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+	bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+	if (write) {
+		sb_start_pagefault(sb);
+		file_update_time(vma->vm_file);
+		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+						EXT4_DATA_TRANS_BLOCKS(sb));
+	}
+
+	if (IS_ERR(handle))
+		result = VM_FAULT_SIGBUS;
+	else
+		result = __dax_fault(vma, vmf, ext4_get_block_dax,
+						ext4_end_io_unwritten);
+
+	if (write) {
+		if (!IS_ERR(handle))
+			ext4_journal_stop(handle);
+		sb_end_pagefault(sb);
+	}
+
+	return result;
 }
 
 static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 						pmd_t *pmd, unsigned int flags)
 {
-	return dax_pmd_fault(vma, addr, pmd, flags, ext4_get_block_dax,
-				ext4_end_io_unwritten);
+	int result;
+	handle_t *handle = NULL;
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
+	bool write = flags & FAULT_FLAG_WRITE;
+
+	if (write) {
+		sb_start_pagefault(sb);
+		file_update_time(vma->vm_file);
+		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+				ext4_chunk_trans_blocks(inode,
+							PMD_SIZE / PAGE_SIZE));
+	}
+
+	if (IS_ERR(handle))
+		result = VM_FAULT_SIGBUS;
+	else
+		result = __dax_pmd_fault(vma, addr, pmd, flags,
+				ext4_get_block_dax, ext4_end_io_unwritten);
+
+	if (write) {
+		if (!IS_ERR(handle))
+			ext4_journal_stop(handle);
+		sb_end_pagefault(sb);
+	}
+
+	return result;
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-- 
2.1.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2015-08-04 20:00 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-04 19:57 [PATCH 00/11] DAX fixes for 4.3 Matthew Wilcox
2015-08-04 19:57 ` Matthew Wilcox
2015-08-04 19:57 ` [PATCH 01/11] ext4: Use ext4_get_block_write() for DAX Matthew Wilcox
2015-08-04 19:57   ` Matthew Wilcox
2015-08-04 19:57 ` [PATCH 02/11] thp: Change insert_pfn's return type to void Matthew Wilcox
2015-08-04 19:57   ` Matthew Wilcox
2015-08-04 19:57 ` [PATCH 03/11] dax: Improve comment about truncate race Matthew Wilcox
2015-08-04 19:57   ` Matthew Wilcox
2015-08-04 19:57 ` [PATCH 04/11] ext4: Add ext4_get_block_dax() Matthew Wilcox
2015-08-04 19:57   ` Matthew Wilcox
2015-08-05  2:03   ` Dave Chinner
2015-08-05  2:03     ` Dave Chinner
2015-08-05 15:19     ` Matthew Wilcox
2015-08-05 15:19       ` Matthew Wilcox
2015-08-04 19:57 ` Matthew Wilcox [this message]
2015-08-04 19:57   ` [PATCH 05/11] ext4: Start transaction before calling into DAX Matthew Wilcox
2015-08-04 19:58 ` [PATCH 06/11] dax: Fix race between simultaneous faults Matthew Wilcox
2015-08-04 19:58   ` Matthew Wilcox
2015-08-05 11:43   ` Kirill A. Shutemov
2015-08-05 11:43     ` Kirill A. Shutemov
2015-08-04 19:58 ` [PATCH 07/11] thp: Decrement refcount on huge zero page if it is split Matthew Wilcox
2015-08-04 19:58   ` Matthew Wilcox
2015-08-04 19:58 ` [PATCH 08/11] thp: Fix zap_huge_pmd() for DAX Matthew Wilcox
2015-08-04 19:58   ` Matthew Wilcox
2015-08-04 19:58 ` [PATCH 09/11] dax: Don't use set_huge_zero_page() Matthew Wilcox
2015-08-04 19:58   ` Matthew Wilcox
2015-08-04 19:58 ` [PATCH 10/11] dax: Ensure that zero pages are removed from other processes Matthew Wilcox
2015-08-04 19:58   ` Matthew Wilcox
2015-08-04 19:58 ` [PATCH 11/11] dax: Use linear_page_index() Matthew Wilcox
2015-08-04 19:58   ` Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1438718285-21168-6-git-send-email-matthew.r.wilcox@intel.com \
    --to=matthew.r.wilcox@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=willy@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.