All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2]Btrfs: pwrite blocked when writing from the mmaped buffer of the same page
@ 2010-12-09  9:30 Zhong, Xin
  2011-01-27 13:09 ` Johannes Hirte
  0 siblings, 1 reply; 68+ messages in thread
From: Zhong, Xin @ 2010-12-09  9:30 UTC (permalink / raw)
  To: linux-btrfs; +Cc: xin.zhong

This problem is found in meego testing:
http://bugs.meego.com/show_bug.cgi?id=6672
A file in btrfs is mmaped and the mmaped buffer is passed to pwrite to write to the same page
of the same file. In btrfs_file_aio_write(), the pages is locked by prepare_pages(). So when
btrfs_copy_from_user() is called, page fault happens and the same page needs to be locked again
in filemap_fault(). The fix is to move iov_iter_fault_in_readable() before prepage_pages() to make page
fault happen before pages are locked. And also disable page fault in critical region in
btrfs_copy_from_user().

Reviewed-by: Yan, Zheng<zheng.z.yan@intel.com>
Signed-off-by: Zhong, Xin <xin.zhong@intel.com>
---
 fs/btrfs/file.c |   92 ++++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 60 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c1faded..66836d8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 					 struct page **prepared_pages,
 					 struct iov_iter *i)
 {
-	size_t copied;
+	size_t copied = 0;
 	int pg = 0;
 	int offset = pos & (PAGE_CACHE_SIZE - 1);
+	int total_copied = 0;
 
 	while (write_bytes > 0) {
 		size_t count = min_t(size_t,
 				     PAGE_CACHE_SIZE - offset, write_bytes);
 		struct page *page = prepared_pages[pg];
-again:
-		if (unlikely(iov_iter_fault_in_readable(i, count)))
-			return -EFAULT;
-
-		/* Copy data from userspace to the current page */
-		copied = iov_iter_copy_from_user(page, i, offset, count);
+		/*
+		 * Copy data from userspace to the current page
+		 *
+		 * Disable pagefault to avoid recursive lock since
+		 * the pages are already locked
+		 */
+		pagefault_disable();
+		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+		pagefault_enable();
 
 		/* Flush processor's dcache for this page */
 		flush_dcache_page(page);
 		iov_iter_advance(i, copied);
 		write_bytes -= copied;
+		total_copied += copied;
 
+		/* Return to btrfs_file_aio_write to fault page */
 		if (unlikely(copied == 0)) {
-			count = min_t(size_t, PAGE_CACHE_SIZE - offset,
-				      iov_iter_single_seg_count(i));
-			goto again;
+			break;
 		}
 
 		if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ again:
 			offset = 0;
 		}
 	}
-	return 0;
+	return total_copied;
 }
 
 /*
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	unsigned long last_index;
 	int will_write;
 	int buffered = 0;
+	int copied = 0;
+	int dirty_pages = 0;
 
 	will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
 		      (file->f_flags & O_DIRECT));
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		WARN_ON(num_pages > nrptrs);
 		memset(pages, 0, sizeof(struct page *) * nrptrs);
 
-		ret = btrfs_delalloc_reserve_space(inode, write_bytes);
+		/*
+		 * Fault pages before locking them in prepare_pages
+		 * to avoid recursive lock
+		 */
+		if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		ret = btrfs_delalloc_reserve_space(inode,
+					num_pages << PAGE_CACHE_SHIFT);
 		if (ret)
 			goto out;
 
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 				    pos, first_index, last_index,
 				    write_bytes);
 		if (ret) {
-			btrfs_delalloc_release_space(inode, write_bytes);
+			btrfs_delalloc_release_space(inode,
+					num_pages << PAGE_CACHE_SHIFT);
 			goto out;
 		}
 
-		ret = btrfs_copy_from_user(pos, num_pages,
+		copied = btrfs_copy_from_user(pos, num_pages,
 					   write_bytes, pages, &i);
-		if (ret == 0) {
+		dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+					PAGE_CACHE_SHIFT;
+
+		if (num_pages > dirty_pages) {
+			if (copied > 0)
+				atomic_inc(
+					&BTRFS_I(inode)->outstanding_extents);
+			btrfs_delalloc_release_space(inode,
+					(num_pages - dirty_pages) <<
+					PAGE_CACHE_SHIFT);
+		}
+
+		if (copied > 0) {
 			dirty_and_release_pages(NULL, root, file, pages,
-						num_pages, pos, write_bytes);
+						dirty_pages, pos, copied);
 		}
 
 		btrfs_drop_pages(pages, num_pages);
-		if (ret) {
-			btrfs_delalloc_release_space(inode, write_bytes);
-			goto out;
-		}
 
-		if (will_write) {
-			filemap_fdatawrite_range(inode->i_mapping, pos,
-						 pos + write_bytes - 1);
-		} else {
-			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-							   num_pages);
-			if (num_pages <
-			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-				btrfs_btree_balance_dirty(root, 1);
-			btrfs_throttle(root);
+		if (copied > 0) {
+			if (will_write) {
+				filemap_fdatawrite_range(inode->i_mapping, pos,
+							 pos + copied - 1);
+			} else {
+				balance_dirty_pages_ratelimited_nr(
+							inode->i_mapping,
+							dirty_pages);
+				if (dirty_pages <
+				(root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+					btrfs_btree_balance_dirty(root, 1);
+				btrfs_throttle(root);
+			}
 		}
 
-		pos += write_bytes;
-		num_written += write_bytes;
+		pos += copied;
+		num_written += copied;
 
 		cond_resched();
 	}
-- 
1.6.2.2


^ permalink raw reply related	[flat|nested] 68+ messages in thread
* Re: [PATCH] btrfs file write debugging patch
@ 2011-03-01 16:36 Xin Zhong
  2011-03-01 21:09 ` Mitch Harder
  0 siblings, 1 reply; 68+ messages in thread
From: Xin Zhong @ 2011-03-01 16:36 UTC (permalink / raw)
  To: linux-btrfs


Hi, Mitch
I think you can config ftrace to just trace function calls of btrfs.ko which will save a lot of trace buffer space. See below command:
#echo ':mod:btrfs' > /sys/kernel/debug/tracing/set_ftrace_filterAnd please send out the full ftrace log again.

Another helpful information might be the strace log of the wmldbcreate process. It will show us the io pattern of this command.
Thanks a lot for your help!
  		 	   		  

^ permalink raw reply	[flat|nested] 68+ messages in thread

end of thread, other threads:[~2011-03-08  2:51 UTC | newest]

Thread overview: 68+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-12-09  9:30 [PATCH v2]Btrfs: pwrite blocked when writing from the mmaped buffer of the same page Zhong, Xin
2011-01-27 13:09 ` Johannes Hirte
2011-01-27 22:12   ` Maria Wikström
2011-01-28  1:26     ` Zhong, Xin
2011-01-28  2:54       ` Johannes Hirte
2011-01-28  3:53         ` Zhong, Xin
2011-02-01 23:34           ` Johannes Hirte
2011-02-11  4:39             ` Zhong, Xin
2011-02-18 11:31               ` Maria Wikström
2011-02-21  1:51                 ` Zhong, Xin
2011-02-24 14:51                   ` Maria Wikström
2011-02-24 15:55                     ` Mitch Harder
2011-02-24 16:00                       ` Chris Mason
2011-02-24 16:03                         ` Mitch Harder
2011-02-24 16:19                           ` Chris Mason
2011-02-24 16:32                             ` Mitch Harder
     [not found]                               ` <AANLkTinvyb-bTVVignd1KGojvh-QrYCFmCnwYKBsYC_2@mail.gmail.com>
2011-02-25 17:11                                 ` Mitch Harder
2011-02-25 18:43                                   ` Mitch Harder
2011-02-25 19:19                                     ` Chris Mason
2011-02-28  1:46                                     ` [PATCH] btrfs file write debugging patch Chris Mason
2011-02-28  8:56                                       ` Zhong, Xin
2011-02-28 14:02                                         ` Chris Mason
2011-02-28 10:13                                       ` Johannes Hirte
2011-02-28 14:00                                         ` Chris Mason
2011-02-28 16:10                                         ` Josef Bacik
2011-02-28 16:45                                           ` Maria Wikström
2011-02-28 17:47                                             ` Mitch Harder
2011-02-28 20:20                                               ` Mitch Harder
2011-03-01  5:09                                                 ` Mitch Harder
2011-03-01 10:14                                                 ` Zhong, Xin
2011-03-01 11:56                                                   ` Zhong, Xin
2011-03-01 14:54                                                     ` Mitch Harder
2011-03-01 14:51                                                   ` Mitch Harder
2011-03-01 21:56                                                 ` Piotr Szymaniak
2011-02-24 23:35                   ` [PATCH v2]Btrfs: pwrite blocked when writing from the mmaped buffer of the same page Piotr Szymaniak
2011-02-22 22:27               ` Johannes Hirte
2011-02-23  7:27                 ` Zhong, Xin
2011-02-23 21:56                   ` Chris Mason
2011-02-23 23:02                     ` Johannes Hirte
2011-02-24 15:23                       ` Chris Mason
2011-01-28 16:47         ` Maria Wikström
2011-01-28 18:27           ` Rui Miguel Silva
2011-01-29 15:38             ` Maria Wikström
2011-03-01 16:36 [PATCH] btrfs file write debugging patch Xin Zhong
2011-03-01 21:09 ` Mitch Harder
2011-03-02 10:58   ` Zhong, Xin
2011-03-02 14:00     ` Xin Zhong
2011-03-04  1:51     ` Chris Mason
2011-03-04  2:32       ` Josef Bacik
2011-03-04  2:42         ` Zhong, Xin
2011-03-04  2:41           ` Josef Bacik
2011-03-04  8:41             ` Zhong, Xin
2011-03-05 16:56             ` Mitch Harder
2011-03-05 17:28               ` Xin Zhong
2011-03-04 12:19       ` Chris Mason
2011-03-04 14:25         ` Xin Zhong
2011-03-04 15:33           ` Mitch Harder
2011-03-04 17:21             ` Mitch Harder
2011-03-05  1:00               ` Xin Zhong
2011-03-05 13:14                 ` Mitch Harder
2011-03-05 16:50                   ` Mitch Harder
2011-03-06 18:00                     ` Chris Mason
2011-03-07  0:58                       ` Chris Mason
2011-03-07  6:07                         ` Mitch Harder
2011-03-07  6:37                           ` Zhong, Xin
2011-03-07 19:56                           ` Maria Wikström
2011-03-07 22:12                             ` Johannes Hirte
2011-03-08  2:51                               ` Zhong, Xin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.