All of lore.kernel.org
 help / color / mirror / Atom feed
* [Ocfs2-devel] [PATCH] ocfs2: fix ocfs2_page_mkwrite path
@ 2011-01-13 11:01 Wengang Wang
  2011-01-24 23:18 ` Mark Fasheh
  0 siblings, 1 reply; 6+ messages in thread
From: Wengang Wang @ 2011-01-13 11:01 UTC (permalink / raw)
  To: ocfs2-devel

This patch fixes two problems in ocfs2_page_mkwrite path:
1) it makes ocfs2_page_mkwrite returns VM_FAULT_*
2) it fixes a bug of returning -EINVAL.
The -EINVAL is returned when the page is no longger belong to the inode mapping
(pagecache gets truncated).  Make it return VM_FAULT_NOPAGE in that case.

Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
---
 fs/ocfs2/aops.c |   47 +++++++++++++++++++++++++++++++++++++++--------
 fs/ocfs2/mmap.c |   53 ++++++++++++++++++++++++-----------------------------
 2 files changed, 63 insertions(+), 37 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0d7c554..619a38f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -876,6 +876,12 @@ struct ocfs2_write_ctxt {
 	struct page			*w_target_page;
 
 	/*
+	 * w_target_locked is used for page_mkwrite path indicating no unlocking
+	 * against w_target_page in ocfs2_write_end_nolock.
+	 */
+	unsigned int			w_target_locked:1;
+
+	/*
 	 * ocfs2_write_end() uses this to know what the real range to
 	 * write in the target should be.
 	 */
@@ -908,8 +914,29 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
 
 static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
 {
+	/*
+	 * don't unlock on the target_page for page_mkwrite pach.
+	 * page_mkwrite() wants a return with the page locked.
+	 * re-acquiring after ocfs2_unlock_and_free_pages is not acceptable.
+	 * During the time when it unlocked, the page can changed to
+	 * not belong to the inode mapping any longer.
+	 *
+	 * caller must hold a ref on w_target_page.
+	 */
+	if (wc->w_target_locked) {
+		int i;
+
+		BUG_ON(!wc->w_target_page);
+		for (i = 0; i < wc->w_num_pages; i++) {
+			if (wc->w_target_page == wc->w_pages[i]) {
+				wc->w_pages[i] = NULL;
+				break;
+			}
+		}
+		mark_page_accessed(wc->w_target_page);
+		page_cache_release(wc->w_target_page);
+	}
 	ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
-
 	brelse(wc->w_di_bh);
 	kfree(wc);
 }
@@ -1139,20 +1166,19 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
 			 */
 			lock_page(mmap_page);
 
+			/*
+			 * pagecache gets truncated. Let VM retry it.
+			 */
 			if (mmap_page->mapping != mapping) {
+				WARN_ON(mmap_page->mapping);
 				unlock_page(mmap_page);
-				/*
-				 * Sanity check - the locking in
-				 * ocfs2_pagemkwrite() should ensure
-				 * that this code doesn't trigger.
-				 */
-				ret = -EINVAL;
-				mlog_errno(ret);
+				ret = 0;
 				goto out;
 			}
 
 			page_cache_get(mmap_page);
 			wc->w_pages[i] = mmap_page;
+			wc->w_target_locked = true;
 		} else {
 			wc->w_pages[i] = find_or_create_page(mapping, index,
 							     GFP_NOFS);
@@ -1167,6 +1193,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
 			wc->w_target_page = wc->w_pages[i];
 	}
 out:
+	if (ret)
+		wc->w_target_locked = false;
 	return ret;
 }
 
@@ -1786,6 +1814,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
 		mlog_errno(ret);
 		goto out_quota;
 	}
+	/* Did the pagecache lose the page?  Retry. */
+	if (!wc->w_target_page)
+		goto out_quota;
 
 	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
 					  len);
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 7e32db9..9f88443 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -62,7 +62,7 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
 				struct page *page)
 {
-	int ret;
+	int ret = VM_FAULT_NOPAGE;
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
 	loff_t pos = page_offset(page);
@@ -72,32 +72,25 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
 	void *fsdata;
 	loff_t size = i_size_read(inode);
 
-	/*
-	 * Another node might have truncated while we were waiting on
-	 * cluster locks.
-	 * We don't check size == 0 before the shift. This is borrowed
-	 * from do_generic_file_read.
-	 */
 	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
-	if (unlikely(!size || page->index > last_index)) {
-		ret = -EINVAL;
-		goto out;
-	}
 
 	/*
-	 * The i_size check above doesn't catch the case where nodes
-	 * truncated and then re-extended the file. We'll re-check the
-	 * page mapping after taking the page lock inside of
-	 * ocfs2_write_begin_nolock().
+	 * There are cases that lead to the page no longer bebongs to the
+	 * mapping.
+	 * 1) pagecache truncates locally due to memory pressure.
+	 * 2) pagecache truncates when another is taking EX lock against 
+	 * inode lock. see ocfs2_data_convert_worker.
+	 * 
+	 * The i_size check doesn't catch the case where nodes truncated and
+	 * then re-extended the file. We'll re-check the page mapping after
+	 * taking the page lock inside of ocfs2_write_begin_nolock().
+	 *
+	 * Let VM retry with these cases.
 	 */
-	if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
-		/*
-		 * the page has been umapped in ocfs2_data_downconvert_worker.
-		 * So return 0 here and let VFS retry.
-		 */
-		ret = 0;
+	if ((page->mapping != inode->i_mapping) ||
+	    (!PageUptodate(page)) ||
+	    (page_offset(page) >= size))
 		goto out;
-	}
 
 	/*
 	 * Call ocfs2_write_begin() and ocfs2_write_end() to take
@@ -117,17 +110,21 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
 	if (ret) {
 		if (ret != -ENOSPC)
 			mlog_errno(ret);
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else
+			ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
 
-	ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
-				     fsdata);
-	if (ret < 0) {
-		mlog_errno(ret);
+	if (!locked_page) {
+		ret = VM_FAULT_NOPAGE;
 		goto out;
 	}
+	ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
+				     fsdata);
 	BUG_ON(ret != len);
-	ret = 0;
+	ret = VM_FAULT_LOCKED;
 out:
 	return ret;
 }
@@ -169,8 +166,6 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 out:
 	ocfs2_unblock_signals(&oldset);
-	if (ret)
-		ret = VM_FAULT_SIGBUS;
 	return ret;
 }
 
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: fix ocfs2_page_mkwrite path
  2011-01-13 11:01 [Ocfs2-devel] [PATCH] ocfs2: fix ocfs2_page_mkwrite path Wengang Wang
@ 2011-01-24 23:18 ` Mark Fasheh
  2011-01-24 23:28   ` Joel Becker
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Fasheh @ 2011-01-24 23:18 UTC (permalink / raw)
  To: ocfs2-devel

Hi Wengang, thanks again for this patch. It looks pretty good, I had only
one comment which is below.

On Thu, Jan 13, 2011 at 07:01:36PM +0800, Wengang Wang wrote:

> @@ -1139,20 +1166,19 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
>  			 */
>  			lock_page(mmap_page);
>  
> +			/*
> +			 * pagecache gets truncated. Let VM retry it.
> +			 */
>  			if (mmap_page->mapping != mapping) {
> +				WARN_ON(mmap_page->mapping);
>  				unlock_page(mmap_page);
> -				/*
> -				 * Sanity check - the locking in
> -				 * ocfs2_pagemkwrite() should ensure
> -				 * that this code doesn't trigger.
> -				 */
> -				ret = -EINVAL;
> -				mlog_errno(ret);
> +				ret = 0;

We're returning zero here (success) when we haven't really met success. I
see that in the next hunk, you test for this particular condition by looking
at wc->w_target_page. There's no bug but this approach makes understanding
the return code of ocfs2_grab_pages_for_write() harder.

Instead, can you have ocfs2_grab_pages_for_write return a specific error
code here which then would get automatically triggered by the error handling
code in ocfs2_write_begin_nolock()?
	--Mark

>  				goto out;
>  			}
>  
>  			page_cache_get(mmap_page);
>  			wc->w_pages[i] = mmap_page;
> +			wc->w_target_locked = true;
>  		} else {
>  			wc->w_pages[i] = find_or_create_page(mapping, index,
>  							     GFP_NOFS);
> @@ -1167,6 +1193,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
>  			wc->w_target_page = wc->w_pages[i];
>  	}
>  out:
> +	if (ret)
> +		wc->w_target_locked = false;
>  	return ret;
>  }
>  
> @@ -1786,6 +1814,9 @@ int ocfs2_write_begin_nolock(struct file *filp,
>  		mlog_errno(ret);
>  		goto out_quota;
>  	}
> +	/* Did the pagecache lose the page?  Retry. */
> +	if (!wc->w_target_page)
> +		goto out_quota;
>  
>  	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
>  					  len);

--
Mark Fasheh

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: fix ocfs2_page_mkwrite path
  2011-01-24 23:18 ` Mark Fasheh
@ 2011-01-24 23:28   ` Joel Becker
  2011-01-24 23:54     ` Mark Fasheh
  0 siblings, 1 reply; 6+ messages in thread
From: Joel Becker @ 2011-01-24 23:28 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Jan 24, 2011 at 03:18:35PM -0800, Mark Fasheh wrote:
> >  			if (mmap_page->mapping != mapping) {
> > +				WARN_ON(mmap_page->mapping);
> >  				unlock_page(mmap_page);
> > -				/*
> > -				 * Sanity check - the locking in
> > -				 * ocfs2_pagemkwrite() should ensure
> > -				 * that this code doesn't trigger.
> > -				 */
> > -				ret = -EINVAL;
> > -				mlog_errno(ret);
> > +				ret = 0;
> 
> We're returning zero here (success) when we haven't really met success. I
> see that in the next hunk, you test for this particular condition by looking
> at wc->w_target_page. There's no bug but this approach makes understanding
> the return code of ocfs2_grab_pages_for_write() harder.

	I told him to return 0.  I couldn't come up with a good signal.
We want the caller (__ocfs2_page_mkwrite()) to not error; this is a
valid case of retry.  The check for the locked_page is good for that.
	If we do come up with a logical !0 ret for
ocfs2_write_begin_nolock(), we have to make sure that the error paths do
all of their processing correctly but otherwise treat it as a good
state.  Obviously then __ocfs2_page_mkwrite() can trigger on that error
code (and BUG_ON(locked_page)).

Joel


-- 

"When choosing between two evils, I always like to try the one
 I've never tried before."
        - Mae West

			http://www.jlbec.org/
			jlbec at evilplan.org

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: fix ocfs2_page_mkwrite path
  2011-01-24 23:28   ` Joel Becker
@ 2011-01-24 23:54     ` Mark Fasheh
  2011-01-24 23:59       ` Joel Becker
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Fasheh @ 2011-01-24 23:54 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Jan 24, 2011 at 03:28:31PM -0800, Joel Becker wrote:
> On Mon, Jan 24, 2011 at 03:18:35PM -0800, Mark Fasheh wrote:
> > >  			if (mmap_page->mapping != mapping) {
> > > +				WARN_ON(mmap_page->mapping);
> > >  				unlock_page(mmap_page);
> > > -				/*
> > > -				 * Sanity check - the locking in
> > > -				 * ocfs2_pagemkwrite() should ensure
> > > -				 * that this code doesn't trigger.
> > > -				 */
> > > -				ret = -EINVAL;
> > > -				mlog_errno(ret);
> > > +				ret = 0;
> > 
> > We're returning zero here (success) when we haven't really met success. I
> > see that in the next hunk, you test for this particular condition by looking
> > at wc->w_target_page. There's no bug but this approach makes understanding
> > the return code of ocfs2_grab_pages_for_write() harder.
> 
> 	I told him to return 0.  I couldn't come up with a good signal.
> We want the caller (__ocfs2_page_mkwrite()) to not error; this is a
> valid case of retry.  The check for the locked_page is good for that.

This is the return 0 bit in ocfs2_grab_page_for_write, which is only called
from ocfs2_write_begin_nolock(). I think we're talking about two different
paths. Specifically, I don't like seeing this:

	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
					 cluster_of_pages, mmap_page);
	if (ret) {
		mlog_errno(ret);
		goto out_quota;
	}

	/* Did the pagecache lose the page?  Retry. */                                                                                                                                                                                                       
	if (!wc->w_target_page)                                                                                                                                                                                                                              
		goto out_quota;                                                                                                                                                                                                                              

And instead I'd rather see:

        ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
                                         cluster_of_pages, mmap_page);
        if (ret) {
		if (ret == -EWHATEVER) {
			/*
			 * Here is a comment explaining why we're turning
			 * this into zero for ocfs2_page_mkwrite().
			 */
			ret = 0;
		}
		goto out_quota;
	}

Which doesn't wind up changing any API between page_mkwrite and write_begin
but results in a more readable and natural calling convention internally.
	--Mark

--
Mark Fasheh

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: fix ocfs2_page_mkwrite path
  2011-01-24 23:54     ` Mark Fasheh
@ 2011-01-24 23:59       ` Joel Becker
  2011-01-25  0:36         ` Mark Fasheh
  0 siblings, 1 reply; 6+ messages in thread
From: Joel Becker @ 2011-01-24 23:59 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Jan 24, 2011 at 03:54:25PM -0800, Mark Fasheh wrote:
> And instead I'd rather see:
> 
>         ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
>                                          cluster_of_pages, mmap_page);
>         if (ret) {
> 		if (ret == -EWHATEVER) {

	Ok, so what is EWHATEVER?

Joel

-- 

	Pitchers and catchers report.

			http://www.jlbec.org/
			jlbec at evilplan.org

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [Ocfs2-devel] [PATCH] ocfs2: fix ocfs2_page_mkwrite path
  2011-01-24 23:59       ` Joel Becker
@ 2011-01-25  0:36         ` Mark Fasheh
  0 siblings, 0 replies; 6+ messages in thread
From: Mark Fasheh @ 2011-01-25  0:36 UTC (permalink / raw)
  To: ocfs2-devel

On Mon, Jan 24, 2011 at 03:59:24PM -0800, Joel Becker wrote:
> On Mon, Jan 24, 2011 at 03:54:25PM -0800, Mark Fasheh wrote:
> > And instead I'd rather see:
> > 
> >         ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
> >                                          cluster_of_pages, mmap_page);
> >         if (ret) {
> > 		if (ret == -EWHATEVER) {
> 
> 	Ok, so what is EWHATEVER?

-EINVAL? That's what we used to return from ocfs2_grab_pages_for_write().
The only other possible return value right now is ENOMEM so we could pick
anything really.

errno values aren't very descriptive, which sucks (but that's what we have
to deal with).


Then again, if we want to be super explicit we could also just do:

#define	EMAPPINGCHANGED	1
int ocfs2_grab_pages_for_write( ... )
{

}

and return our own nonzero value.
	--Mark

--
Mark Fasheh

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2011-01-25  0:36 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-01-13 11:01 [Ocfs2-devel] [PATCH] ocfs2: fix ocfs2_page_mkwrite path Wengang Wang
2011-01-24 23:18 ` Mark Fasheh
2011-01-24 23:28   ` Joel Becker
2011-01-24 23:54     ` Mark Fasheh
2011-01-24 23:59       ` Joel Becker
2011-01-25  0:36         ` Mark Fasheh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.