All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] Avoid unnecessary page locks in the generic read path
@ 2016-01-25 10:03 ` Mel Gorman
  0 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-25 10:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML, Mel Gorman

A long time ago there was an attempt to merge a patch that reduced the
cost of unlock_page by avoiding the page_waitqueue lookup if there were no
waiters. It was rejected on the grounds of complexity but it was pointed
out that the read paths call lock_page unnecessarily. This series reduces
the number of calls to lock_page when multiple processes read data in at
the same time.

 mm/filemap.c | 90 ++++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 60 insertions(+), 30 deletions(-)

-- 
2.6.4

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 0/2] Avoid unnecessary page locks in the generic read path
@ 2016-01-25 10:03 ` Mel Gorman
  0 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-25 10:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML, Mel Gorman

A long time ago there was an attempt to merge a patch that reduced the
cost of unlock_page by avoiding the page_waitqueue lookup if there were no
waiters. It was rejected on the grounds of complexity but it was pointed
out that the read paths call lock_page unnecessarily. This series reduces
the number of calls to lock_page when multiple processes read data in at
the same time.

 mm/filemap.c | 90 ++++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 60 insertions(+), 30 deletions(-)

-- 
2.6.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/2] mm: filemap: Remove redundant code in do_read_cache_page
  2016-01-25 10:03 ` Mel Gorman
@ 2016-01-25 10:03   ` Mel Gorman
  -1 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-25 10:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML, Mel Gorman

do_read_cache_page and __read_cache_page duplicates page filler code
when filling the page for the first time. This patch simply removes the
duplicate logic.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
 mm/filemap.c | 43 ++++++++++++-------------------------------
 1 file changed, 12 insertions(+), 31 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index bc943867d68c..aa38593d0cd5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2283,7 +2283,7 @@ static struct page *wait_on_page_read(struct page *page)
 	return page;
 }
 
-static struct page *__read_cache_page(struct address_space *mapping,
+static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *, struct page *),
 				void *data,
@@ -2305,31 +2305,19 @@ static struct page *__read_cache_page(struct address_space *mapping,
 			/* Presumably ENOMEM for radix tree node */
 			return ERR_PTR(err);
 		}
+
+filler:
 		err = filler(data, page);
 		if (err < 0) {
 			page_cache_release(page);
-			page = ERR_PTR(err);
-		} else {
-			page = wait_on_page_read(page);
+			return ERR_PTR(err);
 		}
-	}
-	return page;
-}
-
-static struct page *do_read_cache_page(struct address_space *mapping,
-				pgoff_t index,
-				int (*filler)(void *, struct page *),
-				void *data,
-				gfp_t gfp)
-
-{
-	struct page *page;
-	int err;
 
-retry:
-	page = __read_cache_page(mapping, index, filler, data, gfp);
-	if (IS_ERR(page))
-		return page;
+		page = wait_on_page_read(page);
+		if (IS_ERR(page))
+			return page;
+		goto out;
+	}
 	if (PageUptodate(page))
 		goto out;
 
@@ -2337,21 +2325,14 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 	if (!page->mapping) {
 		unlock_page(page);
 		page_cache_release(page);
-		goto retry;
+		goto repeat;
 	}
 	if (PageUptodate(page)) {
 		unlock_page(page);
 		goto out;
 	}
-	err = filler(data, page);
-	if (err < 0) {
-		page_cache_release(page);
-		return ERR_PTR(err);
-	} else {
-		page = wait_on_page_read(page);
-		if (IS_ERR(page))
-			return page;
-	}
+	goto filler;
+
 out:
 	mark_page_accessed(page);
 	return page;
-- 
2.6.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 1/2] mm: filemap: Remove redundant code in do_read_cache_page
@ 2016-01-25 10:03   ` Mel Gorman
  0 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-25 10:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML, Mel Gorman

do_read_cache_page and __read_cache_page duplicates page filler code
when filling the page for the first time. This patch simply removes the
duplicate logic.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
 mm/filemap.c | 43 ++++++++++++-------------------------------
 1 file changed, 12 insertions(+), 31 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index bc943867d68c..aa38593d0cd5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2283,7 +2283,7 @@ static struct page *wait_on_page_read(struct page *page)
 	return page;
 }
 
-static struct page *__read_cache_page(struct address_space *mapping,
+static struct page *do_read_cache_page(struct address_space *mapping,
 				pgoff_t index,
 				int (*filler)(void *, struct page *),
 				void *data,
@@ -2305,31 +2305,19 @@ static struct page *__read_cache_page(struct address_space *mapping,
 			/* Presumably ENOMEM for radix tree node */
 			return ERR_PTR(err);
 		}
+
+filler:
 		err = filler(data, page);
 		if (err < 0) {
 			page_cache_release(page);
-			page = ERR_PTR(err);
-		} else {
-			page = wait_on_page_read(page);
+			return ERR_PTR(err);
 		}
-	}
-	return page;
-}
-
-static struct page *do_read_cache_page(struct address_space *mapping,
-				pgoff_t index,
-				int (*filler)(void *, struct page *),
-				void *data,
-				gfp_t gfp)
-
-{
-	struct page *page;
-	int err;
 
-retry:
-	page = __read_cache_page(mapping, index, filler, data, gfp);
-	if (IS_ERR(page))
-		return page;
+		page = wait_on_page_read(page);
+		if (IS_ERR(page))
+			return page;
+		goto out;
+	}
 	if (PageUptodate(page))
 		goto out;
 
@@ -2337,21 +2325,14 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 	if (!page->mapping) {
 		unlock_page(page);
 		page_cache_release(page);
-		goto retry;
+		goto repeat;
 	}
 	if (PageUptodate(page)) {
 		unlock_page(page);
 		goto out;
 	}
-	err = filler(data, page);
-	if (err < 0) {
-		page_cache_release(page);
-		return ERR_PTR(err);
-	} else {
-		page = wait_on_page_read(page);
-		if (IS_ERR(page))
-			return page;
-	}
+	goto filler;
+
 out:
 	mark_page_accessed(page);
 	return page;
-- 
2.6.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read
  2016-01-25 10:03 ` Mel Gorman
@ 2016-01-25 10:03   ` Mel Gorman
  -1 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-25 10:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML, Mel Gorman

In the generic read paths the kernel looks up a page in the page cache
and if it's up to date, it is used. If not, the page lock is acquired to
wait for IO to complete and then check the page.  If multiple processes
are waiting on IO, they all serialise against the lock and duplicate the
checks. This is unnecessary.

The page lock in itself does not give any guarantees to the callers about
the page state as it can be immediately truncated or reclaimed after the
page is unlocked. It's sufficient to wait_on_page_locked and then continue
if the page is up to date on wakeup.

It is possible that a truncated but up-to-date page is returned but the
reference taken during read prevents it disappearing underneath the caller
and the data is still valid if PageUptodate.

The overall impact is small as even if processes serialise on the lock,
the lock section is tiny once the IO is complete. Profiles indicated that
unlock_page and friends are generally a tiny portion of a read-intensive
workload.  An artifical test was created that had instances of dd access
a cache-cold file on an ext4 filesystem and measure how long the read took.

paralleldd
                                    4.4.0                 4.4.0
                                  vanilla             avoidlock
Amean    Elapsd-1          5.28 (  0.00%)        5.15 (  2.50%)
Amean    Elapsd-4          5.29 (  0.00%)        5.17 (  2.12%)
Amean    Elapsd-7          5.28 (  0.00%)        5.18 (  1.78%)
Amean    Elapsd-12         5.20 (  0.00%)        5.33 ( -2.50%)
Amean    Elapsd-21         5.14 (  0.00%)        5.21 ( -1.41%)
Amean    Elapsd-30         5.30 (  0.00%)        5.12 (  3.38%)
Amean    Elapsd-48         5.78 (  0.00%)        5.42 (  6.21%)
Amean    Elapsd-79         6.78 (  0.00%)        6.62 (  2.46%)
Amean    Elapsd-110        9.09 (  0.00%)        8.99 (  1.15%)
Amean    Elapsd-128       10.60 (  0.00%)       10.43 (  1.66%)

The impact is small but intuitively, it makes sense to avoid unnecessary
calls to lock_page.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
 mm/filemap.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/mm/filemap.c b/mm/filemap.c
index aa38593d0cd5..235ee2b0b5da 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1649,6 +1649,15 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
 					index, last_index - index);
 		}
 		if (!PageUptodate(page)) {
+			/*
+			 * See comment in do_read_cache_page on why
+			 * wait_on_page_locked is used to avoid unnecessarily
+			 * serialisations and why it's safe.
+			 */
+			wait_on_page_locked(page);
+			if (PageUptodate(page))
+				goto page_ok;
+
 			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
 				goto page_not_up_to_date;
@@ -2321,12 +2330,52 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 	if (PageUptodate(page))
 		goto out;
 
+	/*
+	 * Page is not up to date and may be locked due one of the following
+	 * case a: Page is being filled and the page lock is held
+	 * case b: Read/write error clearing the page uptodate status
+	 * case c: Truncation in progress (page locked)
+	 * case d: Reclaim in progress
+	 *
+	 * Case a, the page will be up to date when the page is unlocked.
+	 *    There is no need to serialise on the page lock here as the page
+	 *    is pinned so the lock gives no additional protection. Even if the
+	 *    the page is truncated, the data is still valid if PageUptodate as
+	 *    it's a race vs truncate race.
+	 * Case b, the page will not be up to date
+	 * Case c, the page may be truncated but in itself, the data may still
+	 *    be valid after IO completes as it's a read vs truncate race. The
+	 *    operation must restart if the page is not uptodate on unlock but
+	 *    otherwise serialising on page lock to stabilise the mapping gives
+	 *    no additional guarantees to the caller as the page lock is
+	 *    released before return.
+	 * Case d, similar to truncation. If reclaim holds the page lock, it
+	 *    will be a race with remove_mapping that determines if the mapping
+	 *    is valid on unlock but otherwise the data is valid and there is
+	 *    no need to serialise with page lock.
+	 *
+	 * As the page lock gives no additional guarantee, we optimistically
+	 * wait on the page to be unlocked and check if it's up to date and
+	 * use the page if it is. Otherwise, the page lock is required to
+	 * distinguish between the different cases. The motivation is that we
+	 * avoid spurious serialisations and wakeups when multiple processes
+	 * wait on the same page for IO to complete.
+	 */
+	wait_on_page_locked(page);
+	if (PageUptodate(page))
+		goto out;
+
+	/* Distinguish between all the cases under the safety of the lock */
 	lock_page(page);
+
+	/* Case c or d, restart the operation */
 	if (!page->mapping) {
 		unlock_page(page);
 		page_cache_release(page);
 		goto repeat;
 	}
+
+	/* Someone else locked and filled the page in a very small window */
 	if (PageUptodate(page)) {
 		unlock_page(page);
 		goto out;
-- 
2.6.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read
@ 2016-01-25 10:03   ` Mel Gorman
  0 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-25 10:03 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML, Mel Gorman

In the generic read paths the kernel looks up a page in the page cache
and if it's up to date, it is used. If not, the page lock is acquired to
wait for IO to complete and then check the page.  If multiple processes
are waiting on IO, they all serialise against the lock and duplicate the
checks. This is unnecessary.

The page lock in itself does not give any guarantees to the callers about
the page state as it can be immediately truncated or reclaimed after the
page is unlocked. It's sufficient to wait_on_page_locked and then continue
if the page is up to date on wakeup.

It is possible that a truncated but up-to-date page is returned but the
reference taken during read prevents it disappearing underneath the caller
and the data is still valid if PageUptodate.

The overall impact is small as even if processes serialise on the lock,
the lock section is tiny once the IO is complete. Profiles indicated that
unlock_page and friends are generally a tiny portion of a read-intensive
workload.  An artifical test was created that had instances of dd access
a cache-cold file on an ext4 filesystem and measure how long the read took.

paralleldd
                                    4.4.0                 4.4.0
                                  vanilla             avoidlock
Amean    Elapsd-1          5.28 (  0.00%)        5.15 (  2.50%)
Amean    Elapsd-4          5.29 (  0.00%)        5.17 (  2.12%)
Amean    Elapsd-7          5.28 (  0.00%)        5.18 (  1.78%)
Amean    Elapsd-12         5.20 (  0.00%)        5.33 ( -2.50%)
Amean    Elapsd-21         5.14 (  0.00%)        5.21 ( -1.41%)
Amean    Elapsd-30         5.30 (  0.00%)        5.12 (  3.38%)
Amean    Elapsd-48         5.78 (  0.00%)        5.42 (  6.21%)
Amean    Elapsd-79         6.78 (  0.00%)        6.62 (  2.46%)
Amean    Elapsd-110        9.09 (  0.00%)        8.99 (  1.15%)
Amean    Elapsd-128       10.60 (  0.00%)       10.43 (  1.66%)

The impact is small but intuitively, it makes sense to avoid unnecessary
calls to lock_page.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
 mm/filemap.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/mm/filemap.c b/mm/filemap.c
index aa38593d0cd5..235ee2b0b5da 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1649,6 +1649,15 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
 					index, last_index - index);
 		}
 		if (!PageUptodate(page)) {
+			/*
+			 * See comment in do_read_cache_page on why
+			 * wait_on_page_locked is used to avoid unnecessarily
+			 * serialisations and why it's safe.
+			 */
+			wait_on_page_locked(page);
+			if (PageUptodate(page))
+				goto page_ok;
+
 			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
 				goto page_not_up_to_date;
@@ -2321,12 +2330,52 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 	if (PageUptodate(page))
 		goto out;
 
+	/*
+	 * Page is not up to date and may be locked due one of the following
+	 * case a: Page is being filled and the page lock is held
+	 * case b: Read/write error clearing the page uptodate status
+	 * case c: Truncation in progress (page locked)
+	 * case d: Reclaim in progress
+	 *
+	 * Case a, the page will be up to date when the page is unlocked.
+	 *    There is no need to serialise on the page lock here as the page
+	 *    is pinned so the lock gives no additional protection. Even if the
+	 *    the page is truncated, the data is still valid if PageUptodate as
+	 *    it's a race vs truncate race.
+	 * Case b, the page will not be up to date
+	 * Case c, the page may be truncated but in itself, the data may still
+	 *    be valid after IO completes as it's a read vs truncate race. The
+	 *    operation must restart if the page is not uptodate on unlock but
+	 *    otherwise serialising on page lock to stabilise the mapping gives
+	 *    no additional guarantees to the caller as the page lock is
+	 *    released before return.
+	 * Case d, similar to truncation. If reclaim holds the page lock, it
+	 *    will be a race with remove_mapping that determines if the mapping
+	 *    is valid on unlock but otherwise the data is valid and there is
+	 *    no need to serialise with page lock.
+	 *
+	 * As the page lock gives no additional guarantee, we optimistically
+	 * wait on the page to be unlocked and check if it's up to date and
+	 * use the page if it is. Otherwise, the page lock is required to
+	 * distinguish between the different cases. The motivation is that we
+	 * avoid spurious serialisations and wakeups when multiple processes
+	 * wait on the same page for IO to complete.
+	 */
+	wait_on_page_locked(page);
+	if (PageUptodate(page))
+		goto out;
+
+	/* Distinguish between all the cases under the safety of the lock */
 	lock_page(page);
+
+	/* Case c or d, restart the operation */
 	if (!page->mapping) {
 		unlock_page(page);
 		page_cache_release(page);
 		goto repeat;
 	}
+
+	/* Someone else locked and filled the page in a very small window */
 	if (PageUptodate(page)) {
 		unlock_page(page);
 		goto out;
-- 
2.6.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read
  2016-01-25 10:03   ` Mel Gorman
@ 2016-01-25 11:35     ` Jan Kara
  -1 siblings, 0 replies; 14+ messages in thread
From: Jan Kara @ 2016-01-25 11:35 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Andrew Morton, Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML

On Mon 25-01-16 10:03:24, Mel Gorman wrote:
> In the generic read paths the kernel looks up a page in the page cache
> and if it's up to date, it is used. If not, the page lock is acquired to
> wait for IO to complete and then check the page.  If multiple processes
> are waiting on IO, they all serialise against the lock and duplicate the
> checks. This is unnecessary.
> 
> The page lock in itself does not give any guarantees to the callers about
> the page state as it can be immediately truncated or reclaimed after the
> page is unlocked. It's sufficient to wait_on_page_locked and then continue
> if the page is up to date on wakeup.
> 
> It is possible that a truncated but up-to-date page is returned but the
> reference taken during read prevents it disappearing underneath the caller
> and the data is still valid if PageUptodate.
> 
> The overall impact is small as even if processes serialise on the lock,
> the lock section is tiny once the IO is complete. Profiles indicated that
> unlock_page and friends are generally a tiny portion of a read-intensive
> workload.  An artifical test was created that had instances of dd access
> a cache-cold file on an ext4 filesystem and measure how long the read took.
> 
> paralleldd
>                                     4.4.0                 4.4.0
>                                   vanilla             avoidlock
> Amean    Elapsd-1          5.28 (  0.00%)        5.15 (  2.50%)
> Amean    Elapsd-4          5.29 (  0.00%)        5.17 (  2.12%)
> Amean    Elapsd-7          5.28 (  0.00%)        5.18 (  1.78%)
> Amean    Elapsd-12         5.20 (  0.00%)        5.33 ( -2.50%)
> Amean    Elapsd-21         5.14 (  0.00%)        5.21 ( -1.41%)
> Amean    Elapsd-30         5.30 (  0.00%)        5.12 (  3.38%)
> Amean    Elapsd-48         5.78 (  0.00%)        5.42 (  6.21%)
> Amean    Elapsd-79         6.78 (  0.00%)        6.62 (  2.46%)
> Amean    Elapsd-110        9.09 (  0.00%)        8.99 (  1.15%)
> Amean    Elapsd-128       10.60 (  0.00%)       10.43 (  1.66%)
> 
> The impact is small but intuitively, it makes sense to avoid unnecessary
> calls to lock_page.
> 
> Signed-off-by: Mel Gorman <mgorman@techsingularity.net>

The patch looks good. One small nit below, otherwise feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

> ---
>  mm/filemap.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 49 insertions(+)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index aa38593d0cd5..235ee2b0b5da 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1649,6 +1649,15 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
>  					index, last_index - index);
>  		}
>  		if (!PageUptodate(page)) {
> +			/*
> +			 * See comment in do_read_cache_page on why
> +			 * wait_on_page_locked is used to avoid unnecessarily
> +			 * serialisations and why it's safe.
> +			 */
> +			wait_on_page_locked(page);
> +			if (PageUptodate(page))
> +				goto page_ok;
> +

We want a wait_on_page_locked_killable() here to match the
lock_page_killable() later in do_generic_file_read()?

									Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read
@ 2016-01-25 11:35     ` Jan Kara
  0 siblings, 0 replies; 14+ messages in thread
From: Jan Kara @ 2016-01-25 11:35 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Andrew Morton, Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML

On Mon 25-01-16 10:03:24, Mel Gorman wrote:
> In the generic read paths the kernel looks up a page in the page cache
> and if it's up to date, it is used. If not, the page lock is acquired to
> wait for IO to complete and then check the page.  If multiple processes
> are waiting on IO, they all serialise against the lock and duplicate the
> checks. This is unnecessary.
> 
> The page lock in itself does not give any guarantees to the callers about
> the page state as it can be immediately truncated or reclaimed after the
> page is unlocked. It's sufficient to wait_on_page_locked and then continue
> if the page is up to date on wakeup.
> 
> It is possible that a truncated but up-to-date page is returned but the
> reference taken during read prevents it disappearing underneath the caller
> and the data is still valid if PageUptodate.
> 
> The overall impact is small as even if processes serialise on the lock,
> the lock section is tiny once the IO is complete. Profiles indicated that
> unlock_page and friends are generally a tiny portion of a read-intensive
> workload.  An artifical test was created that had instances of dd access
> a cache-cold file on an ext4 filesystem and measure how long the read took.
> 
> paralleldd
>                                     4.4.0                 4.4.0
>                                   vanilla             avoidlock
> Amean    Elapsd-1          5.28 (  0.00%)        5.15 (  2.50%)
> Amean    Elapsd-4          5.29 (  0.00%)        5.17 (  2.12%)
> Amean    Elapsd-7          5.28 (  0.00%)        5.18 (  1.78%)
> Amean    Elapsd-12         5.20 (  0.00%)        5.33 ( -2.50%)
> Amean    Elapsd-21         5.14 (  0.00%)        5.21 ( -1.41%)
> Amean    Elapsd-30         5.30 (  0.00%)        5.12 (  3.38%)
> Amean    Elapsd-48         5.78 (  0.00%)        5.42 (  6.21%)
> Amean    Elapsd-79         6.78 (  0.00%)        6.62 (  2.46%)
> Amean    Elapsd-110        9.09 (  0.00%)        8.99 (  1.15%)
> Amean    Elapsd-128       10.60 (  0.00%)       10.43 (  1.66%)
> 
> The impact is small but intuitively, it makes sense to avoid unnecessary
> calls to lock_page.
> 
> Signed-off-by: Mel Gorman <mgorman@techsingularity.net>

The patch looks good. One small nit below, otherwise feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

> ---
>  mm/filemap.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 49 insertions(+)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index aa38593d0cd5..235ee2b0b5da 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1649,6 +1649,15 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
>  					index, last_index - index);
>  		}
>  		if (!PageUptodate(page)) {
> +			/*
> +			 * See comment in do_read_cache_page on why
> +			 * wait_on_page_locked is used to avoid unnecessarily
> +			 * serialisations and why it's safe.
> +			 */
> +			wait_on_page_locked(page);
> +			if (PageUptodate(page))
> +				goto page_ok;
> +

We want a wait_on_page_locked_killable() here to match the
lock_page_killable() later in do_generic_file_read()?

									Honza

-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/2] mm: filemap: Remove redundant code in do_read_cache_page
  2016-01-25 10:03   ` Mel Gorman
@ 2016-01-25 11:35     ` Jan Kara
  -1 siblings, 0 replies; 14+ messages in thread
From: Jan Kara @ 2016-01-25 11:35 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Andrew Morton, Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML

On Mon 25-01-16 10:03:23, Mel Gorman wrote:
> do_read_cache_page and __read_cache_page duplicates page filler code
> when filling the page for the first time. This patch simply removes the
> duplicate logic.
> 
> Signed-off-by: Mel Gorman <mgorman@techsingularity.net>

Looks good to me. You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  mm/filemap.c | 43 ++++++++++++-------------------------------
>  1 file changed, 12 insertions(+), 31 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index bc943867d68c..aa38593d0cd5 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2283,7 +2283,7 @@ static struct page *wait_on_page_read(struct page *page)
>  	return page;
>  }
>  
> -static struct page *__read_cache_page(struct address_space *mapping,
> +static struct page *do_read_cache_page(struct address_space *mapping,
>  				pgoff_t index,
>  				int (*filler)(void *, struct page *),
>  				void *data,
> @@ -2305,31 +2305,19 @@ static struct page *__read_cache_page(struct address_space *mapping,
>  			/* Presumably ENOMEM for radix tree node */
>  			return ERR_PTR(err);
>  		}
> +
> +filler:
>  		err = filler(data, page);
>  		if (err < 0) {
>  			page_cache_release(page);
> -			page = ERR_PTR(err);
> -		} else {
> -			page = wait_on_page_read(page);
> +			return ERR_PTR(err);
>  		}
> -	}
> -	return page;
> -}
> -
> -static struct page *do_read_cache_page(struct address_space *mapping,
> -				pgoff_t index,
> -				int (*filler)(void *, struct page *),
> -				void *data,
> -				gfp_t gfp)
> -
> -{
> -	struct page *page;
> -	int err;
>  
> -retry:
> -	page = __read_cache_page(mapping, index, filler, data, gfp);
> -	if (IS_ERR(page))
> -		return page;
> +		page = wait_on_page_read(page);
> +		if (IS_ERR(page))
> +			return page;
> +		goto out;
> +	}
>  	if (PageUptodate(page))
>  		goto out;
>  
> @@ -2337,21 +2325,14 @@ static struct page *do_read_cache_page(struct address_space *mapping,
>  	if (!page->mapping) {
>  		unlock_page(page);
>  		page_cache_release(page);
> -		goto retry;
> +		goto repeat;
>  	}
>  	if (PageUptodate(page)) {
>  		unlock_page(page);
>  		goto out;
>  	}
> -	err = filler(data, page);
> -	if (err < 0) {
> -		page_cache_release(page);
> -		return ERR_PTR(err);
> -	} else {
> -		page = wait_on_page_read(page);
> -		if (IS_ERR(page))
> -			return page;
> -	}
> +	goto filler;
> +
>  out:
>  	mark_page_accessed(page);
>  	return page;
> -- 
> 2.6.4
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/2] mm: filemap: Remove redundant code in do_read_cache_page
@ 2016-01-25 11:35     ` Jan Kara
  0 siblings, 0 replies; 14+ messages in thread
From: Jan Kara @ 2016-01-25 11:35 UTC (permalink / raw)
  To: Mel Gorman
  Cc: Andrew Morton, Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML

On Mon 25-01-16 10:03:23, Mel Gorman wrote:
> do_read_cache_page and __read_cache_page duplicates page filler code
> when filling the page for the first time. This patch simply removes the
> duplicate logic.
> 
> Signed-off-by: Mel Gorman <mgorman@techsingularity.net>

Looks good to me. You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  mm/filemap.c | 43 ++++++++++++-------------------------------
>  1 file changed, 12 insertions(+), 31 deletions(-)
> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index bc943867d68c..aa38593d0cd5 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -2283,7 +2283,7 @@ static struct page *wait_on_page_read(struct page *page)
>  	return page;
>  }
>  
> -static struct page *__read_cache_page(struct address_space *mapping,
> +static struct page *do_read_cache_page(struct address_space *mapping,
>  				pgoff_t index,
>  				int (*filler)(void *, struct page *),
>  				void *data,
> @@ -2305,31 +2305,19 @@ static struct page *__read_cache_page(struct address_space *mapping,
>  			/* Presumably ENOMEM for radix tree node */
>  			return ERR_PTR(err);
>  		}
> +
> +filler:
>  		err = filler(data, page);
>  		if (err < 0) {
>  			page_cache_release(page);
> -			page = ERR_PTR(err);
> -		} else {
> -			page = wait_on_page_read(page);
> +			return ERR_PTR(err);
>  		}
> -	}
> -	return page;
> -}
> -
> -static struct page *do_read_cache_page(struct address_space *mapping,
> -				pgoff_t index,
> -				int (*filler)(void *, struct page *),
> -				void *data,
> -				gfp_t gfp)
> -
> -{
> -	struct page *page;
> -	int err;
>  
> -retry:
> -	page = __read_cache_page(mapping, index, filler, data, gfp);
> -	if (IS_ERR(page))
> -		return page;
> +		page = wait_on_page_read(page);
> +		if (IS_ERR(page))
> +			return page;
> +		goto out;
> +	}
>  	if (PageUptodate(page))
>  		goto out;
>  
> @@ -2337,21 +2325,14 @@ static struct page *do_read_cache_page(struct address_space *mapping,
>  	if (!page->mapping) {
>  		unlock_page(page);
>  		page_cache_release(page);
> -		goto retry;
> +		goto repeat;
>  	}
>  	if (PageUptodate(page)) {
>  		unlock_page(page);
>  		goto out;
>  	}
> -	err = filler(data, page);
> -	if (err < 0) {
> -		page_cache_release(page);
> -		return ERR_PTR(err);
> -	} else {
> -		page = wait_on_page_read(page);
> -		if (IS_ERR(page))
> -			return page;
> -	}
> +	goto filler;
> +
>  out:
>  	mark_page_accessed(page);
>  	return page;
> -- 
> 2.6.4
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read
  2016-01-25 11:35     ` Jan Kara
@ 2016-01-25 14:05       ` Mel Gorman
  -1 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-25 14:05 UTC (permalink / raw)
  To: Jan Kara; +Cc: Andrew Morton, Hugh Dickins, Linux-FSDevel, Linux-MM, LKML

On Mon, Jan 25, 2016 at 12:35:13PM +0100, Jan Kara wrote:
> 
> Reviewed-by: Jan Kara <jack@suse.cz>
> 

Thanks!

> > ---
> >  mm/filemap.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 49 insertions(+)
> > 
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index aa38593d0cd5..235ee2b0b5da 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -1649,6 +1649,15 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
> >  					index, last_index - index);
> >  		}
> >  		if (!PageUptodate(page)) {
> > +			/*
> > +			 * See comment in do_read_cache_page on why
> > +			 * wait_on_page_locked is used to avoid unnecessarily
> > +			 * serialisations and why it's safe.
> > +			 */
> > +			wait_on_page_locked(page);
> > +			if (PageUptodate(page))
> > +				goto page_ok;
> > +
> 
> We want a wait_on_page_locked_killable() here to match the
> lock_page_killable() later in do_generic_file_read()?
> 

Yes, I'll fix it in v2.

-- 
Mel Gorman
SUSE Labs

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read
@ 2016-01-25 14:05       ` Mel Gorman
  0 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-25 14:05 UTC (permalink / raw)
  To: Jan Kara; +Cc: Andrew Morton, Hugh Dickins, Linux-FSDevel, Linux-MM, LKML

On Mon, Jan 25, 2016 at 12:35:13PM +0100, Jan Kara wrote:
> 
> Reviewed-by: Jan Kara <jack@suse.cz>
> 

Thanks!

> > ---
> >  mm/filemap.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 49 insertions(+)
> > 
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index aa38593d0cd5..235ee2b0b5da 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -1649,6 +1649,15 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
> >  					index, last_index - index);
> >  		}
> >  		if (!PageUptodate(page)) {
> > +			/*
> > +			 * See comment in do_read_cache_page on why
> > +			 * wait_on_page_locked is used to avoid unnecessarily
> > +			 * serialisations and why it's safe.
> > +			 */
> > +			wait_on_page_locked(page);
> > +			if (PageUptodate(page))
> > +				goto page_ok;
> > +
> 
> We want a wait_on_page_locked_killable() here to match the
> lock_page_killable() later in do_generic_file_read()?
> 

Yes, I'll fix it in v2.

-- 
Mel Gorman
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read
  2016-01-26 14:09 [PATCH 0/2] Avoid unnecessary page locks in the generic read path v2r1 Mel Gorman
@ 2016-01-26 14:09   ` Mel Gorman
  0 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-26 14:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML, Mel Gorman

In the generic read paths the kernel looks up a page in the page cache
and if it's up to date, it is used. If not, the page lock is acquired to
wait for IO to complete and then check the page.  If multiple processes
are waiting on IO, they all serialise against the lock and duplicate the
checks. This is unnecessary.

The page lock in itself does not give any guarantees to the callers about
the page state as it can be immediately truncated or reclaimed after the
page is unlocked. It's sufficient to wait_on_page_locked and then continue
if the page is up to date on wakeup.

It is possible that a truncated but up-to-date page is returned but the
reference taken during read prevents it disappearing underneath the caller
and the data is still valid if PageUptodate.

The overall impact is small as even if processes serialise on the lock,
the lock section is tiny once the IO is complete. Profiles indicated that
unlock_page and friends are generally a tiny portion of a read-intensive
workload.  An artificial test was created that had instances of dd access
a cache-cold file on an ext4 filesystem and measure how long the read took.

paralleldd
                                    4.4.0                 4.4.0
                                  vanilla             avoidlock
Amean    Elapsd-1          5.28 (  0.00%)        5.15 (  2.50%)
Amean    Elapsd-4          5.29 (  0.00%)        5.17 (  2.12%)
Amean    Elapsd-7          5.28 (  0.00%)        5.18 (  1.78%)
Amean    Elapsd-12         5.20 (  0.00%)        5.33 ( -2.50%)
Amean    Elapsd-21         5.14 (  0.00%)        5.21 ( -1.41%)
Amean    Elapsd-30         5.30 (  0.00%)        5.12 (  3.38%)
Amean    Elapsd-48         5.78 (  0.00%)        5.42 (  6.21%)
Amean    Elapsd-79         6.78 (  0.00%)        6.62 (  2.46%)
Amean    Elapsd-110        9.09 (  0.00%)        8.99 (  1.15%)
Amean    Elapsd-128       10.60 (  0.00%)       10.43 (  1.66%)

The impact is small but intuitively, it makes sense to avoid unnecessary
calls to lock_page.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 mm/filemap.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/mm/filemap.c b/mm/filemap.c
index aa38593d0cd5..6a3c030b0c8d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1649,6 +1649,15 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
 					index, last_index - index);
 		}
 		if (!PageUptodate(page)) {
+			/*
+			 * See comment in do_read_cache_page on why
+			 * wait_on_page_locked is used to avoid unnecessarily
+			 * serialisations and why it's safe.
+			 */
+			wait_on_page_locked_killable(page);
+			if (PageUptodate(page))
+				goto page_ok;
+
 			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
 				goto page_not_up_to_date;
@@ -2321,12 +2330,52 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 	if (PageUptodate(page))
 		goto out;
 
+	/*
+	 * Page is not up to date and may be locked due one of the following
+	 * case a: Page is being filled and the page lock is held
+	 * case b: Read/write error clearing the page uptodate status
+	 * case c: Truncation in progress (page locked)
+	 * case d: Reclaim in progress
+	 *
+	 * Case a, the page will be up to date when the page is unlocked.
+	 *    There is no need to serialise on the page lock here as the page
+	 *    is pinned so the lock gives no additional protection. Even if the
+	 *    the page is truncated, the data is still valid if PageUptodate as
+	 *    it's a race vs truncate race.
+	 * Case b, the page will not be up to date
+	 * Case c, the page may be truncated but in itself, the data may still
+	 *    be valid after IO completes as it's a read vs truncate race. The
+	 *    operation must restart if the page is not uptodate on unlock but
+	 *    otherwise serialising on page lock to stabilise the mapping gives
+	 *    no additional guarantees to the caller as the page lock is
+	 *    released before return.
+	 * Case d, similar to truncation. If reclaim holds the page lock, it
+	 *    will be a race with remove_mapping that determines if the mapping
+	 *    is valid on unlock but otherwise the data is valid and there is
+	 *    no need to serialise with page lock.
+	 *
+	 * As the page lock gives no additional guarantee, we optimistically
+	 * wait on the page to be unlocked and check if it's up to date and
+	 * use the page if it is. Otherwise, the page lock is required to
+	 * distinguish between the different cases. The motivation is that we
+	 * avoid spurious serialisations and wakeups when multiple processes
+	 * wait on the same page for IO to complete.
+	 */
+	wait_on_page_locked(page);
+	if (PageUptodate(page))
+		goto out;
+
+	/* Distinguish between all the cases under the safety of the lock */
 	lock_page(page);
+
+	/* Case c or d, restart the operation */
 	if (!page->mapping) {
 		unlock_page(page);
 		page_cache_release(page);
 		goto repeat;
 	}
+
+	/* Someone else locked and filled the page in a very small window */
 	if (PageUptodate(page)) {
 		unlock_page(page);
 		goto out;
-- 
2.6.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read
@ 2016-01-26 14:09   ` Mel Gorman
  0 siblings, 0 replies; 14+ messages in thread
From: Mel Gorman @ 2016-01-26 14:09 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Hugh Dickins, Jan Kara, Linux-FSDevel, Linux-MM, LKML, Mel Gorman

In the generic read paths the kernel looks up a page in the page cache
and if it's up to date, it is used. If not, the page lock is acquired to
wait for IO to complete and then check the page.  If multiple processes
are waiting on IO, they all serialise against the lock and duplicate the
checks. This is unnecessary.

The page lock in itself does not give any guarantees to the callers about
the page state as it can be immediately truncated or reclaimed after the
page is unlocked. It's sufficient to wait_on_page_locked and then continue
if the page is up to date on wakeup.

It is possible that a truncated but up-to-date page is returned but the
reference taken during read prevents it disappearing underneath the caller
and the data is still valid if PageUptodate.

The overall impact is small as even if processes serialise on the lock,
the lock section is tiny once the IO is complete. Profiles indicated that
unlock_page and friends are generally a tiny portion of a read-intensive
workload.  An artificial test was created that had instances of dd access
a cache-cold file on an ext4 filesystem and measure how long the read took.

paralleldd
                                    4.4.0                 4.4.0
                                  vanilla             avoidlock
Amean    Elapsd-1          5.28 (  0.00%)        5.15 (  2.50%)
Amean    Elapsd-4          5.29 (  0.00%)        5.17 (  2.12%)
Amean    Elapsd-7          5.28 (  0.00%)        5.18 (  1.78%)
Amean    Elapsd-12         5.20 (  0.00%)        5.33 ( -2.50%)
Amean    Elapsd-21         5.14 (  0.00%)        5.21 ( -1.41%)
Amean    Elapsd-30         5.30 (  0.00%)        5.12 (  3.38%)
Amean    Elapsd-48         5.78 (  0.00%)        5.42 (  6.21%)
Amean    Elapsd-79         6.78 (  0.00%)        6.62 (  2.46%)
Amean    Elapsd-110        9.09 (  0.00%)        8.99 (  1.15%)
Amean    Elapsd-128       10.60 (  0.00%)       10.43 (  1.66%)

The impact is small but intuitively, it makes sense to avoid unnecessary
calls to lock_page.

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 mm/filemap.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/mm/filemap.c b/mm/filemap.c
index aa38593d0cd5..6a3c030b0c8d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1649,6 +1649,15 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
 					index, last_index - index);
 		}
 		if (!PageUptodate(page)) {
+			/*
+			 * See comment in do_read_cache_page on why
+			 * wait_on_page_locked is used to avoid unnecessarily
+			 * serialisations and why it's safe.
+			 */
+			wait_on_page_locked_killable(page);
+			if (PageUptodate(page))
+				goto page_ok;
+
 			if (inode->i_blkbits == PAGE_CACHE_SHIFT ||
 					!mapping->a_ops->is_partially_uptodate)
 				goto page_not_up_to_date;
@@ -2321,12 +2330,52 @@ static struct page *do_read_cache_page(struct address_space *mapping,
 	if (PageUptodate(page))
 		goto out;
 
+	/*
+	 * Page is not up to date and may be locked due one of the following
+	 * case a: Page is being filled and the page lock is held
+	 * case b: Read/write error clearing the page uptodate status
+	 * case c: Truncation in progress (page locked)
+	 * case d: Reclaim in progress
+	 *
+	 * Case a, the page will be up to date when the page is unlocked.
+	 *    There is no need to serialise on the page lock here as the page
+	 *    is pinned so the lock gives no additional protection. Even if the
+	 *    the page is truncated, the data is still valid if PageUptodate as
+	 *    it's a race vs truncate race.
+	 * Case b, the page will not be up to date
+	 * Case c, the page may be truncated but in itself, the data may still
+	 *    be valid after IO completes as it's a read vs truncate race. The
+	 *    operation must restart if the page is not uptodate on unlock but
+	 *    otherwise serialising on page lock to stabilise the mapping gives
+	 *    no additional guarantees to the caller as the page lock is
+	 *    released before return.
+	 * Case d, similar to truncation. If reclaim holds the page lock, it
+	 *    will be a race with remove_mapping that determines if the mapping
+	 *    is valid on unlock but otherwise the data is valid and there is
+	 *    no need to serialise with page lock.
+	 *
+	 * As the page lock gives no additional guarantee, we optimistically
+	 * wait on the page to be unlocked and check if it's up to date and
+	 * use the page if it is. Otherwise, the page lock is required to
+	 * distinguish between the different cases. The motivation is that we
+	 * avoid spurious serialisations and wakeups when multiple processes
+	 * wait on the same page for IO to complete.
+	 */
+	wait_on_page_locked(page);
+	if (PageUptodate(page))
+		goto out;
+
+	/* Distinguish between all the cases under the safety of the lock */
 	lock_page(page);
+
+	/* Case c or d, restart the operation */
 	if (!page->mapping) {
 		unlock_page(page);
 		page_cache_release(page);
 		goto repeat;
 	}
+
+	/* Someone else locked and filled the page in a very small window */
 	if (PageUptodate(page)) {
 		unlock_page(page);
 		goto out;
-- 
2.6.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2016-01-26 14:09 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-01-25 10:03 [PATCH 0/2] Avoid unnecessary page locks in the generic read path Mel Gorman
2016-01-25 10:03 ` Mel Gorman
2016-01-25 10:03 ` [PATCH 1/2] mm: filemap: Remove redundant code in do_read_cache_page Mel Gorman
2016-01-25 10:03   ` Mel Gorman
2016-01-25 11:35   ` Jan Kara
2016-01-25 11:35     ` Jan Kara
2016-01-25 10:03 ` [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read Mel Gorman
2016-01-25 10:03   ` Mel Gorman
2016-01-25 11:35   ` Jan Kara
2016-01-25 11:35     ` Jan Kara
2016-01-25 14:05     ` Mel Gorman
2016-01-25 14:05       ` Mel Gorman
2016-01-26 14:09 [PATCH 0/2] Avoid unnecessary page locks in the generic read path v2r1 Mel Gorman
2016-01-26 14:09 ` [PATCH 2/2] mm: filemap: Avoid unnecessary calls to lock_page when waiting for IO to complete during a read Mel Gorman
2016-01-26 14:09   ` Mel Gorman

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.