* [PATCH] xfs: don't zero partial page cache pages during O_DIRECT
@ 2014-08-08 14:35 Chris Mason
  2014-08-08 15:17 ` Chris Mason
                   ` (4 more replies)
  0 siblings, 5 replies; 19+ messages in thread
From: Chris Mason @ 2014-08-08 14:35 UTC (permalink / raw)
  To: xfs, Dave Chinner, Eric Sandeen


xfs is using truncate_pagecache_range to invalidate the page cache
during DIO reads.  This is different from the other filesystems, which only
invalidate pages during DIO writes.

truncate_pagecache_range is meant to be used when we are freeing the
underlying data structs from disk, so it will zero any partial ranges
in the page.  This means a DIO read can zero out part of the page cache
page, and it is possible the page will stay in cache.

Buffered reads will find an up-to-date page with zeros instead of the
data actually on disk.
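
To make the failure concrete (an illustrative walk-through, assuming a 4K
page size and the 5120/512 offsets the test program below uses):

  1) A buffered read populates the page cache page covering bytes
     4096-8191 with the real file contents.
  2) An O_DIRECT read at offset 5120 calls
     truncate_pagecache_range(inode, 5120, -1).  Because 5120 is not
     page aligned, the partially covered page is not dropped; its bytes
     from 5120 to the end of the page are zeroed in place and the page
     can stay in cache, still marked up to date.
  3) A later buffered read at offset 5120 is then satisfied from that
     page and returns zeros instead of the data on disk.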

This patch fixes things by leaving the page cache alone during DIO
reads.

We discovered this when our buffered IO program for distributing
database indexes was finding zero-filled blocks.  I think writes
are broken too, but I'll leave that for a separate patch because I don't
fully understand what XFS needs to have happen during a DIO write.

Test program:

/*
 * gcc -Wall -o read-race read-race.c
 * ./read-race filename
 */
#define _XOPEN_SOURCE 600
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <sys/time.h>
#include <signal.h>

#ifndef O_DIRECT
#define O_DIRECT	00040000
#endif

#define READ_SIZE 512
#define BUF_SIZE (1024 * 1024)

static void usage(char *name)
{
	fprintf(stderr, "usage: %s filename\n", name);
	exit(1);
}

/* return 1 if the buffer is full of zeros */
static int all_zeros(char *buf, int sz)
{
	int i;
	for (i = 0; i < sz; i++) {
		if (*buf++)
			return 0;
	}
	return 1;
}

static void run_test(char *filename, char *buf, int direct)
{
	int fd;
	int ret;
	struct timeval start;
	struct timeval now;

	fd = open(filename, O_RDONLY | direct);
	if (fd < 0) {
		perror("open");
		exit(1);
	}

	/*
	 * seek to a 512b aligned offset in the page and then do a
	 * read.  Check the read for zeros, and if we're buffered
	 * use FADV_DONTNEED to drop the page cache.  repeat for 15 seconds
	 */
	gettimeofday(&start, NULL);
	while (1) {
		ret = lseek(fd, 5120, SEEK_SET);
		if (ret < 0) {
			perror("lseek");
			exit(1);
		}
		if (!direct) {
			ret = posix_fadvise(fd, 0, 8192, POSIX_FADV_DONTNEED);
			if (ret) {
				perror("fadvise");
				exit(1);
			}
		}
		ret = read(fd, buf, READ_SIZE);
		if (ret < READ_SIZE) {
			fprintf(stderr, "invalid read\n");
			exit(1);
		}

		if (all_zeros(buf, READ_SIZE)) {
			fprintf(stderr, "error: found zero range direct: %d\n",
				direct ? 1 : 0);
			exit(255);
		}

		gettimeofday(&now, NULL);
		if (now.tv_sec - start.tv_sec > 15)
			exit(0);
	}
}

int main(int ac, char **av)
{
	int ret;
	int fd;
	char *filename;
	char *buf;
	int pagesize = sysconf(_SC_PAGESIZE);
	pid_t buffered_pid = 0;
	pid_t direct_pid = 0;
	pid_t wait_pid;
	int status = 0;
	int test_failure = 0;

	if (ac != 2)
		usage(av[0]);
	else
		filename = av[1];

	ret = posix_memalign((void **)&buf, pagesize, BUF_SIZE);
	if (ret) {
		perror("posix_memalign");
		exit(1);
	}

	/* step one, create our test file and fill with non-zero */
	fd = open(filename, O_WRONLY | O_CREAT, 0700);
	if (fd < 0) {
		perror("open for writing");
		exit(1);
	}
	memset(buf, 1, BUF_SIZE);

	ret = write(fd, buf, BUF_SIZE);
	if (ret != BUF_SIZE) {
		fprintf(stderr, "failed to fill the test file\n");
		exit(1);
	}
	close(fd);

	/* start the buffered reader */
	buffered_pid = fork();
	if (buffered_pid < 0) {
		perror("fork");
		exit(1);
	} else if (buffered_pid == 0) {
		run_test(filename, buf, 0);
		exit(0);
	}

	/* start the direct reader */
	direct_pid = fork();
	if (direct_pid < 0) {
		perror("fork");
		goto cleanup;
	} else if (direct_pid == 0) {
		run_test(filename, buf, O_DIRECT);
		exit(0);
	}

	/* wait for buffered to finish */
	wait_pid = waitpid(buffered_pid, &status, 0);
	if (wait_pid < 0) {
		perror("waitpid buffered");
		goto cleanup;
	}
	if (WIFEXITED(status)) {
		int ret = WEXITSTATUS(status);
		printf("buffered exits with %d\n", ret);
		if (ret) {
			buffered_pid = 0;
			test_failure = ret;
			goto cleanup;
		}
	} else {
		test_failure = 1;
	}


	/* wait for direct to finish */
	wait_pid = waitpid(direct_pid, &status, 0);
	if (wait_pid < 0) {
		perror("waitpid direct");
		goto cleanup;
	}
	if (WIFEXITED(status)) {
		int ret = WEXITSTATUS(status);
		printf("direct exits with %d\n", ret);
		test_failure |= ret;
	} else {
		test_failure |= 1;
	}

	exit(test_failure);

cleanup:

	if (direct_pid > 0)
		kill(direct_pid, SIGTERM);
	if (buffered_pid > 0)
		kill(buffered_pid, SIGTERM);

	exit(test_failure);
}

Signed-off-by: Chris Mason <clm@fb.com>
cc: stable@vger.kernel.org

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1f66779..8d25d98 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -295,7 +295,11 @@ xfs_file_read_iter(
 				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
 				return ret;
 			}
-			truncate_pagecache_range(VFS_I(ip), pos, -1);
+
+			/* we don't remove any pages here.  A direct read
+			 * does not invalidate any contents of the page
+			 * cache
+			 */
 		}
 		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 	}


* Re: [PATCH] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-08 14:35 [PATCH] xfs: don't zero partial page cache pages during O_DIRECT Chris Mason
@ 2014-08-08 15:17 ` Chris Mason
  2014-08-08 16:04 ` [PATCH RFC] xfs: use invalidate_inode_pages2_range for DIO writes Chris Mason
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 19+ messages in thread
From: Chris Mason @ 2014-08-08 15:17 UTC (permalink / raw)
  To: xfs, Dave Chinner, Eric Sandeen

On 08/08/2014 10:35 AM, Chris Mason wrote:
> 
> xfs is using truncate_pagecache_range to invalidate the page cache
> during DIO reads.  This is different from the other filesystems who only
> invalidate pages during DIO writes.
> 
> truncate_pagecache_range is meant to be used when we are freeing the
> underlying data structs from disk, so it will zero any partial ranges
> in the page.  This means a DIO read can zero out part of the page cache
> page, and it is possible the page will stay in cache.
> 
> buffered reads will find an up to date page with zeros instead of the
> data actually on disk.
> 
> This patch fixes things by leaving the page cache alone during DIO
> reads.
> 
> We discovered this when our buffered IO program for distributing
> database indexes was finding zero filled blocks.  I think writes
> are broken too, but I'll leave that for a separate patch because I don't
> fully understand what XFS needs to happen during a DIO write.

I stuck a cc: stable@vger.kernel.org after my Signed-off-by, but then
inserted a giant test program.  Just realized the cc might get lost...
sorry, I wasn't trying to sneak it in.

I've been trying to figure out why this bug doesn't show up in our 3.2
kernels but does show up now.  Today xfs does this:

     truncate_pagecache_range(VFS_I(ip), pos, -1);

But in 3.2 we did this:

     ret = -xfs_flushinval_pages(ip,
                              (iocb->ki_pos & PAGE_CACHE_MASK),
                              -1, FI_REMAPF_LOCKED);


Since we've done pos & PAGE_CACHE_MASK, the 3.2 code never sent a
partial offset, so it never zeroed partial pages.
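
As a rough illustration (a hedged sketch, not from the original mail; it
assumes a 4K page size and mirrors PAGE_CACHE_MASK as
~(PAGE_CACHE_SIZE - 1)):

#include <stdio.h>

int main(void)
{
	long page_cache_size = 4096;
	long page_cache_mask = ~(page_cache_size - 1);	/* stand-in for PAGE_CACHE_MASK */
	long pos = 5120;				/* offset the test program reads from */

	/* 3.2: the invalidation started at the masked, page-aligned offset */
	printf("masked pos: %ld\n", pos & page_cache_mask);	/* prints 4096 */

	/*
	 * The current code passes the raw pos (5120) to
	 * truncate_pagecache_range(), which zeroes the in-range tail of the
	 * page cache page spanning that offset.
	 */
	return 0;
}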

> 
> Signed-off-by: Chris Mason <clm@fb.com>
> cc: stable@vger.kernel.org
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 1f66779..8d25d98 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -295,7 +295,11 @@ xfs_file_read_iter(
>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>  				return ret;
>  			}
> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> +
> +			/* we don't remove any pages here.  A direct read
> +			 * does not invalidate any contents of the page
> +			 * cache
> +			 */
>  		}
>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
>  	}
> 


* [PATCH RFC] xfs: use invalidate_inode_pages2_range for DIO writes
  2014-08-08 14:35 [PATCH] xfs: don't zero partial page cache pages during O_DIRECT Chris Mason
  2014-08-08 15:17 ` Chris Mason
@ 2014-08-08 16:04 ` Chris Mason
  2014-08-09  0:48   ` Dave Chinner
  2014-08-08 20:39 ` [PATCH] xfs: don't zero partial page cache pages during O_DIRECT Brian Foster
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 19+ messages in thread
From: Chris Mason @ 2014-08-08 16:04 UTC (permalink / raw)
  To: xfs, Dave Chinner, Eric Sandeen


xfs is using truncate_pagecache_range to invalidate the page cache
during DIO writes.  The other filesystems are calling
invalidate_inode_pages2_range.
 
truncate_pagecache_range is meant to be used when we are freeing the
underlying data structs from disk, so it will zero any partial ranges
in the page.  This means a DIO write can zero out part of the page cache
page, and it is possible the page will stay in cache.

This one is an RFC because it is untested and because I don't understand
how XFS is dealing with pages the truncate was unable to clear away.
I'm not able to actually trigger zeros by mixing DIO writes with
buffered reads.

Signed-off-by: Chris Mason <clm@fb.com>

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8d25d98..c30c112 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -638,7 +638,10 @@ xfs_file_dio_aio_write(
 						    pos, -1);
 		if (ret)
 			goto out;
-		truncate_pagecache_range(VFS_I(ip), pos, -1);
+
+		/* what do we do if we can't invalidate the pages? */
+		invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+					      pos >> PAGE_CACHE_SHIFT, -1);
 	}
 
 	/*


* Re: [PATCH] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-08 14:35 [PATCH] xfs: don't zero partial page cache pages during O_DIRECT Chris Mason
  2014-08-08 15:17 ` Chris Mason
  2014-08-08 16:04 ` [PATCH RFC] xfs: use invalidate_inode_pages2_range for DIO writes Chris Mason
@ 2014-08-08 20:39 ` Brian Foster
  2014-08-09  0:36 ` Dave Chinner
  2014-08-09 12:57 ` [PATCH v2] " Chris Mason
  4 siblings, 0 replies; 19+ messages in thread
From: Brian Foster @ 2014-08-08 20:39 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Fri, Aug 08, 2014 at 10:35:38AM -0400, Chris Mason wrote:
> 
> xfs is using truncate_pagecache_range to invalidate the page cache
> during DIO reads.  This is different from the other filesystems who only
> invalidate pages during DIO writes.
> 
> truncate_pagecache_range is meant to be used when we are freeing the
> underlying data structs from disk, so it will zero any partial ranges
> in the page.  This means a DIO read can zero out part of the page cache
> page, and it is possible the page will stay in cache.
> 
> buffered reads will find an up to date page with zeros instead of the
> data actually on disk.
> 
> This patch fixes things by leaving the page cache alone during DIO
> reads.
> 
> We discovered this when our buffered IO program for distributing
> database indexes was finding zero filled blocks.  I think writes
> are broken too, but I'll leave that for a separate patch because I don't
> fully understand what XFS needs to happen during a DIO write.
> 
> Test program:
> 
...
> 
> Signed-off-by: Chris Mason <clm@fb.com>
> cc: stable@vger.kernel.org
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 1f66779..8d25d98 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -295,7 +295,11 @@ xfs_file_read_iter(
>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>  				return ret;
>  			}
> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> +
> +			/* we don't remove any pages here.  A direct read
> +			 * does not invalidate any contents of the page
> +			 * cache
> +			 */
>  		}

That seems sane to me at first glance. I don't know why we would need to
completely kill the cache on a dio read. I'm not a fan of the additional
comment though. We should probably just fix up the existing comment
instead. It also seems like we might be able to kill the XFS_IOLOCK_EXCL
dance here if the truncate goes away... Dave?

FWIW, I had to go back to the following commit to see where this
originates from:

commit 9cea236492ebabb9545564eb039aa0f477a05c96
Author: Nathan Scott <nathans@sgi.com>
Date:   Fri Mar 17 17:26:41 2006 +1100

    [XFS] Flush and invalidate dirty pages at the start of a direct read also,
    else we can hit a delalloc-extents-via-direct-io BUG.
    
    SGI-PV: 949916
    SGI-Modid: xfs-linux-melb:xfs-kern:25483a
    
    Signed-off-by: Nathan Scott <nathans@sgi.com>
    ...

That adds a VOP_FLUSHINVAL_PAGES() call that looks like it's some kind
of portability API. I would expect the flush to deal with any delalloc
conversion issues vs. the invalidation, so perhaps the invalidation part
is a historical artifact of the api. Then again, there's also a straight
'flushpages' call so perhaps there's more to it than that.

Brian

>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
>  	}
> 

* Re: [PATCH] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-08 14:35 [PATCH] xfs: don't zero partial page cache pages during O_DIRECT Chris Mason
                   ` (2 preceding siblings ...)
  2014-08-08 20:39 ` [PATCH] xfs: don't zero partial page cache pages during O_DIRECT Brian Foster
@ 2014-08-09  0:36 ` Dave Chinner
  2014-08-09  2:32   ` Chris Mason
  2014-08-09 12:57 ` [PATCH v2] " Chris Mason
  4 siblings, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2014-08-09  0:36 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Fri, Aug 08, 2014 at 10:35:38AM -0400, Chris Mason wrote:
> 
> xfs is using truncate_pagecache_range to invalidate the page cache
> during DIO reads.  This is different from the other filesystems who only
> invalidate pages during DIO writes.

Historical oddity thanks to wrapper functions that were kept way
longer than they should have been.

> truncate_pagecache_range is meant to be used when we are freeing the
> underlying data structs from disk, so it will zero any partial ranges
> in the page.  This means a DIO read can zero out part of the page cache
> page, and it is possible the page will stay in cache.

Commit fb59581 ("xfs: remove xfs_flushinval_pages") also removed
the offset masks that seem to be the issue here.  Classic case of a
regression caused by removing 10+ year old code that was not clearly
documented and didn't appear important.

The real question is why isn't fsx and other corner case data
integrity tools tripping over this?

> buffered reads will find an up to date page with zeros instead of the
> data actually on disk.
> 
> This patch fixes things by leaving the page cache alone during DIO
> reads.
> 
> We discovered this when our buffered IO program for distributing
> database indexes was finding zero filled blocks.  I think writes
> are broken too, but I'll leave that for a separate patch because I don't
> fully understand what XFS needs to happen during a DIO write.
> 
> Test program:

Encapsulate it in a generic xfstest, please, and send it to
fstests@vger.kernel.org.

...

> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 1f66779..8d25d98 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -295,7 +295,11 @@ xfs_file_read_iter(
>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>  				return ret;
>  			}
> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> +
> +			/* we don't remove any pages here.  A direct read
> +			 * does not invalidate any contents of the page
> +			 * cache
> +			 */

I guarantee you that there are applications out there that rely on
the implicit invalidation behaviour for performance. There are also
applications out there that rely on it for correctness, because the
OS is not the only source of data in the filesystem the OS has
mounted.

Besides, XFS's direct IO semantics are far saner, more predictable
and hence are more widely useful than the generic code. As such,
we're not going to regress semantics that have been unchanged
over 20 years just to match whatever insanity the generic Linux code
does right now.

Go on, call me a deranged monkey on some serious mind-controlling
substances. I don't care. :)

I think the fix should probably just be:

-			truncate_pagecache_range(VFS_I(ip), pos, -1);
+			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+					pos >> PAGE_CACHE_SHIFT, -1);

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH RFC] xfs: use invalidate_inode_pages2_range for DIO writes
  2014-08-08 16:04 ` [PATCH RFC] xfs: use invalidate_inode_pages2_range for DIO writes Chris Mason
@ 2014-08-09  0:48   ` Dave Chinner
  2014-08-09  2:42     ` Chris Mason
  0 siblings, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2014-08-09  0:48 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Fri, Aug 08, 2014 at 12:04:40PM -0400, Chris Mason wrote:
> 
> xfs is using truncate_pagecache_range to invalidate the page cache
> during DIO writes.  The other filesystems are calling
> invalidate_inode_pages2_range
>  
> truncate_pagecache_range is meant to be used when we are freeing the
> underlying data structs from disk, so it will zero any partial ranges
> in the page.  This means a DIO write can zero out part of the page cache
> page, and it is possible the page will stay in cache.
> 
> This one is an RFC because it is untested and because I don't understand
> how XFS is dealing with pages the truncate was unable to clear away.
> I'm not able to actually trigger zeros by mixing DIO writes with
> buffered reads.
> 
> Signed-off-by: Chris Mason <clm@fb.com>
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 8d25d98..c30c112 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -638,7 +638,10 @@ xfs_file_dio_aio_write(
>  						    pos, -1);
>  		if (ret)
>  			goto out;
> -		truncate_pagecache_range(VFS_I(ip), pos, -1);
> +
> +		/* what do we do if we can't invalidate the pages? */
> +		invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> +					      pos >> PAGE_CACHE_SHIFT, -1);

I don't think it can on XFS.

We're holding the XFS_IOLOCK_EXCL, so no other syscall based IO can
dirty pages, all the pages are clean, try_to_free_buffers() will
never fail, no-one can run a truncate operation concurently, and
so on.

The only thing that could cause an issue is a racing mmap page fault
dirtying the page. But if you are mixing mmap+direct IO on the same
file, you lost a long time ago so that's nothing new at all.

So, I'd just do:

		ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
						pos >> PAGE_CACHE_SHIFT, -1);
		WARN_ON_ONCE(ret);
		ret = 0;

That way we'll find out if it does ever fail, and if it does we can
take it from there.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-09  0:36 ` Dave Chinner
@ 2014-08-09  2:32   ` Chris Mason
  2014-08-09  3:19     ` Eric Sandeen
  2014-08-09  4:17     ` Dave Chinner
  0 siblings, 2 replies; 19+ messages in thread
From: Chris Mason @ 2014-08-09  2:32 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Eric Sandeen, xfs



On 08/08/2014 08:36 PM, Dave Chinner wrote:
> On Fri, Aug 08, 2014 at 10:35:38AM -0400, Chris Mason wrote:
>>
>> xfs is using truncate_pagecache_range to invalidate the page cache
>> during DIO reads.  This is different from the other filesystems who only
>> invalidate pages during DIO writes.
> 
> Historical oddity thanks to wrapper functions that were kept way
> longer than they should have been.
> 
>> truncate_pagecache_range is meant to be used when we are freeing the
>> underlying data structs from disk, so it will zero any partial ranges
>> in the page.  This means a DIO read can zero out part of the page cache
>> page, and it is possible the page will stay in cache.
> 
> commit fb59581 ("xfs: remove xfs_flushinval_pages").  also removed
> the offset masks that seem to be the issue here. Classic case of a
> regression caused by removing 10+ year old code that was not clearly
> documented and didn't appear important.
> 
> The real question is why isn't fsx and other corner case data
> integrity tools tripping over this?
> 

My question too.  Maybe not mixing buffered/direct for partial pages?
Does fsx only do 4K O_DIRECT?

>> buffered reads will find an up to date page with zeros instead of the
>> data actually on disk.
>>
>> This patch fixes things by leaving the page cache alone during DIO
>> reads.
>>
>> We discovered this when our buffered IO program for distributing
>> database indexes was finding zero filled blocks.  I think writes
>> are broken too, but I'll leave that for a separate patch because I don't
>> fully understand what XFS needs to happen during a DIO write.
>>
>> Test program:
> 
> Encapsulate it in a generic xfstest, please, and send it to
> fstests@vger.kernel.org.

This test prog was looking for races, which we really don't have.  It
can be much shorter to just look for the improper zeroing from a single
thread.  I can send it either way.

[ ... ]

> I guarantee you that there are applications out there that rely on
> the implicit invalidation behaviour for performance. There are also
> applications out that rely on it for correctness, too, because the
> OS is not the only source of data in the filesystem the OS has
> mounted.
> 
> Besides, XFS's direct IO semantics are far saner, more predictable
> and hence are more widely useful than the generic code. As such,
> we're not going to regress semantics that have been unchanged
> over 20 years just to match whatever insanity the generic Linux code
> does right now.
> 
> Go on, call me a deranged monkey on some serious mind-controlling
> substances. I don't care. :)

The deranged part is invalidating pos -> -1 on a huge file because of a
single 512b direct read.  But, if you mix O_DIRECT and buffered you get
what the monkeys give you and like it.

> 
> I think the fix should probably just be:
> 
> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> +					pos >> PAGE_CACHE_SHIFT, -1);
> 

I'll retest with this in the morning.  The invalidate is basically what
we had before with the masking & PAGE_CACHE_SHIFT.

-chris


* Re: [PATCH RFC] xfs: use invalidate_inode_pages2_range for DIO writes
  2014-08-09  0:48   ` Dave Chinner
@ 2014-08-09  2:42     ` Chris Mason
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Mason @ 2014-08-09  2:42 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Eric Sandeen, xfs



On 08/08/2014 08:48 PM, Dave Chinner wrote:
> On Fri, Aug 08, 2014 at 12:04:40PM -0400, Chris Mason wrote:
>>
>> xfs is using truncate_pagecache_range to invalidate the page cache
>> during DIO writes.  The other filesystems are calling
>> invalidate_inode_pages2_range
>>  
>> truncate_pagecache_range is meant to be used when we are freeing the
>> underlying data structs from disk, so it will zero any partial ranges
>> in the page.  This means a DIO write can zero out part of the page cache
>> page, and it is possible the page will stay in cache.
>>
>> This one is an RFC because it is untested and because I don't understand
>> how XFS is dealing with pages the truncate was unable to clear away.
>> I'm not able to actually trigger zeros by mixing DIO writes with
>> buffered reads.
>>
>> Signed-off-by: Chris Mason <clm@fb.com>
>>
>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>> index 8d25d98..c30c112 100644
>> --- a/fs/xfs/xfs_file.c
>> +++ b/fs/xfs/xfs_file.c
>> @@ -638,7 +638,10 @@ xfs_file_dio_aio_write(
>>  						    pos, -1);
>>  		if (ret)
>>  			goto out;
>> -		truncate_pagecache_range(VFS_I(ip), pos, -1);
>> +
>> +		/* what do we do if we can't invalidate the pages? */
>> +		invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
>> +					      pos >> PAGE_CACHE_SHIFT, -1);
> 
> I don't think it can on XFS.
> 
> We're holding the XFS_IOLOCK_EXCL, so no other syscall based IO can
> dirty pages, all the pages are clean, try_to_free_buffers() will
> never fail, no-one can run a truncate operation concurently, and
> so on.
> 
> So, I'd just do:
> 
> 		ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> 						pos >> PAGE_CACHE_SHIFT, -1);
> 		WARN_ON_ONCE(ret);
> 		ret = 0;
> 

Since pos is page aligned I agree this should be fine.  I'll leave that
one to you though, since I don't have a great test case for/against it.


-chris


* Re: [PATCH] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-09  2:32   ` Chris Mason
@ 2014-08-09  3:19     ` Eric Sandeen
  2014-08-09  4:17     ` Dave Chinner
  1 sibling, 0 replies; 19+ messages in thread
From: Eric Sandeen @ 2014-08-09  3:19 UTC (permalink / raw)
  To: Chris Mason, Dave Chinner; +Cc: xfs

On 8/8/14, 9:32 PM, Chris Mason wrote:

>> Go on, call me a deranged monkey on some serious mind-controlling
>> substances. I don't care. :)

Insider / obscure quote achievement unlocked!
 
> The deranged part is invalidating pos -> -1 on a huge file because of a
> single 512b direct read.  But, if you mix O_DIRECT and buffered you get
> what the monkeys give you and like it.

Agreed, I really scratch my head at that one...

-Eric


* Re: [PATCH] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-09  2:32   ` Chris Mason
  2014-08-09  3:19     ` Eric Sandeen
@ 2014-08-09  4:17     ` Dave Chinner
  1 sibling, 0 replies; 19+ messages in thread
From: Dave Chinner @ 2014-08-09  4:17 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Fri, Aug 08, 2014 at 10:32:58PM -0400, Chris Mason wrote:
> 
> 
> On 08/08/2014 08:36 PM, Dave Chinner wrote:
> > On Fri, Aug 08, 2014 at 10:35:38AM -0400, Chris Mason wrote:
> >>
> >> xfs is using truncate_pagecache_range to invalidate the page cache
> >> during DIO reads.  This is different from the other filesystems who only
> >> invalidate pages during DIO writes.
> > 
> > Historical oddity thanks to wrapper functions that were kept way
> > longer than they should have been.
> > 
> >> truncate_pagecache_range is meant to be used when we are freeing the
> >> underlying data structs from disk, so it will zero any partial ranges
> >> in the page.  This means a DIO read can zero out part of the page cache
> >> page, and it is possible the page will stay in cache.
> > 
> > commit fb59581 ("xfs: remove xfs_flushinval_pages").  also removed
> > the offset masks that seem to be the issue here. Classic case of a
> > regression caused by removing 10+ year old code that was not clearly
> > documented and didn't appear important.
> > 
> > The real question is why isn't fsx and other corner case data
> > integrity tools tripping over this?
> > 
> 
> My question too.  Maybe not mixing buffered/direct for partial pages?
> Does fsx only do 4K O_DIRECT?

No. xfstests::tests/generic/091 is supposed to cover this exact
case.  It runs fsx with direct IO aligned to sector boundaries
amongst other things.

$ ./lsqa.pl tests/generic/091
FS QA Test No. 091

fsx exercising direct IO -- sub-block sizes and concurrent buffered IO

$

> 
> >> buffered reads will find an up to date page with zeros instead of the
> >> data actually on disk.
> >>
> >> This patch fixes things by leaving the page cache alone during DIO
> >> reads.
> >>
> >> We discovered this when our buffered IO program for distributing
> >> database indexes was finding zero filled blocks.  I think writes
> >> are broken too, but I'll leave that for a separate patch because I don't
> >> fully understand what XFS needs to happen during a DIO write.
> >>
> >> Test program:
> > 
> > Encapsulate it in a generic xfstest, please, and send it to
> > fstests@vger.kernel.org.
> 
> This test prog was looking for races, which we really don't have.  It
> can be much shorter to just look for the improper zeroing from a single
> thread.  I can send it either way.

Doesn't matter, as long as we have something that exercises this
case....

> > Besides, XFS's direct IO semantics are far saner, more predictable
> > and hence are more widely useful than the generic code. As such,
> > we're not going to regress semantics that have been unchanged
> > over 20 years just to match whatever insanity the generic Linux code
> > does right now.
> > 
> > Go on, call me a deranged monkey on some serious mind-controlling
> > substances. I don't care. :)
> 
> The deranged part is invalidating pos -> -1 on a huge file because of a
> single 512b direct read.  But, if you mix O_DIRECT and buffered you get
> what the monkeys give you and like it.

That's a historical artifact - it predates the range interfaces that
Linux has grown over the years, and every time we've changed it to
match the I/O range, subtle problems have arisen. Those are usually
due to other bugs we knew nothing about at the time, but that's the
way it goes...

> > I think the fix should probably just be:
> > 
> > -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> > +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> > +					pos >> PAGE_CACHE_SHIFT, -1);
> > 
> 
> I'll retest with this in the morning.  The invalidate is basically what
> we had before with the masking & PAGE_CACHE_SHIFT.

Yup. Thanks for finding these issues, Chris!

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* [PATCH v2] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-08 14:35 [PATCH] xfs: don't zero partial page cache pages during O_DIRECT Chris Mason
                   ` (3 preceding siblings ...)
  2014-08-09  0:36 ` Dave Chinner
@ 2014-08-09 12:57 ` Chris Mason
  2014-08-11 13:29   ` Brian Foster
  2014-08-12  1:17   ` Dave Chinner
  4 siblings, 2 replies; 19+ messages in thread
From: Chris Mason @ 2014-08-09 12:57 UTC (permalink / raw)
  To: xfs, Dave Chinner, Eric Sandeen


xfs is using truncate_pagecache_range to invalidate the page cache
during DIO reads.  This is different from the other filesystems, which only
invalidate pages during DIO writes.

truncate_pagecache_range is meant to be used when we are freeing the
underlying data structs from disk, so it will zero any partial ranges in
the page.  This means a DIO read can zero out part of the page cache
page, and it is possible the page will stay in cache.

Buffered reads will find an up-to-date page with zeros instead of the
data actually on disk.

This patch fixes things by using invalidate_inode_pages2_range instead.
It preserves the page cache invalidation, but won't zero any pages.

Signed-off-by: Chris Mason <clm@fb.com>
cc: stable@vger.kernel.org

---
 fs/xfs/xfs_file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 1f66779..023d575 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -295,7 +295,8 @@ xfs_file_read_iter(
 				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
 				return ret;
 			}
-			truncate_pagecache_range(VFS_I(ip), pos, -1);
+			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+					      pos >> PAGE_CACHE_SHIFT, -1);
 		}
 		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 	}
-- 
1.8.1


* Re: [PATCH v2] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-09 12:57 ` [PATCH v2] " Chris Mason
@ 2014-08-11 13:29   ` Brian Foster
  2014-08-12  1:17   ` Dave Chinner
  1 sibling, 0 replies; 19+ messages in thread
From: Brian Foster @ 2014-08-11 13:29 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Sat, Aug 09, 2014 at 08:57:00AM -0400, Chris Mason wrote:
> 
> xfs is using truncate_pagecache_range to invalidate the page cache
> during DIO reads.  This is different from the other filesystems who only
> invalidate pages during DIO writes.
> 
> truncate_pagecache_range is meant to be used when we are freeing the
> underlying data structs from disk, so it will zero any partial ranges in
> the page.  This means a DIO read can zero out part of the page cache
> page, and it is possible the page will stay in cache.
> 
> buffered reads will find an up to date page with zeros instead of the
> data actually on disk.
> 
> This patch fixes things by using invalidate_inode_pages2_range instead.
> It preserves the page cache invalidation, but won't zero any pages.
> 
> Signed-off-by: Chris Mason <clm@fb.com>
> cc: stable@vger.kernel.org
> 
> ---

I can't say I grok the risk of removing the invalidation entirely, but
this is certainly a more isolated fix:

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/xfs_file.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 1f66779..023d575 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -295,7 +295,8 @@ xfs_file_read_iter(
>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>  				return ret;
>  			}
> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> +					      pos >> PAGE_CACHE_SHIFT, -1);
>  		}
>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
>  	}
> -- 
> 1.8.1
> 

* Re: [PATCH v2] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-09 12:57 ` [PATCH v2] " Chris Mason
  2014-08-11 13:29   ` Brian Foster
@ 2014-08-12  1:17   ` Dave Chinner
  2014-08-19 19:24     ` Chris Mason
  1 sibling, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2014-08-12  1:17 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Sat, Aug 09, 2014 at 08:57:00AM -0400, Chris Mason wrote:
> 
> xfs is using truncate_pagecache_range to invalidate the page cache
> during DIO reads.  This is different from the other filesystems who only
> invalidate pages during DIO writes.
> 
> truncate_pagecache_range is meant to be used when we are freeing the
> underlying data structs from disk, so it will zero any partial ranges in
> the page.  This means a DIO read can zero out part of the page cache
> page, and it is possible the page will stay in cache.
> 
> buffered reads will find an up to date page with zeros instead of the
> data actually on disk.
> 
> This patch fixes things by using invalidate_inode_pages2_range instead.
> It preserves the page cache invalidation, but won't zero any pages.
> 
> Signed-off-by: Chris Mason <clm@fb.com>
> cc: stable@vger.kernel.org
> 
> ---
>  fs/xfs/xfs_file.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 1f66779..023d575 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -295,7 +295,8 @@ xfs_file_read_iter(
>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>  				return ret;
>  			}
> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> +					      pos >> PAGE_CACHE_SHIFT, -1);
>  		}
>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
>  	}

I added the WARN_ON_ONCE(ret) check to this and I am seeing it fire
occasionally. It always fires immediately before some other ASSERT()
that fires with a block map/page cache inconsistency. It usually
fires in a test that runs fsx or fsstress. The fsx failures are new
regressions caused by this patch, e.g. generic/263 hasn't failed for
months on any of my systems and this patch causes it to fail
reliably on my 1k block size test config.

I'm going to assume at this point that this is uncovering some other
existing bug, but it means I'm not going to push this fix until I
understand what is actually happening here. It is possible that what
I'm seeing is related to Brian's collapse range bug fixes, but until
I applied this direct IO patch I'd never seen fsx throw ASSERTs in
xfs_bmap_shift_extents()....

Either way, more testing and understanding is needed.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH v2] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-12  1:17   ` Dave Chinner
@ 2014-08-19 19:24     ` Chris Mason
  2014-08-19 22:35       ` Dave Chinner
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Mason @ 2014-08-19 19:24 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Eric Sandeen, xfs

On 08/11/2014 09:17 PM, Dave Chinner wrote:
> On Sat, Aug 09, 2014 at 08:57:00AM -0400, Chris Mason wrote:
>>
>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>> index 1f66779..023d575 100644
>> --- a/fs/xfs/xfs_file.c
>> +++ b/fs/xfs/xfs_file.c
>> @@ -295,7 +295,8 @@ xfs_file_read_iter(
>>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>>  				return ret;
>>  			}
>> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
>> +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
>> +					      pos >> PAGE_CACHE_SHIFT, -1);
>>  		}
>>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
>>  	}
> 
> I added the WARN_ON_ONCE(ret) check to this and I am seeing it fire
> occasionally. It always fires immediately before some other ASSERT()
> they fires with a block map/page cache inconsistency. It usually
> fires in a test that runs fsx or fsstress. The fsx failures are new
> regressions caused by this patch. e.g. generic/263 hasn't failed for
> months on any of my systems and this patch causes it to fail
> reliably on my 1k block size test config.
> 
> I'm going to assume at this point that this is uncovering some other
> existing bug, but it means I'm not going to push this fix until I
> understand what is actually happening here. It is possible that what
> I'm seeing is related to Brian's collapse range bug fixes, but until
> I applied this direct IO patch I'd never seen fsx throw ASSERTs in
> xfs_bmap_shift_extents()....
> 
> Either way, more testing and understanding is needed.

Do you have the output from fsx and the command line args it used?  For
my device, it picks:

-r 4096 -t 512 -w 512 -Z

And for a blocksize 1024 test I did mkfs.xfs -b size=1024

But I can't trigger failures with or without the invalidate_inode_pages2
change.  I was hoping to trigger on 3.16, and then jump back to 3.10 +
my patch to see if the patch alone was at fault.

-chris


* Re: [PATCH v2] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-19 19:24     ` Chris Mason
@ 2014-08-19 22:35       ` Dave Chinner
  2014-08-20  1:54         ` Chris Mason
  0 siblings, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2014-08-19 22:35 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Tue, Aug 19, 2014 at 03:24:48PM -0400, Chris Mason wrote:
> On 08/11/2014 09:17 PM, Dave Chinner wrote:
> > On Sat, Aug 09, 2014 at 08:57:00AM -0400, Chris Mason wrote:
> >>
> >> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> >> index 1f66779..023d575 100644
> >> --- a/fs/xfs/xfs_file.c
> >> +++ b/fs/xfs/xfs_file.c
> >> @@ -295,7 +295,8 @@ xfs_file_read_iter(
> >>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
> >>  				return ret;
> >>  			}
> >> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> >> +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> >> +					      pos >> PAGE_CACHE_SHIFT, -1);
> >>  		}
> >>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> >>  	}
> > 
> > I added the WARN_ON_ONCE(ret) check to this and I am seeing it fire
> > occasionally. It always fires immediately before some other ASSERT()
> > they fires with a block map/page cache inconsistency. It usually
> > fires in a test that runs fsx or fsstress. The fsx failures are new
> > regressions caused by this patch. e.g. generic/263 hasn't failed for
> > months on any of my systems and this patch causes it to fail
> > reliably on my 1k block size test config.
> > 
> > I'm going to assume at this point that this is uncovering some other
> > existing bug, but it means I'm not going to push this fix until I
> > understand what is actually happening here. It is possible that what
> > I'm seeing is related to Brian's collapse range bug fixes, but until
> > I applied this direct IO patch I'd never seen fsx throw ASSERTs in
> > xfs_bmap_shift_extents()....
> > 
> > Either way, more testing and understanding is needed.
> 
> Do you have the output from xfs and the command line args it used?  For
> my device, it picks:
> 
> -r 4096 -t 512 -w 512 -Z
> 
> And for a blocksize 1024 test I did mkfs.xfs -b size=1024

I'm running:

$ mkfs.xfs -f -m crc=1,finobt=1 -b size=1k /dev/vda
$ mount /dev/vda /mnt/test
$ ltp/fsx -o 128000   -l 500000 -r 4096 -t 512 -w 512 -Z -d /mnt/test/foo

> But I can't trigger failures with or without the invalidate_inode_pages2
> change.  I was hoping to trigger on 3.16, and then jump back to 3.10 +
> my patch to see if the patch alone was at fault.

I am seeing failures at operation 1192.

Yesterday, I found a new class of bufferhead state coherency issues
to do with EOF handling that are causing the problems. Basically,
when the page cache marks a page dirty, the generic code marks all
the buffers on the page dirty, even when they are beyond EOF.

As a result, when we go to invalidate the page that spans EOF, it
cannot be invalidated because there are dirty buffers on the page.
Those buffers persist in that state because they are beyond EOF,
have no blocks allocated to them, and cannot be written. And so when
we do a direct IO that spans the current EOF, it now fails to
invalidate that page and so triggers the warning.
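
For reference, a minimal userspace sequence along the lines of the case
described above (a hedged, untested sketch -- the path, sizes and
alignment assumptions are made up for illustration and it is not a
confirmed reproducer):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char *buf;
	int fd = open("/mnt/test/eof-case", O_RDWR | O_CREAT | O_TRUNC, 0644);
	int dfd = open("/mnt/test/eof-case", O_RDWR | O_DIRECT);

	if (fd < 0 || dfd < 0 || posix_memalign((void **)&buf, 4096, 4096))
		return 1;
	memset(buf, 1, 4096);

	/* buffered write: EOF now lands inside a page cache page, and the
	 * generic code dirties all the buffers on that page, including the
	 * ones beyond EOF */
	if (pwrite(fd, buf, 1536, 0) != 1536)
		return 1;

	/* direct IO spanning the current EOF: per the analysis above, the
	 * page straddling EOF may fail to invalidate here */
	if (pwrite(dfd, buf, 4096, 0) != 4096)
		return 1;

	close(dfd);
	close(fd);
	return 0;
}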

Worse is that it appears that these bufferheads can leak into the
interior blocks of the file when the file is extended, leading to
all sorts of other ASSERT failures (which I've been seeing for a
while now).

I've got the above fsx command to run for somewhere between 100k and
110k operations with the fixes I currently have, but I haven't found
the cause of the dirty buffer beyond EOF state leaking into the
interior of the file from extend operations yet.

Once I have something that passes a few million fsx ops....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH v2] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-19 22:35       ` Dave Chinner
@ 2014-08-20  1:54         ` Chris Mason
  2014-08-20  2:19           ` Dave Chinner
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Mason @ 2014-08-20  1:54 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Eric Sandeen, xfs



On 08/19/2014 06:35 PM, Dave Chinner wrote:
> On Tue, Aug 19, 2014 at 03:24:48PM -0400, Chris Mason wrote:
>> On 08/11/2014 09:17 PM, Dave Chinner wrote:
>>> On Sat, Aug 09, 2014 at 08:57:00AM -0400, Chris Mason wrote:
>>>>
>>>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>>>> index 1f66779..023d575 100644
>>>> --- a/fs/xfs/xfs_file.c
>>>> +++ b/fs/xfs/xfs_file.c
>>>> @@ -295,7 +295,8 @@ xfs_file_read_iter(
>>>>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
>>>>  				return ret;
>>>>  			}
>>>> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
>>>> +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
>>>> +					      pos >> PAGE_CACHE_SHIFT, -1);
>>>>  		}
>>>>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
>>>>  	}
>>>
>>> I added the WARN_ON_ONCE(ret) check to this and I am seeing it fire
>>> occasionally. It always fires immediately before some other ASSERT()
>>> they fires with a block map/page cache inconsistency. It usually
>>> fires in a test that runs fsx or fsstress. The fsx failures are new
>>> regressions caused by this patch. e.g. generic/263 hasn't failed for
>>> months on any of my systems and this patch causes it to fail
>>> reliably on my 1k block size test config.
>>>
>>> I'm going to assume at this point that this is uncovering some other
>>> existing bug, but it means I'm not going to push this fix until I
>>> understand what is actually happening here. It is possible that what
>>> I'm seeing is related to Brian's collapse range bug fixes, but until
>>> I applied this direct IO patch I'd never seen fsx throw ASSERTs in
>>> xfs_bmap_shift_extents()....
>>>
>>> Either way, more testing and understanding is needed.
>>
>> Do you have the output from xfs and the command line args it used?  For
>> my device, it picks:
>>
>> -r 4096 -t 512 -w 512 -Z
>>
>> And for a blocksize 1024 test I did mkfs.xfs -b size=1024
> 
> I'm running:
> 
> $ mkfs.xfs -f -m crc=1,finobt=1 -b size=1k /dev/vda
> $ mount /dev/vda /mnt/test
> $ ltp/fsx -o 128000   -l 500000 -r 4096 -t 512 -w 512 -Z -d /mnt/test/foo
> 
>> But I can't trigger failures with or without the invalidate_inode_pages2
>> change.  I was hoping to trigger on 3.16, and then jump back to 3.10 +
>> my patch to see if the patch alone was at fault.
> 
> I am seeing failures at operation 1192.
> 
> Yesterday, I found a new class of bufferhead state coherency issues
> to do with EOF handling that are causing the problems. Basically,
> when the page cache marks a page dirty, the generic code marks all
> the buffers on the page dirty, even when they are beyond EOF.
> 
> As a result, when we go to invalidate the page that spans EOF, it
> cannot be invalidated because there are dirty buffers on the page.
> Those buffers persist in that state because they are beyond EOF,
> have no blocks allocated to them, and cannot be written. And so when
> we do a direct IO that spans the current EOF, it now fails to
> invalidate that page and so triggers the warning.
> 
> Worse is that it appears that these bufferheads can leak into the
> internal blocks into the file when the file is extended, leading to
> all sorts of other ASSERT failures (which I've been seeing for a
> while now).
> 
> I've got the above fsx command to run for somewhere between 100k and
> 110k operations with the fixes I currently have, but I haven't found
> the cause of the dirty buffer beyond EOF state leaking into the
> interior of the file from extend operations yet.
> 
> Once I have something that passes a few million fsx ops....

I have to admit, I'm not sure where this leaves us in terms of safely
applying my patch to our 3.10 or mainline kernel... Failing to
invalidate the page and zeroing the page are really both wrong.

It feels like this zeroing and cleaning should be happening in truncate,
but that's because I always blame truncate for fsx bugs.

Totally unrelated, but I'll abuse this email thread anyway.  After pulling
today's xfstests so I could send my shiny new test, ./check -g auto now
only runs the btrfs tests and nothing else.

I think it's safe to assume this doesn't happen for you; any hints on
what might make xfstests suddenly love btrfs this much?

-chris


* Re: [PATCH v2] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-20  1:54         ` Chris Mason
@ 2014-08-20  2:19           ` Dave Chinner
  2014-08-20  2:36             ` Dave Chinner
  0 siblings, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2014-08-20  2:19 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Tue, Aug 19, 2014 at 09:54:14PM -0400, Chris Mason wrote:
> On 08/19/2014 06:35 PM, Dave Chinner wrote:
> > On Tue, Aug 19, 2014 at 03:24:48PM -0400, Chris Mason wrote:
> >> On 08/11/2014 09:17 PM, Dave Chinner wrote:
> >>> On Sat, Aug 09, 2014 at 08:57:00AM -0400, Chris Mason wrote:
> >>>>
> >>>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> >>>> index 1f66779..023d575 100644
> >>>> --- a/fs/xfs/xfs_file.c
> >>>> +++ b/fs/xfs/xfs_file.c
> >>>> @@ -295,7 +295,8 @@ xfs_file_read_iter(
> >>>>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
> >>>>  				return ret;
> >>>>  			}
> >>>> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> >>>> +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> >>>> +					      pos >> PAGE_CACHE_SHIFT, -1);
> >>>>  		}
> >>>>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> >>>>  	}
> >>>
> >>> I added the WARN_ON_ONCE(ret) check to this and I am seeing it fire
> >>> occasionally. It always fires immediately before some other ASSERT()
> >>> they fires with a block map/page cache inconsistency. It usually
> >>> fires in a test that runs fsx or fsstress. The fsx failures are new
> >>> regressions caused by this patch. e.g. generic/263 hasn't failed for
> >>> months on any of my systems and this patch causes it to fail
> >>> reliably on my 1k block size test config.
> >>>
> >>> I'm going to assume at this point that this is uncovering some other
> >>> existing bug, but it means I'm not going to push this fix until I
> >>> understand what is actually happening here. It is possible that what
> >>> I'm seeing is related to Brian's collapse range bug fixes, but until
> >>> I applied this direct IO patch I'd never seen fsx throw ASSERTs in
> >>> xfs_bmap_shift_extents()....
> >>>
> >>> Either way, more testing and understanding is needed.
> >>
> >> Do you have the output from xfs and the command line args it used?  For
> >> my device, it picks:
> >>
> >> -r 4096 -t 512 -w 512 -Z
> >>
> >> And for a blocksize 1024 test I did mkfs.xfs -b size=1024
> > 
> > I'm running:
> > 
> > $ mkfs.xfs -f -m crc=1,finobt=1 -b size=1k /dev/vda
> > $ mount /dev/vda /mnt/test
> > $ ltp/fsx -o 128000   -l 500000 -r 4096 -t 512 -w 512 -Z -d /mnt/test/foo
> > 
> >> But I can't trigger failures with or without the invalidate_inode_pages2
> >> change.  I was hoping to trigger on 3.16, and then jump back to 3.10 +
> >> my patch to see if the patch alone was at fault.
> > 
> > I am seeing failures at operation 1192.
> > 
> > Yesterday, I found a new class of bufferhead state coherency issues
> > to do with EOF handling that are causing the problems. Basically,
> > when the page cache marks a page dirty, the generic code marks all
> > the buffers on the page dirty, even when they are beyond EOF.
> > 
> > As a result, when we go to invalidate the page that spans EOF, it
> > cannot be invalidated because there are dirty buffers on the page.
> > Those buffers persist in that state because they are beyond EOF,
> > have no blocks allocated to them, and cannot be written. And so when
> > we do a direct IO that spans the current EOF, it now fails to
> > invalidate that page and so triggers the warning.
> > 
> > Worse is that it appears that these bufferheads can leak into the
> > internal blocks into the file when the file is extended, leading to
> > all sorts of other ASSERT failures (which I've been seeing for a
> > while now).
> > 
> > I've got the above fsx command to run for somewhere between 100k and
> > 110k operations with the fixes I currently have, but I haven't found
> > the cause of the dirty buffer beyond EOF state leaking into the
> > interior of the file from extend operations yet.
> > 
> > Once I have something that passes a few million fsx ops....
> 
> I have to admit, I'm not sure where this leaves us in terms of safely
> applying my patch to our 3.10 or mainline kernel... Failing to
> invalidate the page and zeroing the page are really both wrong.

Well, yes they are, and that's one of the reasons why I wanted to
ensure we caught failures. As such, I think this invalidation bug
goes back a *long time* because we've never, ever checked for
invalidation failure before this patch.

However, I think I've got a self-contained fix that can be
backported for the invalidation problem now - it's well past 10
million fsx ops at this point with both read and write DIO
invalidation fixes included. I'll post my series when I've done
some more robust testing on it....

> It feels like this zeroing and cleaning should be happening in truncate,
> but that's because I always blame truncate for fsx bugs.

The problem is the other way around - buffers beyond EOF should
never be marked dirty because they can never be written and hence
never be marked clean in a consistent and sane manner.

> Totally unrelated, but I'll abuse this email thread anyway.  With a pull
> to today's xfstests so I can send my shiny new test, ./check -g auto now
> only runs the btrfs tests and nothing else.

Works just fine for me. Did you reformat your test dev to xfs first?
Do you have stray env vars set?  What does ./setup tell you about
your config? And maybe you can bisect - it was only ~20
commits in the last push.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH v2] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-20  2:19           ` Dave Chinner
@ 2014-08-20  2:36             ` Dave Chinner
  2014-08-20  4:41               ` Dave Chinner
  0 siblings, 1 reply; 19+ messages in thread
From: Dave Chinner @ 2014-08-20  2:36 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Wed, Aug 20, 2014 at 12:19:50PM +1000, Dave Chinner wrote:
> On Tue, Aug 19, 2014 at 09:54:14PM -0400, Chris Mason wrote:
> > On 08/19/2014 06:35 PM, Dave Chinner wrote:
> > > On Tue, Aug 19, 2014 at 03:24:48PM -0400, Chris Mason wrote:
> > >> On 08/11/2014 09:17 PM, Dave Chinner wrote:
> > >>> On Sat, Aug 09, 2014 at 08:57:00AM -0400, Chris Mason wrote:
> > >>>>
> > >>>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > >>>> index 1f66779..023d575 100644
> > >>>> --- a/fs/xfs/xfs_file.c
> > >>>> +++ b/fs/xfs/xfs_file.c
> > >>>> @@ -295,7 +295,8 @@ xfs_file_read_iter(
> > >>>>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
> > >>>>  				return ret;
> > >>>>  			}
> > >>>> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> > >>>> +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> > >>>> +					      pos >> PAGE_CACHE_SHIFT, -1);
> > >>>>  		}
> > >>>>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> > >>>>  	}
> > >>>
> > >>> I added the WARN_ON_ONCE(ret) check to this and I am seeing it fire
> > >>> occasionally. It always fires immediately before some other ASSERT()
> > >>> they fires with a block map/page cache inconsistency. It usually
> > >>> fires in a test that runs fsx or fsstress. The fsx failures are new
> > >>> regressions caused by this patch. e.g. generic/263 hasn't failed for
> > >>> months on any of my systems and this patch causes it to fail
> > >>> reliably on my 1k block size test config.
> > >>>
> > >>> I'm going to assume at this point that this is uncovering some other
> > >>> existing bug, but it means I'm not going to push this fix until I
> > >>> understand what is actually happening here. It is possible that what
> > >>> I'm seeing is related to Brian's collapse range bug fixes, but until
> > >>> I applied this direct IO patch I'd never seen fsx throw ASSERTs in
> > >>> xfs_bmap_shift_extents()....
> > >>>
> > >>> Either way, more testing and understanding is needed.
> > >>
> > >> Do you have the output from xfs and the command line args it used?  For
> > >> my device, it picks:
> > >>
> > >> -r 4096 -t 512 -w 512 -Z
> > >>
> > >> And for a blocksize 1024 test I did mkfs.xfs -b size=1024
> > > 
> > > I'm running:
> > > 
> > > $ mkfs.xfs -f -m crc=1,finobt=1 -b size=1k /dev/vda
> > > $ mount /dev/vda /mnt/test
> > > $ ltp/fsx -o 128000   -l 500000 -r 4096 -t 512 -w 512 -Z -d /mnt/test/foo
> > > 
> > >> But I can't trigger failures with or without the invalidate_inode_pages2
> > >> change.  I was hoping to trigger on 3.16, and then jump back to 3.10 +
> > >> my patch to see if the patch alone was at fault.
> > > 
> > > I am seeing failures at operation 1192.
> > > 
> > > Yesterday, I found a new class of bufferhead state coherency issues
> > > to do with EOF handling that are causing the problems. Basically,
> > > when the page cache marks a page dirty, the generic code marks all
> > > the buffers on the page dirty, even when they are beyond EOF.
> > > 
> > > As a result, when we go to invalidate the page that spans EOF, it
> > > cannot be invalidated because there are dirty buffers on the page.
> > > Those buffers persist in that state because they are beyond EOF,
> > > have no blocks allocated to them, and cannot be written. And so when
> > > we do a direct IO that spans the current EOF, it now fails to
> > > invalidate that page and so triggers the warning.
> > > 
> > > Worse is that it appears that these bufferheads can leak into the
> > > interior blocks of the file when the file is extended, leading to
> > > all sorts of other ASSERT failures (which I've been seeing for a
> > > while now).
> > > 
> > > I've got the above fsx command to run for somewhere between 100k and
> > > 110k operations with the fixes I currently have, but I haven't found
> > > the cause of the dirty buffer beyond EOF state leaking into the
> > > interior of the file from extend operations yet.
> > > 
> > > Once I have something that passes a few million fsx ops....
> > 
> > I have to admit, I'm not sure where this leaves us in terms of safely
> > applying my patch to our 3.10 or mainline kernel... Failing to
> > invalidate the page and zeroing the page are really both wrong.
> 
> Well, yes they are, and that's one of the reasons why I wanted to
> ensure we caught failures. As such, I think this invalidation bug
> goes back a *long time* because we've never, ever checked for
> invalidation failure before this patch.
> 
> However, I think I've got a self-contained fix that can be
> backported for the invalidation problem now - it's well past 10
> million fsx ops at this point with both read and write DIO
> invalidation fixes included. I'll post my series when I've done
> some more robust testing on it....

There are more issues. generic/247 on a 4k block size filesystem just
triggered an invalidation failure on a direct IO write. That test is
specifically mixing DIO overwrite with mmap overwrite on the same
file, so there are clearly more issues in play here than I first
thought.

So, more debugging needed to determine if this is a result of the
usual, currently unsolvable page fault vs DIO races...
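
For anyone who wants to poke at this outside xfstests, the pattern is
roughly the throwaway program below - it is not generic/247 itself,
just the mmap overwrite vs O_DIRECT overwrite mix it drives, and it
assumes the file passed in already exists and is at least a page long:

/*
 * gcc -Wall -o mmap-dio-race mmap-dio-race.c
 * ./mmap-dio-race filename
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	char *map, *buf;
	int fd, dfd, i;

	if (argc != 2) {
		fprintf(stderr, "usage: %s filename\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDWR);
	dfd = open(argv[1], O_RDWR | O_DIRECT);
	if (fd < 0 || dfd < 0 || posix_memalign((void **)&buf, pgsz, pgsz)) {
		perror("setup");
		return 1;
	}
	memset(buf, 1, pgsz);

	map = mmap(NULL, pgsz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	if (fork() == 0) {
		/* child: keep overwriting the first block with O_DIRECT */
		for (i = 0; i < 100000; i++)
			if (pwrite(dfd, buf, pgsz, 0) != pgsz)
				_exit(1);
		_exit(0);
	}

	/* parent: keep dirtying the same page through the mapping */
	for (i = 0; i < 100000; i++)
		map[0] = i;

	wait(NULL);
	return 0;
}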

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


* Re: [PATCH v2] xfs: don't zero partial page cache pages during O_DIRECT
  2014-08-20  2:36             ` Dave Chinner
@ 2014-08-20  4:41               ` Dave Chinner
  0 siblings, 0 replies; 19+ messages in thread
From: Dave Chinner @ 2014-08-20  4:41 UTC (permalink / raw)
  To: Chris Mason; +Cc: Eric Sandeen, xfs

On Wed, Aug 20, 2014 at 12:36:49PM +1000, Dave Chinner wrote:
> On Wed, Aug 20, 2014 at 12:19:50PM +1000, Dave Chinner wrote:
> > On Tue, Aug 19, 2014 at 09:54:14PM -0400, Chris Mason wrote:
> > > On 08/19/2014 06:35 PM, Dave Chinner wrote:
> > > > On Tue, Aug 19, 2014 at 03:24:48PM -0400, Chris Mason wrote:
> > > >> On 08/11/2014 09:17 PM, Dave Chinner wrote:
> > > >>> On Sat, Aug 09, 2014 at 08:57:00AM -0400, Chris Mason wrote:
> > > >>>>
> > > >>>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > > >>>> index 1f66779..023d575 100644
> > > >>>> --- a/fs/xfs/xfs_file.c
> > > >>>> +++ b/fs/xfs/xfs_file.c
> > > >>>> @@ -295,7 +295,8 @@ xfs_file_read_iter(
> > > >>>>  				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
> > > >>>>  				return ret;
> > > >>>>  			}
> > > >>>> -			truncate_pagecache_range(VFS_I(ip), pos, -1);
> > > >>>> +			invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
> > > >>>> +					      pos >> PAGE_CACHE_SHIFT, -1);
> > > >>>>  		}
> > > >>>>  		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> > > >>>>  	}
> > > >>>
> > > >>> I added the WARN_ON_ONCE(ret) check to this and I am seeing it fire
> > > >>> occasionally. It always fires immediately before some other ASSERT()
> > > >>> that fires with a block map/page cache inconsistency. It usually
> > > >>> fires in a test that runs fsx or fsstress. The fsx failures are new
> > > >>> regressions caused by this patch. e.g. generic/263 hasn't failed for
> > > >>> months on any of my systems and this patch causes it to fail
> > > >>> reliably on my 1k block size test config.
> > > >>>
> > > >>> I'm going to assume at this point that this is uncovering some other
> > > >>> existing bug, but it means I'm not going to push this fix until I
> > > >>> understand what is actually happening here. It is possible that what
> > > >>> I'm seeing is related to Brian's collapse range bug fixes, but until
> > > >>> I applied this direct IO patch I'd never seen fsx throw ASSERTs in
> > > >>> xfs_bmap_shift_extents()....
> > > >>>
> > > >>> Either way, more testing and understanding is needed.
> > > >>
> > > >> Do you have the output from xfs and the command line args it used?  For
> > > >> my device, it picks:
> > > >>
> > > >> -r 4096 -t 512 -w 512 -Z
> > > >>
> > > >> And for a blocksize 1024 test I did mkfs.xfs -b size=1024
> > > > 
> > > > I'm running:
> > > > 
> > > > $ mkfs.xfs -f -m crc=1,finobt=1 -b size=1k /dev/vda
> > > > $ mount /dev/vda /mnt/test
> > > > $ ltp/fsx -o 128000   -l 500000 -r 4096 -t 512 -w 512 -Z -d /mnt/test/foo
> > > > 
> > > >> But I can't trigger failures with or without the invalidate_inode_pages2
> > > >> change.  I was hoping to trigger on 3.16, and then jump back to 3.10 +
> > > >> my patch to see if the patch alone was at fault.
> > > > 
> > > > I am seeing failures at operation 1192.
> > > > 
> > > > Yesterday, I found a new class of bufferhead state coherency issues
> > > > to do with EOF handling that are causing the problems. Basically,
> > > > when the page cache marks a page dirty, the generic code marks all
> > > > the buffers on the page dirty, even when they are beyond EOF.
> > > > 
> > > > As a result, when we go to invalidate the page that spans EOF, it
> > > > cannot be invalidated because there are dirty buffers on the page.
> > > > Those buffers persist in that state because they are beyond EOF,
> > > > have no blocks allocated to them, and cannot be written. And so when
> > > > we do a direct IO that spans the current EOF, it now fails to
> > > > invalidate that page and so triggers the warning.
> > > > 
> > > > Worse is that it appears that these bufferheads can leak into the
> > > > interior blocks of the file when the file is extended, leading to
> > > > all sorts of other ASSERT failures (which I've been seeing for a
> > > > while now).
> > > > 
> > > > I've got the above fsx command to run for somewhere between 100k and
> > > > 110k operations with the fixes I currently have, but I haven't found
> > > > the cause of the dirty buffer beyond EOF state leaking into the
> > > > interior of the file from extend operations yet.
> > > > 
> > > > Once I have something that passes a few million fsx ops....
> > > 
> > > I have to admit, I'm not sure where this leaves us in terms of safely
> > > applying my patch to our 3.10 or mainline kernel... Failing to
> > > invalidate the page and zeroing the page are really both wrong.
> > 
> > Well, yes they are, and that's one of the reasons why I wanted to
> > ensure we caught failures. As such, I think this invalidation bug
> > goes back a *long time* because we've never, ever checked for
> > invalidation failure before this patch.
> > 
> > However, I think I've got a self-contained fix that can be
> > backported for the invalidation problem now - it's well past 10
> > million fsx ops at this point with both read and write DIO
> > invalidation fixes included. I'll post my series when I've done
> > some more robust testing on it....
> 
> There are more issues. generic/247 on a 4k block size filesystem just
> triggered an invalidation failure on a direct IO write. That test is
> specifically mixing DIO overwrite with mmap overwrite on the same
> file, so there are clearly more issues in play here than I first
> thought.
> 
> So, more debugging needed to determine if this is a result of the
> usual, currently unsolvable page fault vs DIO races...

OK, it's almost certainly a page fault vs writeback race in the
DIO code. The page that is failing invalidation is uptodate, dirty
and mapped, which indicates it was dirtied through the mapping after
writeback occurred. So this is just a demonstration of why mmap+DIO
on the same file is a bad idea, and having the kernel issue a warning
the first time this happens is probably a good thing.
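
For reference, the check that identifies that state is nothing more
elaborate than something like this (a sketch only, not the debug
actually in my tree):

	if (PageUptodate(page) && PageDirty(page) && page_mapped(page))
		/* dirtied through a mapping after writeback completed */
		trace_printk("DIO invalidation failed on mmap-dirtied page\n");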

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com


end of thread, other threads:[~2014-08-20  4:42 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-08 14:35 [PATCH] xfs: don't zero partial page cache pages during O_DIRECT Chris Mason
2014-08-08 15:17 ` Chris Mason
2014-08-08 16:04 ` [PATCH RFC] xfs: use invalidate_inode_pages2_range for DIO writes Chris Mason
2014-08-09  0:48   ` Dave Chinner
2014-08-09  2:42     ` Chris Mason
2014-08-08 20:39 ` [PATCH] xfs: don't zero partial page cache pages during O_DIRECT Brian Foster
2014-08-09  0:36 ` Dave Chinner
2014-08-09  2:32   ` Chris Mason
2014-08-09  3:19     ` Eric Sandeen
2014-08-09  4:17     ` Dave Chinner
2014-08-09 12:57 ` [PATCH v2] " Chris Mason
2014-08-11 13:29   ` Brian Foster
2014-08-12  1:17   ` Dave Chinner
2014-08-19 19:24     ` Chris Mason
2014-08-19 22:35       ` Dave Chinner
2014-08-20  1:54         ` Chris Mason
2014-08-20  2:19           ` Dave Chinner
2014-08-20  2:36             ` Dave Chinner
2014-08-20  4:41               ` Dave Chinner
