linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* xfs: untangle the direct I/O and DAX path, fix DAX locking
@ 2016-06-22 15:27 Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 1/8] xfs: don't pass ioflags around in the ioctl path Christoph Hellwig
                   ` (8 more replies)
  0 siblings, 9 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:27 UTC (permalink / raw)
  To: xfs; +Cc: linux-nvdimm, linux-fsdevel

The last patch is what started the series:  XFS currently uses the
direct I/O locking strategy for DAX because DAX was overloaded onto
the direct I/O path.  For XFS this means that we only take a shared
inode lock instead of the normal exclusive one for writes IFF they
are properly aligned.  While this is fine for O_DIRECT which requires
explicit opt-in from the application it's not fine for DAX where we'll
suddenly lose expected and required synchronization if the file system
happens to use DAX underneath.

Patches 1-7 just untangle the code so that we can deal with DAX on
its own easily.


^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/8] xfs: don't pass ioflags around in the ioctl path
  2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
@ 2016-06-22 15:27 ` Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 2/8] xfs: kill ioflags Christoph Hellwig
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:27 UTC (permalink / raw)
  To: xfs; +Cc: linux-nvdimm, linux-fsdevel

Instead check the file pointer for the invisible I/O flag directly, and
use the chance to drop redundant arguments from the xfs_ioc_space
prototype.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_ioctl.c   | 22 ++++++++--------------
 fs/xfs/xfs_ioctl.h   |  3 ---
 fs/xfs/xfs_ioctl32.c |  6 +-----
 3 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index dbca737..6ab5a24 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -595,13 +595,12 @@ xfs_attrmulti_by_handle(
 
 int
 xfs_ioc_space(
-	struct xfs_inode	*ip,
-	struct inode		*inode,
 	struct file		*filp,
-	int			ioflags,
 	unsigned int		cmd,
 	xfs_flock64_t		*bf)
 {
+	struct inode		*inode = file_inode(filp);
+	struct xfs_inode	*ip = XFS_I(inode);
 	struct iattr		iattr;
 	enum xfs_prealloc_flags	flags = 0;
 	uint			iolock = XFS_IOLOCK_EXCL;
@@ -626,7 +625,7 @@ xfs_ioc_space(
 
 	if (filp->f_flags & O_DSYNC)
 		flags |= XFS_PREALLOC_SYNC;
-	if (ioflags & XFS_IO_INVIS)
+	if (filp->f_mode & FMODE_NOCMTIME)
 		flags |= XFS_PREALLOC_INVISIBLE;
 
 	error = mnt_want_write_file(filp);
@@ -1464,8 +1463,7 @@ xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
 
 STATIC int
 xfs_ioc_getbmap(
-	struct xfs_inode	*ip,
-	int			ioflags,
+	struct file		*file,
 	unsigned int		cmd,
 	void			__user *arg)
 {
@@ -1479,10 +1477,10 @@ xfs_ioc_getbmap(
 		return -EINVAL;
 
 	bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
-	if (ioflags & XFS_IO_INVIS)
+	if (file->f_mode & FMODE_NOCMTIME)
 		bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
 
-	error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
+	error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, xfs_getbmap_format,
 			    (__force struct getbmap *)arg+1);
 	if (error)
 		return error;
@@ -1619,12 +1617,8 @@ xfs_file_ioctl(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	void			__user *arg = (void __user *)p;
-	int			ioflags = 0;
 	int			error;
 
-	if (filp->f_mode & FMODE_NOCMTIME)
-		ioflags |= XFS_IO_INVIS;
-
 	trace_xfs_file_ioctl(ip);
 
 	switch (cmd) {
@@ -1643,7 +1637,7 @@ xfs_file_ioctl(
 
 		if (copy_from_user(&bf, arg, sizeof(bf)))
 			return -EFAULT;
-		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+		return xfs_ioc_space(filp, cmd, &bf);
 	}
 	case XFS_IOC_DIOINFO: {
 		struct dioattr	da;
@@ -1702,7 +1696,7 @@ xfs_file_ioctl(
 
 	case XFS_IOC_GETBMAP:
 	case XFS_IOC_GETBMAPA:
-		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
+		return xfs_ioc_getbmap(filp, cmd, arg);
 
 	case XFS_IOC_GETBMAPX:
 		return xfs_ioc_getbmapx(ip, arg);
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 77c02c7..8b52881 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -20,10 +20,7 @@
 
 extern int
 xfs_ioc_space(
-	struct xfs_inode	*ip,
-	struct inode		*inode,
 	struct file		*filp,
-	int			ioflags,
 	unsigned int		cmd,
 	xfs_flock64_t		*bf);
 
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 1a05d8a..321f577 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -532,12 +532,8 @@ xfs_file_compat_ioctl(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	void			__user *arg = (void __user *)p;
-	int			ioflags = 0;
 	int			error;
 
-	if (filp->f_mode & FMODE_NOCMTIME)
-		ioflags |= XFS_IO_INVIS;
-
 	trace_xfs_file_compat_ioctl(ip);
 
 	switch (cmd) {
@@ -589,7 +585,7 @@ xfs_file_compat_ioctl(
 		if (xfs_compat_flock64_copyin(&bf, arg))
 			return -EFAULT;
 		cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
-		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
+		return xfs_ioc_space(filp, cmd, &bf);
 	}
 	case XFS_IOC_FSGEOMETRY_V1_32:
 		return xfs_compat_ioc_fsgeometry_v1(mp, arg);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 2/8] xfs: kill ioflags
  2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 1/8] xfs: don't pass ioflags around in the ioctl path Christoph Hellwig
@ 2016-06-22 15:27 ` Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 3/8] xfs: remove s_maxbytes enforcement in xfs_file_read_iter Christoph Hellwig
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:27 UTC (permalink / raw)
  To: xfs; +Cc: linux-nvdimm, linux-fsdevel

Now that we have the direct I/O kiocb flag there is no real need to sample
the value inside of XFS, and the invis flag was always just partially used
and isn't worth keeping this infrastructure around for.   This also splits
the read tracepoint into buffered vs direct as we've done for writes a long
time ago.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c  | 26 +++++++++-----------------
 fs/xfs/xfs_inode.h | 10 ----------
 fs/xfs/xfs_trace.h | 19 ++++++++-----------
 3 files changed, 17 insertions(+), 38 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 713991c..b32e6b0 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -249,18 +249,12 @@ xfs_file_read_iter(
 	struct xfs_mount	*mp = ip->i_mount;
 	size_t			size = iov_iter_count(to);
 	ssize_t			ret = 0;
-	int			ioflags = 0;
 	xfs_fsize_t		n;
 	loff_t			pos = iocb->ki_pos;
 
 	XFS_STATS_INC(mp, xs_read_calls);
 
-	if (unlikely(iocb->ki_flags & IOCB_DIRECT))
-		ioflags |= XFS_IO_ISDIRECT;
-	if (file->f_mode & FMODE_NOCMTIME)
-		ioflags |= XFS_IO_INVIS;
-
-	if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
+	if ((iocb->ki_flags & IOCB_DIRECT) && !IS_DAX(inode)) {
 		xfs_buftarg_t	*target =
 			XFS_IS_REALTIME_INODE(ip) ?
 				mp->m_rtdev_targp : mp->m_ddev_targp;
@@ -293,7 +287,7 @@ xfs_file_read_iter(
 	 * serialisation.
 	 */
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
+	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_mapping->nrpages) {
 		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
@@ -327,7 +321,10 @@ xfs_file_read_iter(
 		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 	}
 
-	trace_xfs_file_read(ip, size, pos, ioflags);
+	if (iocb->ki_flags & IOCB_DIRECT)
+		trace_xfs_file_direct_read(ip, size, pos);
+	else
+		trace_xfs_file_buffered_read(ip, size, pos);
 
 	ret = generic_file_read_iter(iocb, to);
 	if (ret > 0)
@@ -346,18 +343,14 @@ xfs_file_splice_read(
 	unsigned int		flags)
 {
 	struct xfs_inode	*ip = XFS_I(infilp->f_mapping->host);
-	int			ioflags = 0;
 	ssize_t			ret;
 
 	XFS_STATS_INC(ip->i_mount, xs_read_calls);
 
-	if (infilp->f_mode & FMODE_NOCMTIME)
-		ioflags |= XFS_IO_INVIS;
-
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
-	trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
+	trace_xfs_file_splice_read(ip, count, *ppos);
 
 	/*
 	 * DAX inodes cannot ues the page cache for splice, so we have to push
@@ -620,7 +613,7 @@ xfs_file_dio_aio_write(
 		iolock = XFS_IOLOCK_SHARED;
 	}
 
-	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
+	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
 
 	data = *from;
 	ret = mapping->a_ops->direct_IO(iocb, &data);
@@ -670,8 +663,7 @@ xfs_file_buffered_aio_write(
 	current->backing_dev_info = inode_to_bdi(inode);
 
 write_retry:
-	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
-				      iocb->ki_pos, 0);
+	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
 	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
 	if (likely(ret >= 0))
 		iocb->ki_pos += ret;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0c19d3d..8eb78ec 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -473,14 +473,4 @@ do { \
 
 extern struct kmem_zone	*xfs_inode_zone;
 
-/*
- * Flags for read/write calls
- */
-#define XFS_IO_ISDIRECT	0x00001		/* bypass page cache */
-#define XFS_IO_INVIS	0x00002		/* don't update inode timestamps */
-
-#define XFS_IO_FLAGS \
-	{ XFS_IO_ISDIRECT,	"DIRECT" }, \
-	{ XFS_IO_INVIS,		"INVIS"}
-
 #endif	/* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6787a9f..2504f94 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1135,15 +1135,14 @@ TRACE_EVENT(xfs_log_assign_tail_lsn,
 )
 
 DECLARE_EVENT_CLASS(xfs_file_class,
-	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
-	TP_ARGS(ip, count, offset, flags),
+	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset),
+	TP_ARGS(ip, count, offset),
 	TP_STRUCT__entry(
 		__field(dev_t, dev)
 		__field(xfs_ino_t, ino)
 		__field(xfs_fsize_t, size)
 		__field(loff_t, offset)
 		__field(size_t, count)
-		__field(int, flags)
 	),
 	TP_fast_assign(
 		__entry->dev = VFS_I(ip)->i_sb->s_dev;
@@ -1151,23 +1150,21 @@ DECLARE_EVENT_CLASS(xfs_file_class,
 		__entry->size = ip->i_d.di_size;
 		__entry->offset = offset;
 		__entry->count = count;
-		__entry->flags = flags;
 	),
-	TP_printk("dev %d:%d ino 0x%llx size 0x%llx "
-		  "offset 0x%llx count 0x%zx ioflags %s",
+	TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count 0x%zx",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->ino,
 		  __entry->size,
 		  __entry->offset,
-		  __entry->count,
-		  __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
+		  __entry->count)
 )
 
 #define DEFINE_RW_EVENT(name)		\
 DEFINE_EVENT(xfs_file_class, name,	\
-	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),	\
-	TP_ARGS(ip, count, offset, flags))
-DEFINE_RW_EVENT(xfs_file_read);
+	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset),	\
+	TP_ARGS(ip, count, offset))
+DEFINE_RW_EVENT(xfs_file_buffered_read);
+DEFINE_RW_EVENT(xfs_file_direct_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 3/8] xfs: remove s_maxbytes enforcement in xfs_file_read_iter
  2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 1/8] xfs: don't pass ioflags around in the ioctl path Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 2/8] xfs: kill ioflags Christoph Hellwig
@ 2016-06-22 15:27 ` Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 4/8] xfs: split xfs_file_read_iter into buffered and direct I/O helpers Christoph Hellwig
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:27 UTC (permalink / raw)
  To: xfs; +Cc: linux-nvdimm, linux-fsdevel

All the three low-level read implementations that we might call already
take care of not overflowing the maximum supported bytes, no need to
duplicate it here.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index b32e6b0..09a5a78 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -249,7 +249,6 @@ xfs_file_read_iter(
 	struct xfs_mount	*mp = ip->i_mount;
 	size_t			size = iov_iter_count(to);
 	ssize_t			ret = 0;
-	xfs_fsize_t		n;
 	loff_t			pos = iocb->ki_pos;
 
 	XFS_STATS_INC(mp, xs_read_calls);
@@ -266,13 +265,6 @@ xfs_file_read_iter(
 		}
 	}
 
-	n = mp->m_super->s_maxbytes - pos;
-	if (n <= 0 || size == 0)
-		return 0;
-
-	if (n < size)
-		size = n;
-
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 4/8] xfs: split xfs_file_read_iter into buffered and direct I/O helpers
  2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
                   ` (2 preceding siblings ...)
  2016-06-22 15:27 ` [PATCH 3/8] xfs: remove s_maxbytes enforcement in xfs_file_read_iter Christoph Hellwig
@ 2016-06-22 15:27 ` Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 5/8] xfs: stop using generic_file_read_iter for direct I/O Christoph Hellwig
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:27 UTC (permalink / raw)
  To: xfs; +Cc: linux-nvdimm, linux-fsdevel

Similar to what we did on the write side a while ago.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c | 83 ++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 57 insertions(+), 26 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 09a5a78..e584333 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -239,35 +239,33 @@ xfs_file_fsync(
 }
 
 STATIC ssize_t
-xfs_file_read_iter(
+xfs_file_dio_aio_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct file		*file = iocb->ki_filp;
-	struct inode		*inode = file->f_mapping->host;
+	struct address_space	*mapping = iocb->ki_filp->f_mapping;
+	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	size_t			size = iov_iter_count(to);
+	size_t			count = iov_iter_count(to);
+	struct xfs_buftarg	*target;
 	ssize_t			ret = 0;
-	loff_t			pos = iocb->ki_pos;
 
-	XFS_STATS_INC(mp, xs_read_calls);
+	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
 
-	if ((iocb->ki_flags & IOCB_DIRECT) && !IS_DAX(inode)) {
-		xfs_buftarg_t	*target =
-			XFS_IS_REALTIME_INODE(ip) ?
-				mp->m_rtdev_targp : mp->m_ddev_targp;
+	if (XFS_IS_REALTIME_INODE(ip))
+		target = ip->i_mount->m_rtdev_targp;
+	else
+		target = ip->i_mount->m_ddev_targp;
+
+	if (!IS_DAX(inode)) {
 		/* DIO must be aligned to device logical sector size */
-		if ((pos | size) & target->bt_logical_sectormask) {
-			if (pos == i_size_read(inode))
+		if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
+			if (iocb->ki_pos == i_size_read(inode))
 				return 0;
 			return -EINVAL;
 		}
 	}
 
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
 	/*
 	 * Locking is a bit tricky here. If we take an exclusive lock for direct
 	 * IO, we effectively serialise all new concurrent read IO to this file
@@ -279,7 +277,7 @@ xfs_file_read_iter(
 	 * serialisation.
 	 */
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_mapping->nrpages) {
+	if (mapping->nrpages) {
 		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
@@ -294,8 +292,8 @@ xfs_file_read_iter(
 		 * flush and reduce the chances of repeated iolock cycles going
 		 * forward.
 		 */
-		if (inode->i_mapping->nrpages) {
-			ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+		if (mapping->nrpages) {
+			ret = filemap_write_and_wait(mapping);
 			if (ret) {
 				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
 				return ret;
@@ -306,23 +304,56 @@ xfs_file_read_iter(
 			 * we fail to invalidate a page, but this should never
 			 * happen on XFS. Warn if it does fail.
 			 */
-			ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+			ret = invalidate_inode_pages2(mapping);
 			WARN_ON_ONCE(ret);
 			ret = 0;
 		}
 		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 	}
 
+	ret = generic_file_read_iter(iocb, to);
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_buffered_aio_read(
+	struct kiocb		*iocb,
+	struct iov_iter		*to)
+{
+	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
+	ssize_t			ret;
+
+	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
+
+	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	ret = generic_file_read_iter(iocb, to);
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_read_iter(
+	struct kiocb		*iocb,
+	struct iov_iter		*to)
+{
+	struct xfs_mount	*mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
+	ssize_t			ret = 0;
+
+	XFS_STATS_INC(mp, xs_read_calls);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
 	if (iocb->ki_flags & IOCB_DIRECT)
-		trace_xfs_file_direct_read(ip, size, pos);
+		ret = xfs_file_dio_aio_read(iocb, to);
 	else
-		trace_xfs_file_buffered_read(ip, size, pos);
+		ret = xfs_file_buffered_aio_read(iocb, to);
 
-	ret = generic_file_read_iter(iocb, to);
 	if (ret > 0)
 		XFS_STATS_ADD(mp, xs_read_bytes, ret);
-
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
@@ -578,7 +609,7 @@ xfs_file_dio_aio_write(
 	end = iocb->ki_pos + count - 1;
 
 	/*
-	 * See xfs_file_read_iter() for why we do a full-file flush here.
+	 * See xfs_file_dio_aio_read() for why we do a full-file flush here.
 	 */
 	if (mapping->nrpages) {
 		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 5/8] xfs: stop using generic_file_read_iter for direct I/O
  2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
                   ` (3 preceding siblings ...)
  2016-06-22 15:27 ` [PATCH 4/8] xfs: split xfs_file_read_iter into buffered and direct I/O helpers Christoph Hellwig
@ 2016-06-22 15:27 ` Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 6/8] xfs: direct calls in the direct I/O path Christoph Hellwig
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:27 UTC (permalink / raw)
  To: xfs; +Cc: linux-nvdimm, linux-fsdevel

XFS already implements its own flushing of the pagecache because it
implements proper synchronization for direct I/O reads.  This means
calling generic_file_read_iter for direct I/O is rather useless,
as it doesn't do much but update the atime and iocb position for
us.  This also gets rid of the buffered I/O fallback that isn't used
for XFS.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e584333..f761f49 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -246,12 +246,17 @@ xfs_file_dio_aio_read(
 	struct address_space	*mapping = iocb->ki_filp->f_mapping;
 	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
+	loff_t			isize = i_size_read(inode);
 	size_t			count = iov_iter_count(to);
+	struct iov_iter		data;
 	struct xfs_buftarg	*target;
 	ssize_t			ret = 0;
 
 	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
 
+	if (!count)
+		return 0; /* skip atime */
+
 	if (XFS_IS_REALTIME_INODE(ip))
 		target = ip->i_mount->m_rtdev_targp;
 	else
@@ -260,7 +265,7 @@ xfs_file_dio_aio_read(
 	if (!IS_DAX(inode)) {
 		/* DIO must be aligned to device logical sector size */
 		if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
-			if (iocb->ki_pos == i_size_read(inode))
+			if (iocb->ki_pos == isize)
 				return 0;
 			return -EINVAL;
 		}
@@ -311,9 +316,15 @@ xfs_file_dio_aio_read(
 		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 	}
 
-	ret = generic_file_read_iter(iocb, to);
+	data = *to;
+	ret = mapping->a_ops->direct_IO(iocb, &data);
+	if (ret > 0) {
+		iocb->ki_pos += ret;
+		iov_iter_advance(to, ret);
+	}
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
+	file_accessed(iocb->ki_filp);
 	return ret;
 }
 
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 6/8] xfs: direct calls in the direct I/O path
  2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
                   ` (4 preceding siblings ...)
  2016-06-22 15:27 ` [PATCH 5/8] xfs: stop using generic_file_read_iter for direct I/O Christoph Hellwig
@ 2016-06-22 15:27 ` Christoph Hellwig
  2016-06-22 15:27 ` [PATCH 7/8] xfs: split direct I/O and DAX path Christoph Hellwig
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:27 UTC (permalink / raw)
  To: xfs; +Cc: linux-nvdimm, linux-fsdevel

We control both the callers and callees of ->direct_IO, so remove the
indirect calls.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_aops.c | 24 +++++-------------------
 fs/xfs/xfs_aops.h |  3 +++
 fs/xfs/xfs_file.c | 17 +++++++++++++++--
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 80714eb..b368277 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1303,7 +1303,7 @@ xfs_get_blocks_dax_fault(
  * whereas if we have flags set we will always be called in task context
  * (i.e. from a workqueue).
  */
-STATIC int
+int
 xfs_end_io_direct_write(
 	struct kiocb		*iocb,
 	loff_t			offset,
@@ -1374,24 +1374,10 @@ xfs_vm_direct_IO(
 	struct kiocb		*iocb,
 	struct iov_iter		*iter)
 {
-	struct inode		*inode = iocb->ki_filp->f_mapping->host;
-	dio_iodone_t		*endio = NULL;
-	int			flags = 0;
-	struct block_device	*bdev;
-
-	if (iov_iter_rw(iter) == WRITE) {
-		endio = xfs_end_io_direct_write;
-		flags = DIO_ASYNC_EXTEND;
-	}
-
-	if (IS_DAX(inode)) {
-		return dax_do_io(iocb, inode, iter,
-				 xfs_get_blocks_direct, endio, 0);
-	}
-
-	bdev = xfs_find_bdev_for_inode(inode);
-	return  __blockdev_direct_IO(iocb, inode, bdev, iter,
-			xfs_get_blocks_direct, endio, NULL, flags);
+	/*
+	 * We just need the method present so that open/fcntl allow direct I/O.
+	 */
+	return -EINVAL;
 }
 
 STATIC sector_t
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 814aab7..bf2d9a1 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -60,6 +60,9 @@ int	xfs_get_blocks_direct(struct inode *inode, sector_t offset,
 int	xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
 			         struct buffer_head *map_bh, int create);
 
+int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
+		ssize_t size, void *private);
+
 extern void xfs_count_page_state(struct page *, int *, int *);
 extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f761f49..24b267d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -317,7 +317,13 @@ xfs_file_dio_aio_read(
 	}
 
 	data = *to;
-	ret = mapping->a_ops->direct_IO(iocb, &data);
+	if (IS_DAX(inode)) {
+		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
+				NULL, 0);
+	} else {
+		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+				xfs_get_blocks_direct, NULL, NULL, 0);
+	}
 	if (ret > 0) {
 		iocb->ki_pos += ret;
 		iov_iter_advance(to, ret);
@@ -650,7 +656,14 @@ xfs_file_dio_aio_write(
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
 
 	data = *from;
-	ret = mapping->a_ops->direct_IO(iocb, &data);
+	if (IS_DAX(inode)) {
+		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
+				xfs_end_io_direct_write, 0);
+	} else {
+		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+				xfs_get_blocks_direct, xfs_end_io_direct_write,
+				NULL, DIO_ASYNC_EXTEND);
+	}
 
 	/* see generic_file_direct_write() for why this is necessary */
 	if (mapping->nrpages) {
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 7/8] xfs: split direct I/O and DAX path
  2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
                   ` (5 preceding siblings ...)
  2016-06-22 15:27 ` [PATCH 6/8] xfs: direct calls in the direct I/O path Christoph Hellwig
@ 2016-06-22 15:27 ` Christoph Hellwig
  2016-09-29  2:53   ` Darrick J. Wong
  2016-06-22 15:27 ` [PATCH 8/8] xfs: fix locking for DAX writes Christoph Hellwig
  2016-06-23 23:24 ` xfs: untangle the direct I/O and DAX path, fix DAX locking Dave Chinner
  8 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:27 UTC (permalink / raw)
  To: xfs; +Cc: linux-nvdimm, linux-fsdevel

So far the DAX code overloaded the direct I/O code path.  There is very little
in common between the two, and untangling them allows to clean up both variants.

As a side effect we also get separate trace points for both I/O types.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c  | 139 ++++++++++++++++++++++++++++++++++++++++++-----------
 fs/xfs/xfs_trace.h |   2 +
 2 files changed, 112 insertions(+), 29 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 24b267d..0e74325 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -262,13 +262,11 @@ xfs_file_dio_aio_read(
 	else
 		target = ip->i_mount->m_ddev_targp;
 
-	if (!IS_DAX(inode)) {
-		/* DIO must be aligned to device logical sector size */
-		if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
-			if (iocb->ki_pos == isize)
-				return 0;
-			return -EINVAL;
-		}
+	/* DIO must be aligned to device logical sector size */
+	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
+		if (iocb->ki_pos == isize)
+			return 0;
+		return -EINVAL;
 	}
 
 	/*
@@ -317,13 +315,37 @@ xfs_file_dio_aio_read(
 	}
 
 	data = *to;
-	if (IS_DAX(inode)) {
-		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
-				NULL, 0);
-	} else {
-		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-				xfs_get_blocks_direct, NULL, NULL, 0);
+	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+			xfs_get_blocks_direct, NULL, NULL, 0);
+	if (ret > 0) {
+		iocb->ki_pos += ret;
+		iov_iter_advance(to, ret);
 	}
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_dax_read(
+	struct kiocb		*iocb,
+	struct iov_iter		*to)
+{
+	struct address_space	*mapping = iocb->ki_filp->f_mapping;
+	struct inode		*inode = mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct iov_iter		data = *to;
+	size_t			count = iov_iter_count(to);
+	ssize_t			ret = 0;
+
+	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
+
+	if (!count)
+		return 0; /* skip atime */
+
+	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
 	if (ret > 0) {
 		iocb->ki_pos += ret;
 		iov_iter_advance(to, ret);
@@ -356,7 +378,8 @@ xfs_file_read_iter(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct xfs_mount	*mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
 	ssize_t			ret = 0;
 
 	XFS_STATS_INC(mp, xs_read_calls);
@@ -364,7 +387,9 @@ xfs_file_read_iter(
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if (iocb->ki_flags & IOCB_DIRECT)
+	if (IS_DAX(inode))
+		ret = xfs_file_dax_read(iocb, to);
+	else if (iocb->ki_flags & IOCB_DIRECT)
 		ret = xfs_file_dio_aio_read(iocb, to);
 	else
 		ret = xfs_file_buffered_aio_read(iocb, to);
@@ -586,8 +611,7 @@ xfs_file_dio_aio_write(
 					mp->m_rtdev_targp : mp->m_ddev_targp;
 
 	/* DIO must be aligned to device logical sector size */
-	if (!IS_DAX(inode) &&
-	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))
+	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
 		return -EINVAL;
 
 	/* "unaligned" here means not aligned to a filesystem block */
@@ -656,14 +680,9 @@ xfs_file_dio_aio_write(
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
 
 	data = *from;
-	if (IS_DAX(inode)) {
-		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
-				xfs_end_io_direct_write, 0);
-	} else {
-		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
-				xfs_get_blocks_direct, xfs_end_io_direct_write,
-				NULL, DIO_ASYNC_EXTEND);
-	}
+	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+			xfs_get_blocks_direct, xfs_end_io_direct_write,
+			NULL, DIO_ASYNC_EXTEND);
 
 	/* see generic_file_direct_write() for why this is necessary */
 	if (mapping->nrpages) {
@@ -680,10 +699,70 @@ out:
 	xfs_rw_iunlock(ip, iolock);
 
 	/*
-	 * No fallback to buffered IO on errors for XFS. DAX can result in
-	 * partial writes, but direct IO will either complete fully or fail.
+	 * No fallback to buffered IO on errors for XFS, direct IO will either
+	 * complete fully or fail.
+	 */
+	ASSERT(ret < 0 || ret == count);
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_dax_write(
+	struct kiocb		*iocb,
+	struct iov_iter		*from)
+{
+	struct address_space	*mapping = iocb->ki_filp->f_mapping;
+	struct inode		*inode = mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	ssize_t			ret = 0;
+	int			unaligned_io = 0;
+	int			iolock;
+	struct iov_iter		data;
+
+	/* "unaligned" here means not aligned to a filesystem block */
+	if ((iocb->ki_pos & mp->m_blockmask) ||
+	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
+		unaligned_io = 1;
+		iolock = XFS_IOLOCK_EXCL;
+	} else if (mapping->nrpages) {
+		iolock = XFS_IOLOCK_EXCL;
+	} else {
+		iolock = XFS_IOLOCK_SHARED;
+	}
+	xfs_rw_ilock(ip, iolock);
+
+	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
+	if (ret)
+		goto out;
+
+	/*
+	 * Yes, even DAX files can have page cache attached to them:  A zeroed
+	 * page is inserted into the pagecache when we have to serve a write
+	 * fault on a hole.  It should never be dirtied and can simply be
+	 * dropped from the pagecache once we get real data for the page.
 	 */
-	ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
+	if (mapping->nrpages) {
+		ret = invalidate_inode_pages2(mapping);
+		WARN_ON_ONCE(ret);
+	}
+
+	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
+		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		iolock = XFS_IOLOCK_SHARED;
+	}
+
+	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+
+	data = *from;
+	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
+			xfs_end_io_direct_write, 0);
+	if (ret > 0) {
+		iocb->ki_pos += ret;
+		iov_iter_advance(from, ret);
+	}
+out:
+	xfs_rw_iunlock(ip, iolock);
 	return ret;
 }
 
@@ -765,7 +844,9 @@ xfs_file_write_iter(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
-	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
+	if (IS_DAX(inode))
+		ret = xfs_file_dax_write(iocb, from);
+	else if (iocb->ki_flags & IOCB_DIRECT)
 		ret = xfs_file_dio_aio_write(iocb, from);
 	else
 		ret = xfs_file_buffered_aio_write(iocb, from);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2504f94..1451690 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1165,8 +1165,10 @@ DEFINE_EVENT(xfs_file_class, name,	\
 	TP_ARGS(ip, count, offset))
 DEFINE_RW_EVENT(xfs_file_buffered_read);
 DEFINE_RW_EVENT(xfs_file_direct_read);
+DEFINE_RW_EVENT(xfs_file_dax_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
+DEFINE_RW_EVENT(xfs_file_dax_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
 
 DECLARE_EVENT_CLASS(xfs_page_class,
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* [PATCH 8/8] xfs: fix locking for DAX writes
  2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
                   ` (6 preceding siblings ...)
  2016-06-22 15:27 ` [PATCH 7/8] xfs: split direct I/O and DAX path Christoph Hellwig
@ 2016-06-22 15:27 ` Christoph Hellwig
  2016-06-23 14:22   ` Boaz Harrosh
  2016-06-23 23:24 ` xfs: untangle the direct I/O and DAX path, fix DAX locking Dave Chinner
  8 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-22 15:27 UTC (permalink / raw)
  To: xfs; +Cc: linux-nvdimm, linux-fsdevel

So far DAX writes inherited the locking from direct I/O writes, but the direct
I/O model of using shared locks for writes is actually wrong for DAX.  For
direct I/O we're out of any standards and don't have to provide the Posix
required exclusion between writers, but for DAX, which gets transparently
enabled on applications without any knowledge of it, we can't simply drop the
requirement.  Even worse this only happens for aligned writes and thus
doesn't show up for many typical use cases.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_file.c | 20 +-------------------
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 0e74325..413c9e0 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -714,24 +714,11 @@ xfs_file_dax_write(
 	struct address_space	*mapping = iocb->ki_filp->f_mapping;
 	struct inode		*inode = mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			ret = 0;
-	int			unaligned_io = 0;
-	int			iolock;
+	int			iolock = XFS_IOLOCK_EXCL;
 	struct iov_iter		data;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
-		unaligned_io = 1;
-		iolock = XFS_IOLOCK_EXCL;
-	} else if (mapping->nrpages) {
-		iolock = XFS_IOLOCK_EXCL;
-	} else {
-		iolock = XFS_IOLOCK_SHARED;
-	}
 	xfs_rw_ilock(ip, iolock);
-
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
@@ -747,11 +734,6 @@ xfs_file_dax_write(
 		WARN_ON_ONCE(ret);
 	}
 
-	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-		iolock = XFS_IOLOCK_SHARED;
-	}
-
 	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
 
 	data = *from;
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: [PATCH 8/8] xfs: fix locking for DAX writes
  2016-06-22 15:27 ` [PATCH 8/8] xfs: fix locking for DAX writes Christoph Hellwig
@ 2016-06-23 14:22   ` Boaz Harrosh
  0 siblings, 0 replies; 27+ messages in thread
From: Boaz Harrosh @ 2016-06-23 14:22 UTC (permalink / raw)
  To: Christoph Hellwig, xfs; +Cc: linux-fsdevel, linux-nvdimm

On 06/22/2016 06:27 PM, Christoph Hellwig wrote:
> So far DAX writes inherited the locking from direct I/O writes, but the direct
> I/O model of using shared locks for writes is actually wrong for DAX.  For
> direct I/O we're out of any standards and don't have to provide the Posix
> required exclusion between writers, but for DAX which gets transparently
> enable on applications without any knowledge of it we can't simply drop the
> requirement.  Even worse this only happens for aligned writes and thus
> doesn't show up for many typical use cases.
> 

Hi Sir Christoph

You raise a very interesting point and I would please like to ask questions.
Is this a theoretical standards problem or a real applications problem that
you know of?

You say above: " Posix required exclusion between writers"

As I understand, what it means is that if two threads/processes A & B write to the
same offset-length, in parallel. then a consistent full version will hold
of either A or B, which ever comes last. But never a torn version of both.

Is this really POSIX. I mean I knew POSIX is silly but so much so?
What about NFS CEPH Luster and all these network shared stuff. Does POSIX
say "On a single Node?". (Trond been yelling about file locks for *any* kind
of synchronization for years.)

And even with the write-lock to serialize writers (Or i_mutex in the case of ext4)
I do not see how this serialization works, because in a cached environment a
write_back can start and crash while the second thread above starts his memcopy
and on disk we still get a torn version of the record that was half from
A half from B. (Or maybe I do not understand what your automicity means)

Is not a rant I would really like to know what application uses this
"single-writer" facility and how does it actually works for them? I honestly
don't see how it works. (And do they really check that they are only working
on a local file system?)
Sorry for my slowness please explain?

BTW: I think that all the patches except this one makes a lot of sense
because of all the hidden quirks of direct_IO code paths. Just for example the
difference between "aligned and none align writes" as you mentioned above.

My $0.017:
Who In the real world would actually break without this patch, which
is not already broken?
And why sacrifice the vast majority of good applications for the sake
of an already broken (theoretical?) applications.

Thank you
Boaz

> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_file.c | 20 +-------------------
>  1 file changed, 1 insertion(+), 19 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 0e74325..413c9e0 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -714,24 +714,11 @@ xfs_file_dax_write(
>  	struct address_space	*mapping = iocb->ki_filp->f_mapping;
>  	struct inode		*inode = mapping->host;
>  	struct xfs_inode	*ip = XFS_I(inode);
> -	struct xfs_mount	*mp = ip->i_mount;
>  	ssize_t			ret = 0;
> -	int			unaligned_io = 0;
> -	int			iolock;
> +	int			iolock = XFS_IOLOCK_EXCL;
>  	struct iov_iter		data;
>  
> -	/* "unaligned" here means not aligned to a filesystem block */
> -	if ((iocb->ki_pos & mp->m_blockmask) ||
> -	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
> -		unaligned_io = 1;
> -		iolock = XFS_IOLOCK_EXCL;
> -	} else if (mapping->nrpages) {
> -		iolock = XFS_IOLOCK_EXCL;
> -	} else {
> -		iolock = XFS_IOLOCK_SHARED;
> -	}
>  	xfs_rw_ilock(ip, iolock);
> -
>  	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
>  	if (ret)
>  		goto out;
> @@ -747,11 +734,6 @@ xfs_file_dax_write(
>  		WARN_ON_ONCE(ret);
>  	}
>  
> -	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
> -		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> -		iolock = XFS_IOLOCK_SHARED;
> -	}
> -
>  	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
>  
>  	data = *from;
> 


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
                   ` (7 preceding siblings ...)
  2016-06-22 15:27 ` [PATCH 8/8] xfs: fix locking for DAX writes Christoph Hellwig
@ 2016-06-23 23:24 ` Dave Chinner
  2016-06-24  1:14   ` Dan Williams
  2016-06-24  7:26   ` Christoph Hellwig
  8 siblings, 2 replies; 27+ messages in thread
From: Dave Chinner @ 2016-06-23 23:24 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs, linux-fsdevel, linux-nvdimm

On Wed, Jun 22, 2016 at 05:27:08PM +0200, Christoph Hellwig wrote:
> The last patch is what started the series:  XFS currently uses the
> direct I/O locking strategy for DAX because DAX was overloaded onto
> the direct I/O path.  For XFS this means that we only take a shared
> inode lock instead of the normal exclusive one for writes IFF they
> are properly aligned.  While this is fine for O_DIRECT which requires
> explicit opt-in from the application it's not fine for DAX where we'll
> suddenly lose expected and required synchronization if the file system
> happens to use DAX underneath.

Except we did that *intentionally* - by definition there is no
cache to bypass with DAX and so all IO is "direct". That, combined
with the fact that all Linux filesystems except XFS break the POSIX
exclusive writer rule you are quoting to begin with, it seemed
pointless to enforce it for DAX....

So, before taking any patches to change that behaviour in XFS, a
wider discussion about the policy needs to be had. I don't think
we should care about POSIX here - if you have an application that
needs this serialisation, turn off DAX. That's why I made it a
per-inode inheritable flag and why the mount option will go away
over time.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-23 23:24 ` xfs: untangle the direct I/O and DAX path, fix DAX locking Dave Chinner
@ 2016-06-24  1:14   ` Dan Williams
  2016-06-24  7:13     ` Dave Chinner
  2016-06-24  7:26   ` Christoph Hellwig
  1 sibling, 1 reply; 27+ messages in thread
From: Dan Williams @ 2016-06-24  1:14 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Christoph Hellwig, linux-fsdevel, linux-nvdimm, XFS Developers

On Thu, Jun 23, 2016 at 4:24 PM, Dave Chinner <david@fromorbit.com> wrote:
> On Wed, Jun 22, 2016 at 05:27:08PM +0200, Christoph Hellwig wrote:
>> The last patch is what started the series:  XFS currently uses the
>> direct I/O locking strategy for DAX because DAX was overloaded onto
>> the direct I/O path.  For XFS this means that we only take a shared
>> inode lock instead of the normal exclusive one for writes IFF they
>> are properly aligned.  While this is fine for O_DIRECT which requires
>> explicit opt-in from the application it's not fine for DAX where we'll
>> suddenly lose expected and required synchronization of the file system
>> happens to use DAX undeneath.
>
> Except we did that *intentionally* - by definition there is no
> cache to bypass with DAX and so all IO is "direct". That, combined
> with the fact that all Linux filesystems except XFS break the POSIX
> exclusive writer rule you are quoting to begin with, it seemed
> pointless to enforce it for DAX....

If we're going to be strict about POSIX fsync() semantics we should be
strict about this exclusive write semantic.  In other words why is it
ok to loosen one and not the other, if application compatibility is
the concern?

>
> So, before taking any patches to change that behaviour in XFS, a
> wider discussion about the policy needs to be had. I don't think
> we should care about POSIX here - if you have an application that
> needs this serialisation, turn off DAX.

s/needs this serialisation/needs the kernel to flush cpu cache/

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-24  1:14   ` Dan Williams
@ 2016-06-24  7:13     ` Dave Chinner
  2016-06-24  7:31       ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Dave Chinner @ 2016-06-24  7:13 UTC (permalink / raw)
  To: Dan Williams
  Cc: Christoph Hellwig, linux-fsdevel, linux-nvdimm, XFS Developers

On Thu, Jun 23, 2016 at 06:14:47PM -0700, Dan Williams wrote:
> On Thu, Jun 23, 2016 at 4:24 PM, Dave Chinner <david@fromorbit.com> wrote:
> > On Wed, Jun 22, 2016 at 05:27:08PM +0200, Christoph Hellwig wrote:
> >> The last patch is what started the series:  XFS currently uses the
> >> direct I/O locking strategy for DAX because DAX was overloaded onto
> >> the direct I/O path.  For XFS this means that we only take a shared
> >> inode lock instead of the normal exclusive one for writes IFF they
> >> are properly aligned.  While this is fine for O_DIRECT which requires
> >> explicit opt-in from the application it's not fine for DAX where we'll
> >> suddenly lose expected and required synchronization of the file system
> >> happens to use DAX undeneath.
> >
> > Except we did that *intentionally* - by definition there is no
> > cache to bypass with DAX and so all IO is "direct". That, combined
> > with the fact that all Linux filesystems except XFS break the POSIX
> > exclusive writer rule you are quoting to begin with, it seemed
> > pointless to enforce it for DAX....
> 
> If we're going to be strict about POSIX fsync() semantics we should be
> strict about this exclusive write semantic.  In other words why is it
> ok to loosen one and not the other, if application compatibility is
> the concern?

This is a POSIX compliant fsync() implementation:

int fsync(int fd)
{
	return 0;
}

That's not what we require from Linux filesystems and storage
subsystems.  Our data integrity requirements are not actually
defined by POSIX - we go way beyond what POSIX actually requires us
to implement. If all we cared about is POSIX, then the above is how
we'd implement fsync() simply because it's fast. Everyone implements
fsync differently, so portable applications can't actually rely on
the POSIX standard fsync() implementation to keep their data safe...

IOWs, we don't give a shit about what POSIX says about fsync
because, in practice, it's useless. Instead, we implement something
that *works* and provides users with real data integrity guarantees.

If you like the POSIX specs for data integrity, go use
sync_file_range() - it doesn't guarantee data integrity, just like
posix compliant fsync(). And yes, applications that use
sync_file_range() are known to lose data when systems crash...

The POSIX exclusive write requirement is a different case. No linux
filesystem except XFS has ever met that requirement (in 20 something
years), yet I don't see applications falling over with corrupt data
from non-exclusive writes all the time, nor do I see application
developers shouting at us to provide it. i.e. reality tells us this
isn't a POSIX behaviour that applications rely on because everyone
implements it differently.

So, like fsync(), if everyone implements it differently,
applications don't rely on posix semantics to serialise access to
overlapping ranges of a file. And if that's the case, then
why even bother exclusive write locking in the filesystem when there
is no need for serialisation of page cache contents?

We don't do it because POSIX says so, because we already ignore what
POSIX says about this topic for technical reasons. So why should we
make DAX conform to POSIX exclusive writer behaviour when DAX is
being specifically aimed at high performance, highly concurrent
applications where exclusive writer behaviour will cause major
performance issues?

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-23 23:24 ` xfs: untangle the direct I/O and DAX path, fix DAX locking Dave Chinner
  2016-06-24  1:14   ` Dan Williams
@ 2016-06-24  7:26   ` Christoph Hellwig
  2016-06-24 23:00     ` Dave Chinner
  1 sibling, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-24  7:26 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, xfs, linux-fsdevel, linux-nvdimm

On Fri, Jun 24, 2016 at 09:24:46AM +1000, Dave Chinner wrote:
> Except we did that *intentionally* - by definition there is no
> cache to bypass with DAX and so all IO is "direct". That, combined
> with the fact that all Linux filesystems except XFS break the POSIX
> exclusive writer rule you are quoting to begin with, it seemed
> pointless to enforce it for DAX....

No file system breaks the exclusive writer rule - most filesystems
don't make writers atomic vs readers.

More importantly every other filesystem (well there only are ext2
and ext4..) excludes DAX writers against other DAX writers.

> So, before taking any patches to change that behaviour in XFS, a
> wider discussion about the policy needs to be had. I don't think
> we should care about POSIX here - if you have an application that
> needs this serialisation, turn off DAX. That's why I made it a
> per-inode inheritable flag and why the mount option will go away
> over time.

Sorry, but this is simply broken - allowing apps to opt-in behavior
(e.g. like we're using O_DIRECT) is always fine.  Requiring
filesystem-specific tuning that has effect outside the app to get
existing documented behavior is not how to design APIs.

Maybe we'll need to opt-in to use DAX for mmap, but giving the same
existing behavior for read and write and avoiding a copy to the pagecache
is an obvious win.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-24  7:13     ` Dave Chinner
@ 2016-06-24  7:31       ` Christoph Hellwig
  0 siblings, 0 replies; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-24  7:31 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Dan Williams, Christoph Hellwig, linux-fsdevel, linux-nvdimm,
	XFS Developers

On Fri, Jun 24, 2016 at 05:13:18PM +1000, Dave Chinner wrote:
> This is a POSIX compliant fsync() implementation:
> 
> int fsync(int fd)
> {
> 	return 0;
> }

Depends on what you mean by "Posix".  Modern Posix which includes
XPG has the _POSIX_SYNCHRONIZED_IO option, which Linux implements.  For
that Posix says about fsync:

    [SIO] [Option Start] If _POSIX_SYNCHRONIZED_IO is defined, the fsync()
    function shall force all currently queued I/O operations associated with
    the file indicated by file descriptor fildes to the synchronized I/O
    completion state. All I/O operations shall be completed as defined for
    synchronized I/O file integrity completion. [Option End]


Whereas synchronized I/O file integrity completion is defined as:

     3.378 Synchronized I/O Data Integrity Completion

     For read, when the operation has been completed or diagnosed if
     unsuccessful. The read is complete only when an image of the data has been
     successfully transferred to the requesting process. If there were any
     pending write requests affecting the data to be read at the time that the
     synchronized read operation was requested, these write requests are
     successfully transferred prior to reading the data.

     For write, when the operation has been completed or diagnosed if
     unsuccessful. The write is complete only when the data specified in the
     write request is successfully transferred and all file system information
     required to retrieve the data is successfully transferred.

     File attributes that are not necessary for data retrieval (access time,
     modification time, status change time) need not be successfully
     transferred prior to returning to the calling process.

     3.379 Synchronized I/O File Integrity Completion

     Identical to a synchronized I/O data integrity completion with the
     addition that all file attributes relative to the I/O operation (including
     access time, modification time, status change time) are successfully
     transferred prior to returning to the calling process.


So in this case Posix very much requires data to be on a stable
medium.

> The POSIX exclusive write requirement is a different case. No linux
> filesystem except XFS has ever met that requirement (in 20 something
> years), yet I don't see applications falling over with corrupt data
> from non-exclusive writes all the time, nor do I see application
> developers shouting at us to provide it. i.e. reality tells us this
> isn't a POSIX behaviour that applications rely on because everyone
> implements it differently.

Every file system excludes writes from other writes.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-24  7:26   ` Christoph Hellwig
@ 2016-06-24 23:00     ` Dave Chinner
  2016-06-28 13:10       ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Dave Chinner @ 2016-06-24 23:00 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs, linux-fsdevel, linux-nvdimm

On Fri, Jun 24, 2016 at 09:26:12AM +0200, Christoph Hellwig wrote:
> On Fri, Jun 24, 2016 at 09:24:46AM +1000, Dave Chinner wrote:
> > Except we did that *intentionally* - by definition there is no
> > cache to bypass with DAX and so all IO is "direct". That, combined
> > with the fact that all Linux filesystems except XFS break the POSIX
> > exclusive writer rule you are quoting to begin with, it seemed
> > pointless to enforce it for DAX....
> 
> No file system breaks the exclusive writer rule - most filesystem
> don't make writers atomic vs readers.
> 
> More importantly every other filesystem (well there only are ext2
> and ext4..) exludes DAX writers against other DAX writers.
> 
> > So, before taking any patches to change that behaviour in XFS, a
> > wider discussion about the policy needs to be had. I don't think
> > we should care about POSIX here - if you have an application that
> > needs this serialisation, turn off DAX. That's why I made it a
> > per-inode inheritable flag and why the mount option will go away
> > over time.
> 
> Sorry, but this is simply broken - allowing apps to opt-in behavior
> (e.g. like we're using O_DIRECT) is always fine.  Requriring
> filesystem-specific tuning that has affect outside the app to get
> existing documented behavior is not how to design APIs.

Using DAX is an *admin decision*, not an application decision.
Indeed, it's a mount option right now, and that's most definitely not
something the application can turn on or off! Inode flags allow the
admin to decide that two apps working on the same filesystem can use
(or not use) DAX independently, rather than needing to put them on
different filesystems.

> Maybe we'll need to opt-in to use DAX for mmap, but giving the same
> existing behavior for read and write and avoiding a copy to the pagecache
> is an obvious win.

You can't use DAX just for mmap. It's an inode scope behaviour -
once it's turned on, all accesses to that inode - regardless of user
interface - must use DAX. It's all or nothing, not a per file
descript/mmap context option.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-24 23:00     ` Dave Chinner
@ 2016-06-28 13:10       ` Christoph Hellwig
  2016-06-28 13:27         ` Boaz Harrosh
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-28 13:10 UTC (permalink / raw)
  To: Dave Chinner; +Cc: Christoph Hellwig, xfs, linux-fsdevel, linux-nvdimm

On Sat, Jun 25, 2016 at 09:00:45AM +1000, Dave Chinner wrote:
> > 
> > Sorry, but this is simply broken - allowing apps to opt-in behavior
> > (e.g. like we're using O_DIRECT) is always fine.  Requriring
> > filesystem-specific tuning that has affect outside the app to get
> > existing documented behavior is not how to design APIs.
> 
> Using DAX is an *admin decision*, not an application decision.

Of course - that's exactly my point.

> Indeed, it's a mount option right now, and that's most definitely not
> something the application can turn on or off! Inode flags allow the
> admin to decide that two apps working on the same filesystem can use
> (or not use) DAX independently, rather than needing to put them on
> different filesystems.

Right.  And an existing application can get DAX turned on behind its
back, and will now suddenly get different synchronization behavior.
That is, if its writes happen to be aligned to the fs block size.

> > Maybe we'll need to opt-in to use DAX for mmap, but giving the same
> > existing behavior for read and write and avoiding a copy to the pagecache
> > is an obvious win.
> 
> You can't use DAX just for mmap. It's an inode scope behaviour -
> once it's turned on, all accesses to that inode - regardless of user
> interface - must use DAX. It's all or nothing, not a per file
> descript/mmap context option.

Right now it is.  But when discussing mmap behavior one option was to
require an opt-in to get DAX-specific mmap semantics.  For plain
read/write we have no such option and thus absolutely need to behave as
all normal reads and writes behave.  If you think the exclusive lock
for writes hurts we have two options:
 
 a) implement range locks (although they might be more expensive for
    typical loads)
 b) add a new O_* or RWF_* option to not require the synchronization
    for apps that don't want it.

Neither of those cases really is DAX-specific.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-28 13:10       ` Christoph Hellwig
@ 2016-06-28 13:27         ` Boaz Harrosh
  2016-06-28 13:39           ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Boaz Harrosh @ 2016-06-28 13:27 UTC (permalink / raw)
  To: Christoph Hellwig, Dave Chinner; +Cc: xfs, linux-fsdevel, linux-nvdimm

On 06/28/2016 04:10 PM, Christoph Hellwig wrote:
> On Sat, Jun 25, 2016 at 09:00:45AM +1000, Dave Chinner wrote:
<>
> 
> Right.  And an existing application can get DAX turned on under its
> back, and will now suddently get different synchronization behavior.
> That is if it's writes happen to be aligned to the fs block size.
> 

Is there an actual application that does that? or is this purely
theoretical right now?


Thanks
Boaz


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-28 13:27         ` Boaz Harrosh
@ 2016-06-28 13:39           ` Christoph Hellwig
  2016-06-28 13:56             ` Boaz Harrosh
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-28 13:39 UTC (permalink / raw)
  To: Boaz Harrosh
  Cc: Christoph Hellwig, Dave Chinner, xfs, linux-fsdevel, linux-nvdimm

On Tue, Jun 28, 2016 at 04:27:03PM +0300, Boaz Harrosh wrote:
> > Right.  And an existing application can get DAX turned on under its
> > back, and will now suddently get different synchronization behavior.
> > That is if it's writes happen to be aligned to the fs block size.
> > 
> 
> Is there an actual application that does that? or is this purely
> theoretical right now?

Lots of them.  The typical case is multithreaded (or preforked like old
apache) daemons that use O_APPEND to write to a common log file.  Then
again those log records will usually not be 4k aligned, so they'd still
accidentally get the exclusive locking even without this patch.  But
beware the case where a log record actually matches the alignment..

> 
> 
> Thanks
> Boaz
---end quoted text---

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-28 13:39           ` Christoph Hellwig
@ 2016-06-28 13:56             ` Boaz Harrosh
  2016-06-28 15:39               ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Boaz Harrosh @ 2016-06-28 13:56 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Dave Chinner, xfs, linux-fsdevel, linux-nvdimm

On 06/28/2016 04:39 PM, Christoph Hellwig wrote:
> On Tue, Jun 28, 2016 at 04:27:03PM +0300, Boaz Harrosh wrote:
>>> Right.  And an existing application can get DAX turned on under its
>>> back, and will now suddently get different synchronization behavior.
>>> That is if it's writes happen to be aligned to the fs block size.
>>>
>>
>> Is there an actual application that does that? or is this purely
>> theoretical right now?
> 
> Lots of them.  The typical case is multithreaded (or preforked like old
> apache) daemons that use O_APPEND to write to a common log file.  Then
> again those log records will usually not be 4k aligned, so they'd still
> accidentally get the exclusive locking even without this patch.  But
> beware the case where a log record actually matches the alignment..
> 

But O_APPEND is exclusion problem of i_size update not of the actual memcpy
done in parallel or not.

Actually with O_APPEND each write request should write a different region
of the file, there are no overlapping writes. And no issue of which version of the
write came last and got its data written.

If it is the issue of isize update vs O_APPEND is a different issue don't
you think? One way that we solved it is to update isize = isize + len;
atomically before starting the IO, then on the error case sub the unwritten
bytes. And still allow concurrent writers. I agree that isize updates need
to be atomic, but why does the memcpy?

And BTW in NFS O_APPEND concurrent readers to a writer may see
ZEROs in the interim.

I still don't see how an application can use the fact that two writers
will not give them mixed records. And surly it does not work on a shared
FS. So I was really wondering if you know of any such app

Thanks
Boaz

>>
>>
>> Thanks
>> Boaz
> ---end quoted text---
> 


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-28 13:56             ` Boaz Harrosh
@ 2016-06-28 15:39               ` Christoph Hellwig
  2016-06-29 12:23                 ` Boaz Harrosh
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-06-28 15:39 UTC (permalink / raw)
  To: Boaz Harrosh
  Cc: Christoph Hellwig, Dave Chinner, xfs, linux-fsdevel, linux-nvdimm

On Tue, Jun 28, 2016 at 04:56:30PM +0300, Boaz Harrosh wrote:
> Actually with O_APPEND each write request should write a different region
> of the file, there are no overlapping writes. And no issue of which version of the
> write came last and got its data written.

You have one fd for multiple threads or processes (it doesn't matter if
you're using O_APPEND or not), and all of them write to it.

i_size is only updated once the write finishes, so having multiple
concurrent writes will mean multiple records go into the same regions.
Now to be fair in current XFS writes beyond i_size will always take
the lock exclusively, so for this case we will not get concurrent
writes and thus data corruption anyway.  But if you have a cycling
log that gets overwritten (say a database journal) we're back to
square one.

> I still don't see how an application can use the fact that two writers
> will not give them mixed records. And surly it does not work on a shared
> FS. So I was really wondering if you know of any such app

If it doesn't work for two threads using the same fd on a shared fs
the fs is broken.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: xfs: untangle the direct I/O and DAX path, fix DAX locking
  2016-06-28 15:39               ` Christoph Hellwig
@ 2016-06-29 12:23                 ` Boaz Harrosh
  0 siblings, 0 replies; 27+ messages in thread
From: Boaz Harrosh @ 2016-06-29 12:23 UTC (permalink / raw)
  To: Christoph Hellwig, Boaz Harrosh
  Cc: Dave Chinner, xfs, linux-fsdevel, linux-nvdimm

On 06/28/2016 06:39 PM, Christoph Hellwig wrote:
> On Tue, Jun 28, 2016 at 04:56:30PM +0300, Boaz Harrosh wrote:
>> Actually with O_APPEND each write request should write a different region
>> of the file, there are no overlapping writes. And no issue of which version of the
>> write came last and got its data written.
> 
> You have one fd for multiple threads or processes (it doesn't matter if
> you're using O_APPEND or not), and all of them write to it.
> 

Yes so? but they do not write to the same (overlapping) region of the
file, each thread usually writes to his own record.

> i_size is only updated once the write finishes, so having multiple
> concurrent writes will mean multiple records go into the same regions.
> Now to be fair in current XFS writes beyond i_size will always take
> the lock exclusively, so for this case we will not get concurrent
> writes and thus data corruption anyway.  

Exactly that I understand. And it must be so.

> But if you have a cycling
> log that gets overwritten (say a database journal) we're back to
> square one.
> 

No! In this "cycling log" case the application (the DB) has an Head and a Tail
pointers each thread grabs the next available record and writes to it. The IO
is not overlapping, each thread writes to his own record, and even if they
write at the same time they do not over-write each other. As long as they
properly sync on the Head pointer the write itself can happen in parallel. This is not a
good example and will work perfectly well with the old (current) DAX code.
(Even if such records where 4k aligned)

>> I still don't see how an application can use the fact that two writers
>> will not give them mixed records. And surly it does not work on a shared
>> FS. So I was really wondering if you know of any such app
> 
> If it doesn't work for two threads using the same fd on a shared fs
> the fs is broken.

What works? the above cycling log, sure it will work, also on current
dax code.

I wish you could write a test to demonstrate this bug. Sorry for my
slowness but I don't see it. I do not see how the fact that there is
only a single memcpy in progress can help an application.
Yes sure isize update must be synced, which it is in current code.

Thanks Christoph, I've taken too much of your time, I guess the QA
will need to bang every possible application and see what breaks when
multiple parallel writers are allowed on a single file. So far for
whatever I use in a VM it all works just the same.

Boaz


^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 7/8] xfs: split direct I/O and DAX path
  2016-06-22 15:27 ` [PATCH 7/8] xfs: split direct I/O and DAX path Christoph Hellwig
@ 2016-09-29  2:53   ` Darrick J. Wong
  2016-09-29  8:38     ` aio completions vs file_accessed race, was: " Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Darrick J. Wong @ 2016-09-29  2:53 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs, linux-fsdevel, linux-nvdimm

On Wed, Jun 22, 2016 at 05:27:15PM +0200, Christoph Hellwig wrote:
> So far the DAX code overloaded the direct I/O code path.  There is very little
> in common between the two, and untangling them allows to clean up both variants.
> 
> As a side effect we also get separate trace points for both I/O types.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_file.c  | 139 ++++++++++++++++++++++++++++++++++++++++++-----------
>  fs/xfs/xfs_trace.h |   2 +
>  2 files changed, 112 insertions(+), 29 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 24b267d..0e74325 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -262,13 +262,11 @@ xfs_file_dio_aio_read(
>  	else
>  		target = ip->i_mount->m_ddev_targp;
>  
> -	if (!IS_DAX(inode)) {
> -		/* DIO must be aligned to device logical sector size */
> -		if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
> -			if (iocb->ki_pos == isize)
> -				return 0;
> -			return -EINVAL;
> -		}
> +	/* DIO must be aligned to device logical sector size */
> +	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
> +		if (iocb->ki_pos == isize)
> +			return 0;
> +		return -EINVAL;
>  	}
>  
>  	/*
> @@ -317,13 +315,37 @@ xfs_file_dio_aio_read(
>  	}
>  
>  	data = *to;
> -	if (IS_DAX(inode)) {
> -		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -				NULL, 0);
> -	} else {
> -		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> -				xfs_get_blocks_direct, NULL, NULL, 0);
> +	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> +			xfs_get_blocks_direct, NULL, NULL, 0);
> +	if (ret > 0) {
> +		iocb->ki_pos += ret;
> +		iov_iter_advance(to, ret);
>  	}
> +	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
> +
> +	file_accessed(iocb->ki_filp);

So I noticed that generic/323 starts crashing in file_accessed -> touch_atime
because iocb->ki_filp->f_path.dentry == NULL.  For a while I thought it was
some weird reflink bug, but I finally had time to go build a vanilla 4.8-rc8
kernel and that blew up here too.  I'm not sure why this line got inserted
here, since it wasn't there prior to this patch, AFAICT.

<shrug>

--D

> +	return ret;
> +}
> +
> +STATIC ssize_t
> +xfs_file_dax_read(
> +	struct kiocb		*iocb,
> +	struct iov_iter		*to)
> +{
> +	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> +	struct inode		*inode = mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct iov_iter		data = *to;
> +	size_t			count = iov_iter_count(to);
> +	ssize_t			ret = 0;
> +
> +	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
> +
> +	if (!count)
> +		return 0; /* skip atime */
> +
> +	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
> +	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
>  	if (ret > 0) {
>  		iocb->ki_pos += ret;
>  		iov_iter_advance(to, ret);
> @@ -356,7 +378,8 @@ xfs_file_read_iter(
>  	struct kiocb		*iocb,
>  	struct iov_iter		*to)
>  {
> -	struct xfs_mount	*mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
> +	struct inode		*inode = file_inode(iocb->ki_filp);
> +	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
>  	ssize_t			ret = 0;
>  
>  	XFS_STATS_INC(mp, xs_read_calls);
> @@ -364,7 +387,9 @@ xfs_file_read_iter(
>  	if (XFS_FORCED_SHUTDOWN(mp))
>  		return -EIO;
>  
> -	if (iocb->ki_flags & IOCB_DIRECT)
> +	if (IS_DAX(inode))
> +		ret = xfs_file_dax_read(iocb, to);
> +	else if (iocb->ki_flags & IOCB_DIRECT)
>  		ret = xfs_file_dio_aio_read(iocb, to);
>  	else
>  		ret = xfs_file_buffered_aio_read(iocb, to);
> @@ -586,8 +611,7 @@ xfs_file_dio_aio_write(
>  					mp->m_rtdev_targp : mp->m_ddev_targp;
>  
>  	/* DIO must be aligned to device logical sector size */
> -	if (!IS_DAX(inode) &&
> -	    ((iocb->ki_pos | count) & target->bt_logical_sectormask))
> +	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
>  		return -EINVAL;
>  
>  	/* "unaligned" here means not aligned to a filesystem block */
> @@ -656,14 +680,9 @@ xfs_file_dio_aio_write(
>  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
>  
>  	data = *from;
> -	if (IS_DAX(inode)) {
> -		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> -				xfs_end_io_direct_write, 0);
> -	} else {
> -		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> -				xfs_get_blocks_direct, xfs_end_io_direct_write,
> -				NULL, DIO_ASYNC_EXTEND);
> -	}
> +	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
> +			xfs_get_blocks_direct, xfs_end_io_direct_write,
> +			NULL, DIO_ASYNC_EXTEND);
>  
>  	/* see generic_file_direct_write() for why this is necessary */
>  	if (mapping->nrpages) {
> @@ -680,10 +699,70 @@ out:
>  	xfs_rw_iunlock(ip, iolock);
>  
>  	/*
> -	 * No fallback to buffered IO on errors for XFS. DAX can result in
> -	 * partial writes, but direct IO will either complete fully or fail.
> +	 * No fallback to buffered IO on errors for XFS, direct IO will either
> +	 * complete fully or fail.
> +	 */
> +	ASSERT(ret < 0 || ret == count);
> +	return ret;
> +}
> +
> +STATIC ssize_t
> +xfs_file_dax_write(
> +	struct kiocb		*iocb,
> +	struct iov_iter		*from)
> +{
> +	struct address_space	*mapping = iocb->ki_filp->f_mapping;
> +	struct inode		*inode = mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_mount	*mp = ip->i_mount;
> +	ssize_t			ret = 0;
> +	int			unaligned_io = 0;
> +	int			iolock;
> +	struct iov_iter		data;
> +
> +	/* "unaligned" here means not aligned to a filesystem block */
> +	if ((iocb->ki_pos & mp->m_blockmask) ||
> +	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
> +		unaligned_io = 1;
> +		iolock = XFS_IOLOCK_EXCL;
> +	} else if (mapping->nrpages) {
> +		iolock = XFS_IOLOCK_EXCL;
> +	} else {
> +		iolock = XFS_IOLOCK_SHARED;
> +	}
> +	xfs_rw_ilock(ip, iolock);
> +
> +	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * Yes, even DAX files can have page cache attached to them:  A zeroed
> +	 * page is inserted into the pagecache when we have to serve a write
> +	 * fault on a hole.  It should never be dirtied and can simply be
> +	 * dropped from the pagecache once we get real data for the page.
>  	 */
> -	ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
> +	if (mapping->nrpages) {
> +		ret = invalidate_inode_pages2(mapping);
> +		WARN_ON_ONCE(ret);
> +	}
> +
> +	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
> +		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
> +		iolock = XFS_IOLOCK_SHARED;
> +	}
> +
> +	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
> +
> +	data = *from;
> +	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
> +			xfs_end_io_direct_write, 0);
> +	if (ret > 0) {
> +		iocb->ki_pos += ret;
> +		iov_iter_advance(from, ret);
> +	}
> +out:
> +	xfs_rw_iunlock(ip, iolock);
>  	return ret;
>  }
>  
> @@ -765,7 +844,9 @@ xfs_file_write_iter(
>  	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
>  		return -EIO;
>  
> -	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
> +	if (IS_DAX(inode))
> +		ret = xfs_file_dax_write(iocb, from);
> +	else if (iocb->ki_flags & IOCB_DIRECT)
>  		ret = xfs_file_dio_aio_write(iocb, from);
>  	else
>  		ret = xfs_file_buffered_aio_write(iocb, from);
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 2504f94..1451690 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -1165,8 +1165,10 @@ DEFINE_EVENT(xfs_file_class, name,	\
>  	TP_ARGS(ip, count, offset))
>  DEFINE_RW_EVENT(xfs_file_buffered_read);
>  DEFINE_RW_EVENT(xfs_file_direct_read);
> +DEFINE_RW_EVENT(xfs_file_dax_read);
>  DEFINE_RW_EVENT(xfs_file_buffered_write);
>  DEFINE_RW_EVENT(xfs_file_direct_write);
> +DEFINE_RW_EVENT(xfs_file_dax_write);
>  DEFINE_RW_EVENT(xfs_file_splice_read);
>  
>  DECLARE_EVENT_CLASS(xfs_page_class,
> -- 
> 2.1.4
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 27+ messages in thread

* aio completions vs file_accessed race, was: Re: [PATCH 7/8] xfs: split direct I/O and DAX path
  2016-09-29  2:53   ` Darrick J. Wong
@ 2016-09-29  8:38     ` Christoph Hellwig
  2016-09-29 20:18       ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-29  8:38 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, xfs, linux-fsdevel, linux-aio

On Wed, Sep 28, 2016 at 07:53:52PM -0700, Darrick J. Wong wrote:
> So I noticed that generic/323 starts crashing in file_accessed -> touch_atime
> because iocb->ki_filp->f_path.dentry == NULL.  For a while I thought it was
> some weird reflink bug, but I finally had time to go build a vanilla 4.8-rc8
> kernel and that blew up here too.  I'm not sure why this line got inserted
> here, since it wasn't there prior to this patch, AFAICT.

This line was there before near the end of xfs_file_dio_aio_read already,
e.g. line 376 just before the above commit, but it only got introduced
a bit earlier in "xfs: stop using generic_file_read_iter for direct I/O",
which copied it over from generic_file_read_iter.  I think any new
issues in these commits could just be a minor timing change, as
we're not changing struct file refcounting in any way here.

generic/323 reproduces the last struct file reference being dropped
by aio completions, so it seems like we have an issue here, which
I suspect is something in the common code.  I can't reproduce it
locally, but looking at the aio_complete -> kiocb_free callchain
and the lack of other struct file refcounting in aio.c it seems
inherently unsafe to reference struct file once the completion
may have run, that is after (__)blkdev_direct_IO returned.

I'll see if I can come up with a solution for that, most likely
that would involve moving the file_accessed call into __blkdev_direct_IO
before we drop the final reference on the dio structure.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: aio completions vs file_accessed race, was: Re: [PATCH 7/8] xfs: split direct I/O and DAX path
  2016-09-29  8:38     ` aio completions vs file_accessed race, was: " Christoph Hellwig
@ 2016-09-29 20:18       ` Christoph Hellwig
  2016-09-29 20:18         ` Christoph Hellwig
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-29 20:18 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, xfs, linux-fsdevel, linux-aio

Can you try the patch below?  That just moves the file_accessed call
before the I/O, similar to how we handle timestamp updates on the write
side.  generic_file_read_iter will also need a similar update.

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: aio completions vs file_accessed race, was: Re: [PATCH 7/8] xfs: split direct I/O and DAX path
  2016-09-29 20:18       ` Christoph Hellwig
@ 2016-09-29 20:18         ` Christoph Hellwig
  2016-09-29 20:33           ` Darrick J. Wong
  0 siblings, 1 reply; 27+ messages in thread
From: Christoph Hellwig @ 2016-09-29 20:18 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: Christoph Hellwig, xfs, linux-fsdevel, linux-aio

On Thu, Sep 29, 2016 at 10:18:34PM +0200, Christoph Hellwig wrote:
> Can you try the patch below?  That just moves the file_accessed call
> before the I/O, similar to how we handle timestamp updates on the write
> side.  generic_file_read_iter will also need a similar update.

And now with patch:

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 349f328..6919412 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -270,6 +270,8 @@ xfs_file_dio_aio_read(
 		return -EINVAL;
 	}
 
+	file_accessed(iocb->ki_filp);
+
 	/*
 	 * Locking is a bit tricky here. If we take an exclusive lock for direct
 	 * IO, we effectively serialise all new concurrent read IO to this file
@@ -324,7 +326,6 @@ xfs_file_dio_aio_read(
 	}
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
-	file_accessed(iocb->ki_filp);
 	return ret;
 }
 

^ permalink raw reply related	[flat|nested] 27+ messages in thread

* Re: aio completions vs file_accessed race, was: Re: [PATCH 7/8] xfs: split direct I/O and DAX path
  2016-09-29 20:18         ` Christoph Hellwig
@ 2016-09-29 20:33           ` Darrick J. Wong
  0 siblings, 0 replies; 27+ messages in thread
From: Darrick J. Wong @ 2016-09-29 20:33 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: xfs, linux-fsdevel, linux-aio

On Thu, Sep 29, 2016 at 10:18:49PM +0200, Christoph Hellwig wrote:
> On Thu, Sep 29, 2016 at 10:18:34PM +0200, Christoph Hellwig wrote:
> > Can you try the patch below?  That just moves the file_accessed call
> > before the I/O, similar to how we handle timestamp updates on the write
> > side.  generic_file_read_iter will also need a similar update.
> 
> And now with patch:
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 349f328..6919412 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -270,6 +270,8 @@ xfs_file_dio_aio_read(
>  		return -EINVAL;
>  	}
>  
> +	file_accessed(iocb->ki_filp);
> +
>  	/*
>  	 * Locking is a bit tricky here. If we take an exclusive lock for direct
>  	 * IO, we effectively serialise all new concurrent read IO to this file
> @@ -324,7 +326,6 @@ xfs_file_dio_aio_read(
>  	}
>  	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
>  
> -	file_accessed(iocb->ki_filp);
>  	return ret;
>  }
>  

I noticed that g/323 only breaks when the XFS is on a real disk; on
pmem it works fine with or without the patch.

That said, it makes the crash go away and the patch looks ok, so:
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Tested-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2016-09-29 20:33 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-22 15:27 xfs: untangle the direct I/O and DAX path, fix DAX locking Christoph Hellwig
2016-06-22 15:27 ` [PATCH 1/8] xfs: don't pass ioflags around in the ioctl path Christoph Hellwig
2016-06-22 15:27 ` [PATCH 2/8] xfs: kill ioflags Christoph Hellwig
2016-06-22 15:27 ` [PATCH 3/8] xfs: remove s_maxbytes enforcement in xfs_file_read_iter Christoph Hellwig
2016-06-22 15:27 ` [PATCH 4/8] xfs: split xfs_file_read_iter into buffered and direct I/O helpers Christoph Hellwig
2016-06-22 15:27 ` [PATCH 5/8] xfs: stop using generic_file_read_iter for direct I/O Christoph Hellwig
2016-06-22 15:27 ` [PATCH 6/8] xfs: direct calls in the direct I/O path Christoph Hellwig
2016-06-22 15:27 ` [PATCH 7/8] xfs: split direct I/O and DAX path Christoph Hellwig
2016-09-29  2:53   ` Darrick J. Wong
2016-09-29  8:38     ` aio completions vs file_accessed race, was: " Christoph Hellwig
2016-09-29 20:18       ` Christoph Hellwig
2016-09-29 20:18         ` Christoph Hellwig
2016-09-29 20:33           ` Darrick J. Wong
2016-06-22 15:27 ` [PATCH 8/8] xfs: fix locking for DAX writes Christoph Hellwig
2016-06-23 14:22   ` Boaz Harrosh
2016-06-23 23:24 ` xfs: untangle the direct I/O and DAX path, fix DAX locking Dave Chinner
2016-06-24  1:14   ` Dan Williams
2016-06-24  7:13     ` Dave Chinner
2016-06-24  7:31       ` Christoph Hellwig
2016-06-24  7:26   ` Christoph Hellwig
2016-06-24 23:00     ` Dave Chinner
2016-06-28 13:10       ` Christoph Hellwig
2016-06-28 13:27         ` Boaz Harrosh
2016-06-28 13:39           ` Christoph Hellwig
2016-06-28 13:56             ` Boaz Harrosh
2016-06-28 15:39               ` Christoph Hellwig
2016-06-29 12:23                 ` Boaz Harrosh

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).