* [PATCH] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
@ 2017-01-06 18:23 Jeff Layton
  2017-01-06 20:36 ` Jeff Layton
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Jeff Layton @ 2017-01-06 18:23 UTC (permalink / raw)
  To: Al Viro, Yan, Zheng
  Cc: Sage Weil, Ilya Dryomov, ceph-devel, linux-fsdevel, linux-kernel,
	Zhu, Caifeng

xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
fio to drive some I/O via vmsplice and splice. Ceph then ends up trying to
access an ITER_BVEC type iov_iter as an ITER_IOVEC one. That causes it to
pick up a wrong offset and get stuck in an infinite loop while trying to
populate the page array.

To fix this, add a new iov_iter helper to determine the offset into
the page for the current segment and have ceph call that.

dio_get_pagev_size has similar problems, so fix that as well by adding a
new helper function in lib/iov_iter.c that does what it does, but for
all iov_iter types.

Since we're moving that into generic code, we can also utilize the
iterate_all_kinds macro to handle this. That means that we need to
rework the logic a bit since we can't advance to the next vector while
checking the current one.

Link: http://tracker.ceph.com/issues/18130
Cc: "Zhu, Caifeng" <zhucaifeng@unissoft-nj.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/ceph/file.c      | 28 ++------------------
 include/linux/uio.h |  2 ++
 lib/iov_iter.c      | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f633165f3fdc..0cd9fc68a04e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -35,29 +35,6 @@
  */
 
 /*
- * Calculate the length sum of direct io vectors that can
- * be combined into one page vector.
- */
-static size_t dio_get_pagev_size(const struct iov_iter *it)
-{
-    const struct iovec *iov = it->iov;
-    const struct iovec *iovend = iov + it->nr_segs;
-    size_t size;
-
-    size = iov->iov_len - it->iov_offset;
-    /*
-     * An iov can be page vectored when both the current tail
-     * and the next base are page aligned.
-     */
-    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
-           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
-        size += iov->iov_len;
-    }
-    dout("dio_get_pagevlen len = %zu\n", size);
-    return size;
-}
-
-/*
  * Allocate a page vector based on (@it, @nbytes).
  * The return value is the tuple describing a page vector,
  * that is (@pages, @page_align, @num_pages).
@@ -71,8 +48,7 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
 	struct page **pages;
 	int ret = 0, idx, npages;
 
-	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
-		(PAGE_SIZE - 1);
+	align = iov_iter_single_seg_page_offset(it);
 	npages = calc_pages_for(align, nbytes);
 	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
 	if (!pages) {
@@ -927,7 +903,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 	}
 
 	while (iov_iter_count(iter) > 0) {
-		u64 size = dio_get_pagev_size(iter);
+		u64 size = iov_iter_pvec_size(iter);
 		size_t start = 0;
 		ssize_t len;
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6e22b544d039..46fdfff3d7d6 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -83,6 +83,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
+size_t iov_iter_single_seg_page_offset(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
@@ -93,6 +94,7 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
+size_t iov_iter_pvec_size(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 6b415b5a100d..d22850bbb1e9 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -745,6 +745,24 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
 
+/* Return offset into page for current iov_iter segment */
+size_t iov_iter_single_seg_page_offset(const struct iov_iter *i)
+{
+	size_t	base;
+
+	if (i->type & ITER_PIPE)
+		base = i->pipe->bufs[i->idx].offset;
+	else if (i->type & ITER_BVEC)
+		base = i->bvec->bv_offset;
+	else if (i->type & ITER_KVEC)
+		base = (size_t)i->kvec->iov_base;
+	else
+		base = (size_t)i->iov->iov_base;
+
+	return (base + i->iov_offset) & (PAGE_SIZE - 1);
+}
+EXPORT_SYMBOL(iov_iter_single_seg_page_offset);
+
 void iov_iter_kvec(struct iov_iter *i, int direction,
 			const struct kvec *kvec, unsigned long nr_segs,
 			size_t count)
@@ -830,6 +848,61 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);
 
+/**
+ * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
+ * @i: iov_iter in which to find the size
+ *
+ * Some filesystems can stitch together multiple iovecs into a single
+ * page vector when both the previous tail and current base are page
+ * aligned. This function discovers the length that can fit in a single
+ * pagevec and returns it.
+ */
+size_t iov_iter_pvec_size(const struct iov_iter *i)
+{
+	size_t size = i->count;
+	size_t pv_size = 0;
+	bool contig = false, first = true;
+
+	if (!size)
+		return 0;
+
+	/* Pipes are naturally aligned for this */
+	if (unlikely(i->type & ITER_PIPE))
+		return size;
+
+	/*
+	 * An iov can be page vectored when the current base and previous
+	 * tail are both page aligned. Note that we don't require that the
+	 * initial base in the first iovec also be page aligned.
+	 */
+	iterate_all_kinds(i, size, v,
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }; 0;
+		 }),
+		({
+		 if (first || (contig && v.bv_offset == 0)) {
+			pv_size += v.bv_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
+		 }
+		 }),
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }
+		 }))
+
+	pv_size -= i->iov_offset;
+	return pv_size;
+}
+EXPORT_SYMBOL(iov_iter_pvec_size);
+
 static inline size_t __pipe_get_pages(struct iov_iter *i,
 				size_t maxsize,
 				struct page **pages,
-- 
2.7.4


* Re: [PATCH] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-06 18:23 [PATCH] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths Jeff Layton
@ 2017-01-06 20:36 ` Jeff Layton
  2017-01-09 23:11 ` Jeff Layton
  2017-01-10 12:57 ` [PATCH v2] " Jeff Layton
  2 siblings, 0 replies; 17+ messages in thread
From: Jeff Layton @ 2017-01-06 20:36 UTC (permalink / raw)
  To: Al Viro, Yan, Zheng
  Cc: Sage Weil, Ilya Dryomov, ceph-devel, linux-fsdevel, linux-kernel,
	Zhu, Caifeng

On Fri, 2017-01-06 at 13:23 -0500, Jeff Layton wrote:
> xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> fio to drive some I/O via vmsplice and splice. Ceph then ends up trying to
> access an ITER_BVEC type iov_iter as an ITER_IOVEC one. That causes it to
> pick up a wrong offset and get stuck in an infinite loop while trying to
> populate the page array.
> 
> To fix this, add a new iov_iter helper to determine the offset into
> the page for the current segment and have ceph call that.
> 
> dio_get_pagev_size has similar problems, so fix that as well by adding a
> new helper function in lib/iov_iter.c that does what it does, but for
> all iov_iter types.
> 
> Since we're moving that into generic code, we can also utilize the
> iterate_all_kinds macro to handle this. That means that we need to
> rework the logic a bit since we can't advance to the next vector while
> checking the current one.
> 
> Link: http://tracker.ceph.com/issues/18130
> Cc: "Zhu, Caifeng" <zhucaifeng@unissoft-nj.com>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
>  fs/ceph/file.c      | 28 ++------------------
>  include/linux/uio.h |  2 ++
>  lib/iov_iter.c      | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 77 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index f633165f3fdc..0cd9fc68a04e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -35,29 +35,6 @@
>   */
>  
>  /*
> - * Calculate the length sum of direct io vectors that can
> - * be combined into one page vector.
> - */
> -static size_t dio_get_pagev_size(const struct iov_iter *it)
> -{
> -    const struct iovec *iov = it->iov;
> -    const struct iovec *iovend = iov + it->nr_segs;
> -    size_t size;
> -
> -    size = iov->iov_len - it->iov_offset;
> -    /*
> -     * An iov can be page vectored when both the current tail
> -     * and the next base are page aligned.
> -     */
> -    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
> -           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
> -        size += iov->iov_len;
> -    }
> -    dout("dio_get_pagevlen len = %zu\n", size);
> -    return size;
> -}
> -
> -/*
>   * Allocate a page vector based on (@it, @nbytes).
>   * The return value is the tuple describing a page vector,
>   * that is (@pages, @page_align, @num_pages).
> @@ -71,8 +48,7 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
>  	struct page **pages;
>  	int ret = 0, idx, npages;
>  
> -	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
> -		(PAGE_SIZE - 1);
> +	align = iov_iter_single_seg_page_offset(it);
>  	npages = calc_pages_for(align, nbytes);
>  	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
>  	if (!pages) {
> @@ -927,7 +903,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>  	}
>  
>  	while (iov_iter_count(iter) > 0) {
> -		u64 size = dio_get_pagev_size(iter);
> +		u64 size = iov_iter_pvec_size(iter);
>  		size_t start = 0;
>  		ssize_t len;
>  
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 6e22b544d039..46fdfff3d7d6 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -83,6 +83,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
>  void iov_iter_advance(struct iov_iter *i, size_t bytes);
>  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
>  size_t iov_iter_single_seg_count(const struct iov_iter *i);
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i);
>  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
>  			 struct iov_iter *i);
>  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> @@ -93,6 +94,7 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
>  size_t iov_iter_zero(size_t bytes, struct iov_iter *);
>  unsigned long iov_iter_alignment(const struct iov_iter *i);
>  unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
> +size_t iov_iter_pvec_size(const struct iov_iter *i);
>  void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
>  			unsigned long nr_segs, size_t count);
>  void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 6b415b5a100d..d22850bbb1e9 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -745,6 +745,24 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_single_seg_count);
>  
> +/* Return offset into page for current iov_iter segment */
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i)
> +{
> +	size_t	base;
> +
> +	if (i->type & ITER_PIPE)
> +		base = i->pipe->bufs[i->idx].offset;
> +	else if (i->type & ITER_BVEC)
> +		base = i->bvec->bv_offset;
> +	else if (i->type & ITER_KVEC)
> +		base = (size_t)i->kvec->iov_base;
> +	else
> +		base = (size_t)i->iov->iov_base;
> +
> +	return (base + i->iov_offset) & (PAGE_SIZE - 1);
> +}
> +EXPORT_SYMBOL(iov_iter_single_seg_page_offset);
> +
>  void iov_iter_kvec(struct iov_iter *i, int direction,
>  			const struct kvec *kvec, unsigned long nr_segs,
>  			size_t count)
> @@ -830,6 +848,61 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_gap_alignment);
>  
> +/**
> + * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
> + * @i: iov_iter in which to find the size
> + *
> + * Some filesystems can stitch together multiple iovecs into a single
> + * page vector when both the previous tail and current base are page
> + * aligned. This function discovers the length that can fit in a single
> + * pagevec and returns it.
> + */
> +size_t iov_iter_pvec_size(const struct iov_iter *i)
> +{
> +	size_t size = i->count;
> +	size_t pv_size = 0;
> +	bool contig = false, first = true;
> +
> +	if (!size)
> +		return 0;
> +
> +	/* Pipes are naturally aligned for this */
> +	if (unlikely(i->type & ITER_PIPE))
> +		return size;
> +
> +	/*
> +	 * An iov can be page vectored when the current base and previous
> +	 * tail are both page aligned. Note that we don't require that the
> +	 * initial base in the first iovec also be page aligned.
> +	 */
> +	iterate_all_kinds(i, size, v,
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }; 0;
> +		 }),
> +		({
> +		 if (first || (contig && v.bv_offset == 0)) {
> +			pv_size += v.bv_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
> +		 }
> +		 }),
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }
> +		 }))
> +
> +	pv_size -= i->iov_offset;
> +	return pv_size;
> +}
> +EXPORT_SYMBOL(iov_iter_pvec_size);
> +
>  static inline size_t __pipe_get_pages(struct iov_iter *i,
>  				size_t maxsize,
>  				struct page **pages,

Ugh, but this breaks generic/113...let me see what's going on there.
-- 
Jeff Layton <jlayton@redhat.com>


* Re: [PATCH] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-06 18:23 [PATCH] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths Jeff Layton
  2017-01-06 20:36 ` Jeff Layton
@ 2017-01-09 23:11 ` Jeff Layton
  2017-01-10 12:57 ` [PATCH v2] " Jeff Layton
  2 siblings, 0 replies; 17+ messages in thread
From: Jeff Layton @ 2017-01-09 23:11 UTC (permalink / raw)
  To: Al Viro, Yan, Zheng
  Cc: Sage Weil, Ilya Dryomov, ceph-devel, linux-fsdevel, linux-kernel,
	Zhu, Caifeng

On Fri, 2017-01-06 at 13:23 -0500, Jeff Layton wrote:
> xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> fio to drive some I/O via vmsplice and splice. Ceph then ends up trying to
> access an ITER_BVEC type iov_iter as an ITER_IOVEC one. That causes it to
> pick up a wrong offset and get stuck in an infinite loop while trying to
> populate the page array.
> 
> To fix this, add a new iov_iter helper to determine the offset into
> the page for the current segment and have ceph call that.
> 
> dio_get_pagev_size has similar problems, so fix that as well by adding a
> new helper function in lib/iov_iter.c that does what it does, but for
> all iov_iter types.
> 
> Since we're moving that into generic code, we can also utilize the
> iterate_all_kinds macro to handle this. That means that we need to
> rework the logic a bit since we can't advance to the next vector while
> checking the current one.
> 
> Link: http://tracker.ceph.com/issues/18130
> Cc: "Zhu, Caifeng" <zhucaifeng@unissoft-nj.com>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
>  fs/ceph/file.c      | 28 ++------------------
>  include/linux/uio.h |  2 ++
>  lib/iov_iter.c      | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 77 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index f633165f3fdc..0cd9fc68a04e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -35,29 +35,6 @@
>   */
>  
>  /*
> - * Calculate the length sum of direct io vectors that can
> - * be combined into one page vector.
> - */
> -static size_t dio_get_pagev_size(const struct iov_iter *it)
> -{
> -    const struct iovec *iov = it->iov;
> -    const struct iovec *iovend = iov + it->nr_segs;
> -    size_t size;
> -
> -    size = iov->iov_len - it->iov_offset;
> -    /*
> -     * An iov can be page vectored when both the current tail
> -     * and the next base are page aligned.
> -     */
> -    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
> -           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
> -        size += iov->iov_len;
> -    }
> -    dout("dio_get_pagevlen len = %zu\n", size);
> -    return size;
> -}
> -
> -/*
>   * Allocate a page vector based on (@it, @nbytes).
>   * The return value is the tuple describing a page vector,
>   * that is (@pages, @page_align, @num_pages).
> @@ -71,8 +48,7 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
>  	struct page **pages;
>  	int ret = 0, idx, npages;
>  
> -	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
> -		(PAGE_SIZE - 1);
> +	align = iov_iter_single_seg_page_offset(it);
>  	npages = calc_pages_for(align, nbytes);
>  	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
>  	if (!pages) {

Found the bug with the earlier patch, and will be sending an updated one
after some more testing.

That said, I would love to be able to just get rid of ceph's
dio_get_pages_alloc as well and just use iov_iter_get_pages_alloc
instead, but I don't think we can because the bio_vec iterator block
does this:

                /* can't be more than PAGE_SIZE */
                *start = v.bv_offset;
                *pages = p = get_pages_array(1);
                if (!p)
                        return -ENOMEM;
                get_page(*p = v.bv_page);
                return v.bv_len;

...so what happens here is that iter_file_splice_write creates an
ITER_BVEC iov_iter. Then any filesystem driver that uses
iov_iter_get_pages or iov_iter_get_pages_alloc can only get a single
page for each call. The ceph code works around this by calling
iov_iter_get_pages in a loop until it gets enough, but that's rather
nasty.
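
For illustration, that loop looks roughly like the below. This is only a
sketch of the pattern, not the actual fs/ceph/file.c code, and the function
name is made up; it also assumes the iter was already trimmed to a
page-vectorable length:

	/*
	 * Collect pages covering @bytes from @iter.  With an ITER_BVEC
	 * iter, iov_iter_get_pages() hands back at most one page per
	 * call no matter how large @maxpages is, so we have to loop.
	 */
	static int fill_page_vector(struct iov_iter *iter, struct page **pages,
				    int npages, size_t bytes, size_t *page_off)
	{
		size_t got = 0, start;
		int idx = 0;

		while (got < bytes && idx < npages) {
			ssize_t n = iov_iter_get_pages(iter, pages + idx,
						       bytes - got,
						       npages - idx, &start);
			if (n < 0)
				return n;
			if (!idx)
				*page_off = start;
			iov_iter_advance(iter, n);
			got += n;
			idx += DIV_ROUND_UP(start + n, PAGE_SIZE);
		}
		return idx;
	}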

I bet this makes splice writing to an O_DIRECT file on NFS suck too...

Why _is_ ITER_BVEC limited to a single page there? I wonder if we could
consider lifting that limitation in some cases if it really is
necessary?

> @@ -927,7 +903,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>  	}
>  
>  	while (iov_iter_count(iter) > 0) {
> -		u64 size = dio_get_pagev_size(iter);
> +		u64 size = iov_iter_pvec_size(iter);
>  		size_t start = 0;
>  		ssize_t len;
> 
>  
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 6e22b544d039..46fdfff3d7d6 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -83,6 +83,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
>  void iov_iter_advance(struct iov_iter *i, size_t bytes);
>  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
>  size_t iov_iter_single_seg_count(const struct iov_iter *i);
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i);
>  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
>  			 struct iov_iter *i);
>  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> @@ -93,6 +94,7 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
>  size_t iov_iter_zero(size_t bytes, struct iov_iter *);
>  unsigned long iov_iter_alignment(const struct iov_iter *i);
>  unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
> +size_t iov_iter_pvec_size(const struct iov_iter *i);
>  void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
>  			unsigned long nr_segs, size_t count);
>  void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 6b415b5a100d..d22850bbb1e9 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -745,6 +745,24 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_single_seg_count);
>  
> +/* Return offset into page for current iov_iter segment */
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i)
> +{
> +	size_t	base;
> +
> +	if (i->type & ITER_PIPE)
> +		base = i->pipe->bufs[i->idx].offset;
> +	else if (i->type & ITER_BVEC)
> +		base = i->bvec->bv_offset;
> +	else if (i->type & ITER_KVEC)
> +		base = (size_t)i->kvec->iov_base;
> +	else
> +		base = (size_t)i->iov->iov_base;
> +
> +	return (base + i->iov_offset) & (PAGE_SIZE - 1);
> +}
> +EXPORT_SYMBOL(iov_iter_single_seg_page_offset);
> +
>  void iov_iter_kvec(struct iov_iter *i, int direction,
>  			const struct kvec *kvec, unsigned long nr_segs,
>  			size_t count)
> @@ -830,6 +848,61 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_gap_alignment);
>  
> +/**
> + * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
> + * @i: iov_iter in which to find the size
> + *
> + * Some filesystems can stitch together multiple iovecs into a single
> + * page vector when both the previous tail and current base are page
> + * aligned. This function discovers the length that can fit in a single
> + * pagevec and returns it.
> + */
> +size_t iov_iter_pvec_size(const struct iov_iter *i)
> +{
> +	size_t size = i->count;
> +	size_t pv_size = 0;
> +	bool contig = false, first = true;
> +
> +	if (!size)
> +		return 0;
> +
> +	/* Pipes are naturally aligned for this */
> +	if (unlikely(i->type & ITER_PIPE))
> +		return size;
> +
> +	/*
> +	 * An iov can be page vectored when the current base and previous
> +	 * tail are both page aligned. Note that we don't require that the
> +	 * initial base in the first iovec also be page aligned.
> +	 */
> +	iterate_all_kinds(i, size, v,
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }; 0;
> +		 }),
> +		({
> +		 if (first || (contig && v.bv_offset == 0)) {
> +			pv_size += v.bv_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
> +		 }
> +		 }),
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }
> +		 }))
> +
> +	pv_size -= i->iov_offset;

The bug in this patch is the above. iterate_all_kinds will have already
accounted for the iov_offset, so there's no need to do this here (and it
throws off the calculation when you have to go around more than once).

> +	return pv_size;
> +}
> +EXPORT_SYMBOL(iov_iter_pvec_size);
> +
>  static inline size_t __pipe_get_pages(struct iov_iter *i,
>  				size_t maxsize,
>  				struct page **pages,

-- 
Jeff Layton <jlayton@redhat.com>


* [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-06 18:23 [PATCH] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths Jeff Layton
  2017-01-06 20:36 ` Jeff Layton
  2017-01-09 23:11 ` Jeff Layton
@ 2017-01-10 12:57 ` Jeff Layton
  2017-01-11  2:42   ` Yan, Zheng
  2017-01-12  7:59   ` Al Viro
  2 siblings, 2 replies; 17+ messages in thread
From: Jeff Layton @ 2017-01-10 12:57 UTC (permalink / raw)
  To: Al Viro, Yan, Zheng
  Cc: Sage Weil, Ilya Dryomov, ceph-devel, linux-fsdevel, linux-kernel,
	Zhu, Caifeng

v2: fix bug in offset handling in iov_iter_pvec_size

xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
fio to drive some I/O via vmsplice and splice. Ceph then ends up trying
to access an ITER_BVEC type iov_iter as an ITER_IOVEC one. That causes it
to pick up a wrong offset and get stuck in an infinite loop while trying
to populate the page array. dio_get_pagev_size has a similar problem.

To fix the first problem, add a new iov_iter helper to determine the
offset into the page for the current segment and have ceph call that.
I would just replace dio_get_pages_alloc with iov_iter_get_pages_alloc,
but that will only return a single page at a time for ITER_BVEC and
it's better to make larger requests when possible.

For the second problem, we simply replace it with a new helper that does
what it does, but properly for all iov_iter types.

Since we're moving that into generic code, we can also utilize the
iterate_all_kinds macro to simplify this. That means that we need to
rework the logic a bit since we can't advance to the next vector while
checking the current one.

Link: http://tracker.ceph.com/issues/18130
Cc: "Zhu, Caifeng" <zhucaifeng@unissoft-nj.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/ceph/file.c      | 28 ++-------------------
 include/linux/uio.h |  2 ++
 lib/iov_iter.c      | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 26 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f633165f3fdc..0cd9fc68a04e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -35,29 +35,6 @@
  */
 
 /*
- * Calculate the length sum of direct io vectors that can
- * be combined into one page vector.
- */
-static size_t dio_get_pagev_size(const struct iov_iter *it)
-{
-    const struct iovec *iov = it->iov;
-    const struct iovec *iovend = iov + it->nr_segs;
-    size_t size;
-
-    size = iov->iov_len - it->iov_offset;
-    /*
-     * An iov can be page vectored when both the current tail
-     * and the next base are page aligned.
-     */
-    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
-           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
-        size += iov->iov_len;
-    }
-    dout("dio_get_pagevlen len = %zu\n", size);
-    return size;
-}
-
-/*
  * Allocate a page vector based on (@it, @nbytes).
  * The return value is the tuple describing a page vector,
  * that is (@pages, @page_align, @num_pages).
@@ -71,8 +48,7 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
 	struct page **pages;
 	int ret = 0, idx, npages;
 
-	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
-		(PAGE_SIZE - 1);
+	align = iov_iter_single_seg_page_offset(it);
 	npages = calc_pages_for(align, nbytes);
 	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
 	if (!pages) {
@@ -927,7 +903,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 	}
 
 	while (iov_iter_count(iter) > 0) {
-		u64 size = dio_get_pagev_size(iter);
+		u64 size = iov_iter_pvec_size(iter);
 		size_t start = 0;
 		ssize_t len;
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6e22b544d039..46fdfff3d7d6 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -83,6 +83,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
+size_t iov_iter_single_seg_page_offset(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
@@ -93,6 +94,7 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
+size_t iov_iter_pvec_size(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 6b415b5a100d..b8fa377b0cef 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -745,6 +745,24 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
 
+/* Return offset into page for current iov_iter segment */
+size_t iov_iter_single_seg_page_offset(const struct iov_iter *i)
+{
+	size_t	base;
+
+	if (i->type & ITER_PIPE)
+		base = i->pipe->bufs[i->idx].offset;
+	else if (i->type & ITER_BVEC)
+		base = i->bvec->bv_offset;
+	else if (i->type & ITER_KVEC)
+		base = (size_t)i->kvec->iov_base;
+	else
+		base = (size_t)i->iov->iov_base;
+
+	return (base + i->iov_offset) & (PAGE_SIZE - 1);
+}
+EXPORT_SYMBOL(iov_iter_single_seg_page_offset);
+
 void iov_iter_kvec(struct iov_iter *i, int direction,
 			const struct kvec *kvec, unsigned long nr_segs,
 			size_t count)
@@ -830,6 +848,59 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);
 
+/**
+ * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
+ * @i: iov_iter in which to find the size
+ *
+ * Some filesystems can stitch together multiple iovecs into a single
+ * page vector when both the previous tail and current base are page
+ * aligned. This function discovers the length that can fit in a single
+ * pagevec and returns it.
+ */
+size_t iov_iter_pvec_size(const struct iov_iter *i)
+{
+	size_t size = i->count;
+	size_t pv_size = 0;
+	bool contig = false, first = true;
+
+	if (!size)
+		return 0;
+
+	/* Pipes are naturally aligned for this */
+	if (unlikely(i->type & ITER_PIPE))
+		return size;
+
+	/*
+	 * An iov can be page vectored when the current base and previous
+	 * tail are both page aligned. Note that we don't require that the
+	 * initial base in the first iovec also be page aligned.
+	 */
+	iterate_all_kinds(i, size, v,
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }; 0;
+		 }),
+		({
+		 if (first || (contig && v.bv_offset == 0)) {
+			pv_size += v.bv_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
+		 }
+		 }),
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }
+		 }))
+	return pv_size;
+}
+EXPORT_SYMBOL(iov_iter_pvec_size);
+
 static inline size_t __pipe_get_pages(struct iov_iter *i,
 				size_t maxsize,
 				struct page **pages,
-- 
2.7.4


* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-10 12:57 ` [PATCH v2] " Jeff Layton
@ 2017-01-11  2:42   ` Yan, Zheng
  2017-01-12  7:59   ` Al Viro
  1 sibling, 0 replies; 17+ messages in thread
From: Yan, Zheng @ 2017-01-11  2:42 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Al Viro, Sage Weil, Ilya Dryomov, ceph-devel, linux-fsdevel,
	linux-kernel, Zhu, Caifeng


> On 10 Jan 2017, at 20:57, Jeff Layton <jlayton@redhat.com> wrote:
> 
> v2: fix bug in offset handling in iov_iter_pvec_size
> 
> xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> fio to drive some I/O via vmsplice and splice. Ceph then ends up trying
> to access an ITER_BVEC type iov_iter as an ITER_IOVEC one. That causes it
> to pick up a wrong offset and get stuck in an infinite loop while trying
> to populate the page array. dio_get_pagev_size has a similar problem.
> 
> To fix the first problem, add a new iov_iter helper to determine the
> offset into the page for the current segment and have ceph call that.
> I would just replace dio_get_pages_alloc with iov_iter_get_pages_alloc,
> but that will only return a single page at a time for ITER_BVEC and
> it's better to make larger requests when possible.
> 
> For the second problem, we simply replace it with a new helper that does
> what it does, but properly for all iov_iter types.
> 
> Since we're moving that into generic code, we can also utilize the
> iterate_all_kinds macro to simplify this. That means that we need to
> rework the logic a bit since we can't advance to the next vector while
> checking the current one.
> 
> Link: http://tracker.ceph.com/issues/18130
> Cc: "Zhu, Caifeng" <zhucaifeng@unissoft-nj.com>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
> fs/ceph/file.c      | 28 ++-------------------
> include/linux/uio.h |  2 ++
> lib/iov_iter.c      | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 75 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index f633165f3fdc..0cd9fc68a04e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -35,29 +35,6 @@
>  */
> 
> /*
> - * Calculate the length sum of direct io vectors that can
> - * be combined into one page vector.
> - */
> -static size_t dio_get_pagev_size(const struct iov_iter *it)
> -{
> -    const struct iovec *iov = it->iov;
> -    const struct iovec *iovend = iov + it->nr_segs;
> -    size_t size;
> -
> -    size = iov->iov_len - it->iov_offset;
> -    /*
> -     * An iov can be page vectored when both the current tail
> -     * and the next base are page aligned.
> -     */
> -    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
> -           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
> -        size += iov->iov_len;
> -    }
> -    dout("dio_get_pagevlen len = %zu\n", size);
> -    return size;
> -}
> -
> -/*
>  * Allocate a page vector based on (@it, @nbytes).
>  * The return value is the tuple describing a page vector,
>  * that is (@pages, @page_align, @num_pages).
> @@ -71,8 +48,7 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
> 	struct page **pages;
> 	int ret = 0, idx, npages;
> 
> -	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
> -		(PAGE_SIZE - 1);
> +	align = iov_iter_single_seg_page_offset(it);
> 	npages = calc_pages_for(align, nbytes);
> 	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
> 	if (!pages) {
> @@ -927,7 +903,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
> 	}
> 
> 	while (iov_iter_count(iter) > 0) {
> -		u64 size = dio_get_pagev_size(iter);
> +		u64 size = iov_iter_pvec_size(iter);
> 		size_t start = 0;
> 		ssize_t len;
> 
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 6e22b544d039..46fdfff3d7d6 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -83,6 +83,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
> void iov_iter_advance(struct iov_iter *i, size_t bytes);
> int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
> size_t iov_iter_single_seg_count(const struct iov_iter *i);
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i);
> size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
> 			 struct iov_iter *i);
> size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> @@ -93,6 +94,7 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
> size_t iov_iter_zero(size_t bytes, struct iov_iter *);
> unsigned long iov_iter_alignment(const struct iov_iter *i);
> unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
> +size_t iov_iter_pvec_size(const struct iov_iter *i);
> void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
> 			unsigned long nr_segs, size_t count);
> void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 6b415b5a100d..b8fa377b0cef 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -745,6 +745,24 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
> }
> EXPORT_SYMBOL(iov_iter_single_seg_count);
> 
> +/* Return offset into page for current iov_iter segment */
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i)
> +{
> +	size_t	base;
> +
> +	if (i->type & ITER_PIPE)
> +		base = i->pipe->bufs[i->idx].offset;
> +	else if (i->type & ITER_BVEC)
> +		base = i->bvec->bv_offset;
> +	else if (i->type & ITER_KVEC)
> +		base = (size_t)i->kvec->iov_base;
> +	else
> +		base = (size_t)i->iov->iov_base;
> +
> +	return (base + i->iov_offset) & (PAGE_SIZE - 1);
> +}
> +EXPORT_SYMBOL(iov_iter_single_seg_page_offset);
> +
> void iov_iter_kvec(struct iov_iter *i, int direction,
> 			const struct kvec *kvec, unsigned long nr_segs,
> 			size_t count)
> @@ -830,6 +848,59 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
> }
> EXPORT_SYMBOL(iov_iter_gap_alignment);
> 
> +/**
> + * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
> + * @i: iov_iter in which to find the size
> + *
> + * Some filesystems can stitch together multiple iovecs into a single
> + * page vector when both the previous tail and current base are page
> + * aligned. This function discovers the length that can fit in a single
> + * pagevec and returns it.
> + */
> +size_t iov_iter_pvec_size(const struct iov_iter *i)
> +{
> +	size_t size = i->count;
> +	size_t pv_size = 0;
> +	bool contig = false, first = true;
> +
> +	if (!size)
> +		return 0;
> +
> +	/* Pipes are naturally aligned for this */
> +	if (unlikely(i->type & ITER_PIPE))
> +		return size;
> +
> +	/*
> +	 * An iov can be page vectored when the current base and previous
> +	 * tail are both page aligned. Note that we don't require that the
> +	 * initial base in the first iovec also be page aligned.
> +	 */
> +	iterate_all_kinds(i, size, v,
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }; 0;
> +		 }),
> +		({
> +		 if (first || (contig && v.bv_offset == 0)) {
> +			pv_size += v.bv_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
> +		 }
> +		 }),
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }
> +		 }))
> +	return pv_size;
> +}
> +EXPORT_SYMBOL(iov_iter_pvec_size);
> +
> static inline size_t __pipe_get_pages(struct iov_iter *i,
> 				size_t maxsize,
> 				struct page **pages,

Reviewed-by: Yan, Zheng <zyan@redhat.com>
> -- 
> 2.7.4
> 


* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-10 12:57 ` [PATCH v2] " Jeff Layton
  2017-01-11  2:42   ` Yan, Zheng
@ 2017-01-12  7:59   ` Al Viro
  2017-01-12 11:13     ` Ilya Dryomov
  2017-01-12 11:27     ` Jeff Layton
  1 sibling, 2 replies; 17+ messages in thread
From: Al Viro @ 2017-01-12  7:59 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Yan, Zheng, Sage Weil, Ilya Dryomov, ceph-devel, linux-fsdevel,
	linux-kernel, Zhu, Caifeng

On Tue, Jan 10, 2017 at 07:57:31AM -0500, Jeff Layton wrote:
> v2: fix bug in offset handling in iov_iter_pvec_size
> 
> xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> fio to drive some I/O via vmsplice and splice. Ceph then ends up trying
> to access an ITER_BVEC type iov_iter as an ITER_IOVEC one. That causes it
> to pick up a wrong offset and get stuck in an infinite loop while trying
> to populate the page array. dio_get_pagev_size has a similar problem.
> 
> To fix the first problem, add a new iov_iter helper to determine the
> offset into the page for the current segment and have ceph call that.
> I would just replace dio_get_pages_alloc with iov_iter_get_pages_alloc,
> but that will only return a single page at a time for ITER_BVEC and
> it's better to make larger requests when possible.
> 
> For the second problem, we simply replace it with a new helper that does
> what it does, but properly for all iov_iter types.
> 
> Since we're moving that into generic code, we can also utilize the
> iterate_all_kinds macro to simplify this. That means that we need to
> rework the logic a bit since we can't advance to the next vector while
> checking the current one.

Yecchhh...  That really looks like exposing way too low-level stuff instead
of coming up with saner primitive ;-/

Is page vector + offset in the first page + number of bytes really what
ceph wants?  Would e.g. an array of bio_vec be saner?  Because _that_
would make a lot more natural iov_iter_get_pages_alloc() analogue...
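
Something along these lines, say (name and signature purely hypothetical,
nothing like it exists in lib/iov_iter.c today):

	/*
	 * Pin the pages covering up to @maxsize bytes of @i and return
	 * them as a kmalloc'ed bio_vec array; returns the number of
	 * bytes covered and fills *bvecs / *nr_bvecs.
	 */
	ssize_t iov_iter_get_bvecs_alloc(struct iov_iter *i, size_t maxsize,
					 struct bio_vec **bvecs, int *nr_bvecs);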

And yes, I realize that you have ->pages wired into the struct ceph_osd_request;
how painful would it be to have it switched to struct bio_vec array instead?


* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-12  7:59   ` Al Viro
@ 2017-01-12 11:13     ` Ilya Dryomov
  2017-01-12 11:37       ` Al Viro
  2017-01-12 11:27     ` Jeff Layton
  1 sibling, 1 reply; 17+ messages in thread
From: Ilya Dryomov @ 2017-01-12 11:13 UTC (permalink / raw)
  To: Al Viro
  Cc: Jeff Layton, Yan, Zheng, Sage Weil, Ceph Development,
	linux-fsdevel, linux-kernel, Zhu, Caifeng

On Thu, Jan 12, 2017 at 8:59 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Tue, Jan 10, 2017 at 07:57:31AM -0500, Jeff Layton wrote:
>> v2: fix bug in offset handling in iov_iter_pvec_size
>>
>> xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
>> fio to drive some I/O via vmsplice and splice. Ceph then ends up trying
>> to access an ITER_BVEC type iov_iter as an ITER_IOVEC one. That causes it
>> to pick up a wrong offset and get stuck in an infinite loop while trying
>> to populate the page array. dio_get_pagev_size has a similar problem.
>>
>> To fix the first problem, add a new iov_iter helper to determine the
>> offset into the page for the current segment and have ceph call that.
>> I would just replace dio_get_pages_alloc with iov_iter_get_pages_alloc,
>> but that will only return a single page at a time for ITER_BVEC and
>> it's better to make larger requests when possible.
>>
>> For the second problem, we simply replace it with a new helper that does
>> what it does, but properly for all iov_iter types.
>>
>> Since we're moving that into generic code, we can also utilize the
>> iterate_all_kinds macro to simplify this. That means that we need to
>> rework the logic a bit since we can't advance to the next vector while
>> checking the current one.
>
> Yecchhh...  That really looks like exposing way too low-level stuff instead
> of coming up with saner primitive ;-/
>
> Is page vector + offset in the first page + number of bytes really what
> ceph wants?  Would e.g. an array of bio_vec be saner?  Because _that_
> would make a lot more natural iov_iter_get_pages_alloc() analogue...
>
> And yes, I realize that you have ->pages wired into the struct ceph_osd_request;
> how painful would it be to have it switched to struct bio_vec array instead?

It would be a significant and wide-reaching change, but I've been
meaning to look into switching to iov_iter for a couple of releases
now.  There is a lot of ugly code in net/ceph/messenger.c to handle
iteration over "page vectors", "page lists" and "bio lists".  All of it
predates iov_iter proliferation and is mostly incomplete anyway: IIRC
you can send out of a pagelist but can't recv into a pagelist, etc.

That said, Jeff's patch doesn't look too bad to me...

Thanks,

                Ilya


* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-12  7:59   ` Al Viro
  2017-01-12 11:13     ` Ilya Dryomov
@ 2017-01-12 11:27     ` Jeff Layton
  2017-01-12 11:37       ` Ilya Dryomov
  1 sibling, 1 reply; 17+ messages in thread
From: Jeff Layton @ 2017-01-12 11:27 UTC (permalink / raw)
  To: Al Viro
  Cc: Yan, Zheng, Sage Weil, Ilya Dryomov, ceph-devel, linux-fsdevel,
	linux-kernel, Zhu, Caifeng

On Thu, 2017-01-12 at 07:59 +0000, Al Viro wrote:
> On Tue, Jan 10, 2017 at 07:57:31AM -0500, Jeff Layton wrote:
> > 
> > v2: fix bug in offset handling in iov_iter_pvec_size
> > 
> > xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> > fio to drive some I/O via vmsplice and splice. Ceph then ends up trying
> > to access an ITER_BVEC type iov_iter as an ITER_IOVEC one. That causes it
> > to pick up a wrong offset and get stuck in an infinite loop while trying
> > to populate the page array. dio_get_pagev_size has a similar problem.
> > 
> > To fix the first problem, add a new iov_iter helper to determine the
> > offset into the page for the current segment and have ceph call that.
> > I would just replace dio_get_pages_alloc with iov_iter_get_pages_alloc,
> > but that will only return a single page at a time for ITER_BVEC and
> > it's better to make larger requests when possible.
> > 
> > For the second problem, we simply replace it with a new helper that does
> > what it does, but properly for all iov_iter types.
> > 
> > Since we're moving that into generic code, we can also utilize the
> > iterate_all_kinds macro to simplify this. That means that we need to
> > rework the logic a bit since we can't advance to the next vector while
> > checking the current one.
> 
> Yecchhh...  That really looks like exposing way too low-level stuff instead
> of coming up with saner primitive ;-/
> 

Fair point. That said, I'm not terribly thrilled with how
iov_iter_get_pages* works right now.

Note that it only ever touches the first vector. Would it not be better
to keep getting page references if the bvec/iov elements are aligned
properly? It seems quite plausible that they often would be, and being
able to hand back a larger list of pages in most cases would be
advantageous.

IOW, should we have iov_iter_get_pages basically do what
dio_get_pages_alloc does -- try to build as long an array of pages as
possible before returning, provided that the alignment works out?
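
The "alignment works out" test would be basically the same one the old
ceph code used -- something like this (sketch only, not a real patch):

	/*
	 * Two adjacent iovecs can go into the same page run only if the
	 * first one ends on a page boundary and the second one starts
	 * on one.
	 */
	static bool can_merge_iovecs(const struct iovec *prev,
				     const struct iovec *next)
	{
		return PAGE_ALIGNED(prev->iov_base + prev->iov_len) &&
		       PAGE_ALIGNED(next->iov_base);
	}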

The NFS DIO code, for instance, could also benefit there. I know we've
had reports there in the past that sending down a bunch of small iovecs
causes a lot of small-sized requests on the wire.

> Is page vector + offset in the first page + number of bytes really what
> ceph wants?  Would e.g. an array of bio_vec be saner?  Because _that_
> would make a lot more natural iov_iter_get_pages_alloc() analogue...
> 
> And yes, I realize that you have ->pages wired into the struct ceph_osd_request;
> how painful would it be to have it switched to struct bio_vec array instead?

Actually...it looks like that might not be too hard. The low-level OSD
handling code can already handle bio_vec arrays in order to service RBD.
It looks like we could switch cephfs to use
osd_req_op_extent_osd_data_bio instead of
osd_req_op_extent_osd_data_pages. That would add a dependency in cephfs
on CONFIG_BLOCK, but I think we could probably live with that.

-- 
Jeff Layton <jlayton@redhat.com>


* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-12 11:27     ` Jeff Layton
@ 2017-01-12 11:37       ` Ilya Dryomov
  2017-01-18 12:14         ` Jeff Layton
  0 siblings, 1 reply; 17+ messages in thread
From: Ilya Dryomov @ 2017-01-12 11:37 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Al Viro, Yan, Zheng, Sage Weil, Ceph Development, linux-fsdevel,
	linux-kernel, Zhu, Caifeng

On Thu, Jan 12, 2017 at 12:27 PM, Jeff Layton <jlayton@redhat.com> wrote:
> On Thu, 2017-01-12 at 07:59 +0000, Al Viro wrote:
>> On Tue, Jan 10, 2017 at 07:57:31AM -0500, Jeff Layton wrote:
>> >
>> > v2: fix bug in offset handling in iov_iter_pvec_size
>> >
>> > xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
>> > fio to drive some I/O via vmsplice and splice. Ceph then ends up trying
>> > to access an ITER_BVEC type iov_iter as an ITER_IOVEC one. That causes it
>> > to pick up a wrong offset and get stuck in an infinite loop while trying
>> > to populate the page array. dio_get_pagev_size has a similar problem.
>> >
>> > To fix the first problem, add a new iov_iter helper to determine the
>> > offset into the page for the current segment and have ceph call that.
>> > I would just replace dio_get_pages_alloc with iov_iter_get_pages_alloc,
>> > but that will only return a single page at a time for ITER_BVEC and
>> > it's better to make larger requests when possible.
>> >
>> > For the second problem, we simply replace it with a new helper that does
>> > what it does, but properly for all iov_iter types.
>> >
>> > Since we're moving that into generic code, we can also utilize the
>> > iterate_all_kinds macro to simplify this. That means that we need to
>> > rework the logic a bit since we can't advance to the next vector while
>> > checking the current one.
>>
>> Yecchhh...  That really looks like exposing way too low-level stuff instead
>> of coming up with saner primitive ;-/
>>
>
> Fair point. That said, I'm not terribly thrilled with how
> iov_iter_get_pages* works right now.
>
> Note that it only ever touches the first vector. Would it not be better
> to keep getting page references if the bvec/iov elements are aligned
> properly? It seems quite plausible that they often would be, and being
> able to hand back a larger list of pages in most cases would be
> advantageous.
>
> IOW, should we have iov_iter_get_pages basically do what
> dio_get_pages_alloc does -- try to build as long an array of pages as
> possible before returning, provided that the alignment works out?
>
> The NFS DIO code, for instance, could also benefit there. I know we've
> had reports there in the past that sending down a bunch of small iovecs
> causes a lot of small-sized requests on the wire.
>
>> Is page vector + offset in the first page + number of bytes really what
>> ceph wants?  Would e.g. an array of bio_vec be saner?  Because _that_
>> would make a lot more natural iov_iter_get_pages_alloc() analogue...
>>
>> And yes, I realize that you have ->pages wired into the struct ceph_osd_request;
>> how painful would it be to have it switched to struct bio_vec array instead?
>
> Actually...it looks like that might not be too hard. The low-level OSD
> handling code can already handle bio_vec arrays in order to service RBD.
> It looks like we could switch cephfs to use
> osd_req_op_extent_osd_data_bio instead of
> osd_req_op_extent_osd_data_pages. That would add a dependency in cephfs
> on CONFIG_BLOCK, but I think we could probably live with that.

Ah, just that part might be easy enough ;)

Thanks,

                Ilya


* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-12 11:13     ` Ilya Dryomov
@ 2017-01-12 11:37       ` Al Viro
  2017-01-12 11:46         ` Ilya Dryomov
  0 siblings, 1 reply; 17+ messages in thread
From: Al Viro @ 2017-01-12 11:37 UTC (permalink / raw)
  To: Ilya Dryomov
  Cc: Jeff Layton, Yan, Zheng, Sage Weil, Ceph Development,
	linux-fsdevel, linux-kernel, Zhu, Caifeng

On Thu, Jan 12, 2017 at 12:13:31PM +0100, Ilya Dryomov wrote:

> It would be a significant and wide-reaching change, but I've been
> meaning to look into switching to iov_iter for a couple of releases
> now.  There is a lot of ugly code in net/ceph/messenger.c to handle
> iteration over "page vectors", "page lists" and "bio lists".  All of it
> predates iov_iter proliferation and is mostly incomplete anyway: IIRC
> you can send out of a pagelist but can't recv into a pagelist, etc.

Wait a sec...  Is it done from the same thread that has issued a syscall?
If so, we certainly could just pass iov_iter without bothering with any
form of ..._get_pages(); if not, we'll need at least to get from iovec
to bio_vec, since userland addresses make sense only in the caller's
context...


* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-12 11:37       ` Al Viro
@ 2017-01-12 11:46         ` Ilya Dryomov
  2017-01-12 11:53           ` Al Viro
  2017-01-12 11:57           ` Jeff Layton
  0 siblings, 2 replies; 17+ messages in thread
From: Ilya Dryomov @ 2017-01-12 11:46 UTC (permalink / raw)
  To: Al Viro
  Cc: Jeff Layton, Yan, Zheng, Sage Weil, Ceph Development,
	linux-fsdevel, linux-kernel, Zhu, Caifeng

On Thu, Jan 12, 2017 at 12:37 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Thu, Jan 12, 2017 at 12:13:31PM +0100, Ilya Dryomov wrote:
>
>> It would be a significant and wide-reaching change, but I've been
>> meaning to look into switching to iov_iter for a couple of releases
>> now.  There is a lot of ugly code in net/ceph/messenger.c to handle
>> iteration over "page vectors", "page lists" and "bio lists".  All of it
>> predates iov_iter proliferation and is mostly incomplete anyway: IIRC
>> you can send out of a pagelist but can't recv into a pagelist, etc.
>
> Wait a sec...  Is it done from the same thread that has issued a syscall?
> If so, we certainly could just pass iov_iter without bothering with any
> form of ..._get_pages(); if not, we'll need at least to get from iovec
> to bio_vec, since userland addresses make sense only in the caller's
> context...

No, not necessarily - it's also used by rbd (all of net/ceph has two
users: fs/ceph and drivers/block/rbd.c).

Thanks,

                Ilya


* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-12 11:46         ` Ilya Dryomov
@ 2017-01-12 11:53           ` Al Viro
  2017-01-12 12:17             ` Ilya Dryomov
  2017-01-12 11:57           ` Jeff Layton
  1 sibling, 1 reply; 17+ messages in thread
From: Al Viro @ 2017-01-12 11:53 UTC (permalink / raw)
  To: Ilya Dryomov
  Cc: Jeff Layton, Yan, Zheng, Sage Weil, Ceph Development,
	linux-fsdevel, linux-kernel, Zhu, Caifeng

On Thu, Jan 12, 2017 at 12:46:42PM +0100, Ilya Dryomov wrote:
> On Thu, Jan 12, 2017 at 12:37 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> > On Thu, Jan 12, 2017 at 12:13:31PM +0100, Ilya Dryomov wrote:
> >
> >> It would be a significant and wide-reaching change, but I've been
> >> meaning to look into switching to iov_iter for a couple of releases
> >> now.  There is a lot of ugly code in net/ceph/messenger.c to handle
> >> iteration over "page vectors", "page lists" and "bio lists".  All of it
> >> predates iov_iter proliferation and is mostly incomplete anyway: IIRC
> >> you can send out of a pagelist but can't recv into a pagelist, etc.
> >
> > Wait a sec...  Is it done from the same thread that has issued a syscall?
> > If so, we certainly could just pass iov_iter without bothering with any
> > form of ..._get_pages(); if not, we'll need at least to get from iovec
> > to bio_vec, since userland addresses make sense only in the caller's
> > context...
> 
> No, not necessarily - it's also used by rbd (all of net/ceph has two
> users: fs/ceph and drivers/block/rbd.c).

Yes, but rbd doesn't deal with userland-pointing iovecs at all, does it?


* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-12 11:46         ` Ilya Dryomov
  2017-01-12 11:53           ` Al Viro
@ 2017-01-12 11:57           ` Jeff Layton
  1 sibling, 0 replies; 17+ messages in thread
From: Jeff Layton @ 2017-01-12 11:57 UTC (permalink / raw)
  To: Ilya Dryomov, Al Viro
  Cc: Yan, Zheng, Sage Weil, Ceph Development, linux-fsdevel,
	linux-kernel, Zhu, Caifeng

On Thu, 2017-01-12 at 12:46 +0100, Ilya Dryomov wrote:
> On Thu, Jan 12, 2017 at 12:37 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> > 
> > On Thu, Jan 12, 2017 at 12:13:31PM +0100, Ilya Dryomov wrote:
> > 
> > > 
> > > It would be a significant and wide-reaching change, but I've been
> > > meaning to look into switching to iov_iter for a couple of releases
> > > now.  There is a lot of ugly code in net/ceph/messenger.c to handle
> > > iteration over "page vectors", "page lists" and "bio lists".  All of it
> > > predates iov_iter proliferation and is mostly incomplete anyway: IIRC
> > > you can send out of a pagelist but can't recv into a pagelist, etc.
> > 
> > Wait a sec...  Is it done from the same thread that has issued a syscall?
> > If so, we certainly could just pass iov_iter without bothering with any
> > form of ..._get_pages(); if not, we'll need at least to get from iovec
> > to bio_vec, since userland addresses make sense only in the caller's
> > context...
> 
> No, not necessarily - it's also used by rbd (all of net/ceph has two
> users: fs/ceph and drivers/block/rbd.c).
> 
> 

...and note that the actual send/receive is done from workqueue context
(AFAICT), so we might be operating on the array from a completely
different thread context than the one where it was submitted. I think we do need
to get page references at the point of submission like this.
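
To make that concrete, the shape I have in mind is roughly the sketch
below: pin the pages while we're still in the submitter's context and
only hand pinned pages to the messenger. Just an illustration --
ceph_queue_request() is a made-up stand-in for the real submission path:

static int submit_user_buffer(struct iov_iter *iter, size_t bytes)
{
	struct page **pages;
	size_t page_off;
	ssize_t got;

	/* still in the caller's context, so userland addresses are valid */
	got = iov_iter_get_pages_alloc(iter, &pages, bytes, &page_off);
	if (got < 0)
		return got;

	/* pages[] now holds references that any kworker can safely use */
	return ceph_queue_request(pages, page_off, got);
}
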
-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-12 11:53           ` Al Viro
@ 2017-01-12 12:17             ` Ilya Dryomov
  0 siblings, 0 replies; 17+ messages in thread
From: Ilya Dryomov @ 2017-01-12 12:17 UTC (permalink / raw)
  To: Al Viro
  Cc: Jeff Layton, Yan, Zheng, Sage Weil, Ceph Development,
	linux-fsdevel, linux-kernel, Zhu, Caifeng

On Thu, Jan 12, 2017 at 12:53 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> On Thu, Jan 12, 2017 at 12:46:42PM +0100, Ilya Dryomov wrote:
>> On Thu, Jan 12, 2017 at 12:37 PM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>> > On Thu, Jan 12, 2017 at 12:13:31PM +0100, Ilya Dryomov wrote:
>> >
>> >> It would be a significant and wide-reaching change, but I've been
>> >> meaning to look into switching to iov_iter for a couple of releases
>> >> now.  There is a lot of ugly code in net/ceph/messenger.c to handle
>> >> iteration over "page vectors", "page lists" and "bio lists".  All of it
>> >> predates iov_iter proliferation and is mostly incomplete anyway: IIRC
>> >> you can send out of a pagelist but can't recv into a pagelist, etc.
>> >
>> > Wait a sec...  Is it done from the same thread that has issued a syscall?
>> > If so, we certainly could just pass iov_iter without bothering with any
>> > form of ..._get_pages(); if not, we'll need at least to get from iovec
>> > to bio_vec, since userland addresses make sense only in the caller's
>> > context...
>>
>> No, not necessarily - it's also used by rbd (all of net/ceph has two
>> users: fs/ceph and drivers/block/rbd.c).
>
> Yes, but rbd doesn't deal with userland-pointing iovecs at all, does it?

Correct.  Normal I/O is all bios + currently it uses some of that
custom page vector/list machinery for maintenance operations (rbd
metadata, locks, etc).  No userland pointers whatsoever for rbd, but
the messenger (checksumming + {send,recv}{msg,page}) runs out of the
kworker for both rbd and cephfs.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-12 11:37       ` Ilya Dryomov
@ 2017-01-18 12:14         ` Jeff Layton
  2017-01-24 15:00           ` Jeff Layton
  0 siblings, 1 reply; 17+ messages in thread
From: Jeff Layton @ 2017-01-18 12:14 UTC (permalink / raw)
  To: Ilya Dryomov
  Cc: Al Viro, Yan, Zheng, Sage Weil, Ceph Development, linux-fsdevel,
	linux-kernel, Zhu, Caifeng

On Thu, 2017-01-12 at 12:37 +0100, Ilya Dryomov wrote:
> On Thu, Jan 12, 2017 at 12:27 PM, Jeff Layton <jlayton@redhat.com> wrote:
> > On Thu, 2017-01-12 at 07:59 +0000, Al Viro wrote:
> > > On Tue, Jan 10, 2017 at 07:57:31AM -0500, Jeff Layton wrote:
> > > > 
> > > > v2: fix bug in offset handling in iov_iter_pvec_size
> > > > 
> > > > xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> > > > fio to drive some I/O via vmsplice and splice. Ceph then ends up trying
> > > > to access an ITER_BVEC type iov_iter as a ITER_IOVEC one. That causes it
> > > > to pick up a wrong offset and get stuck in an infinite loop while trying
> > > > to populate the page array. dio_get_pagev_size has a similar problem.
> > > > 
> > > > To fix the first problem, add a new iov_iter helper to determine the
> > > > offset into the page for the current segment and have ceph call that.
> > > > I would just replace dio_get_pages_alloc with iov_iter_get_pages_alloc,
> > > > but that will only return a single page at a time for ITER_BVEC and
> > > > it's better to make larger requests when possible.
> > > > 
> > > > For the second problem, we simply replace it with a new helper that does
> > > > what it does, but properly for all iov_iter types.
> > > > 
> > > > Since we're moving that into generic code, we can also utilize the
> > > > iterate_all_kinds macro to simplify this. That means that we need to
> > > > rework the logic a bit since we can't advance to the next vector while
> > > > checking the current one.
> > > 
> > > Yecchhh...  That really looks like exposing way too low-level stuff instead
> > > of coming up with saner primitive ;-/
> > > 
> > 
> > Fair point. That said, I'm not terribly thrilled with how
> > iov_iter_get_pages* works right now.
> > 
> > Note that it only ever touches the first vector. Would it not be better
> > to keep getting page references if the bvec/iov elements are aligned
> > properly? It seems quite plausible that they often would be, and being
> > able to hand back a larger list of pages in most cases would be
> > advantageous.
> > 
> > IOW, should we have iov_iter_get_pages basically do what
> > dio_get_pages_alloc does -- try to build as long an array of pages as
> > possible before returning, provided that the alignment works out?
> > 
> > The NFS DIO code, for instance, could also benefit there. I know we've
> > had reports there in the past that sending down a bunch of small iovecs
> > causes a lot of small-sized requests on the wire.
> > 
> > > Is page vector + offset in the first page + number of bytes really what
> > > ceph wants?  Would e.g. an array of bio_vec be saner?  Because _that_
> > > would make a lot more natural iov_iter_get_pages_alloc() analogue...
> > > 
> > > And yes, I realize that you have ->pages wired into the struct ceph_osd_request;
> > > how painful would it be to have it switched to struct bio_vec array instead?
> > 
> > Actually...it looks like that might not be too hard. The low-level OSD
> > handling code can already handle bio_vec arrays in order to service RBD.
> > It looks like we could switch cephfs to use
> > osd_req_op_extent_osd_data_bio instead of
> > osd_req_op_extent_osd_data_pages. That would add a dependency in cephfs
> > on CONFIG_BLOCK, but I think we could probably live with that.
> 
> Ah, just that part might be easy enough ;)
> 
> 

Yeah, that part doesn't look too bad. Regardless though, I think we need
to get a fix in for this sooner rather than later as it's trivial to get
the kernel stuck in this loop today, by any user with write access to a
ceph mount.

Al, when you mentioned switching this over to a bio_vec based interface,
were you planning to roll up the iov_iter->bio_vec array helper for
this, or should I be looking into doing that?
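
For concreteness, the kind of helper I'm picturing is something like the
sketch below: build a bio_vec array from the iter's current position,
taking page references (via iov_iter_get_pages_alloc) as we go. The name
and the error handling are invented here, so treat it as a rough outline
rather than a proposal:

static ssize_t iov_iter_to_bvec_array(struct iov_iter *iter, size_t bytes,
				      struct bio_vec **bvecp)
{
	struct page **pages;
	struct bio_vec *bv;
	size_t off, rem;
	ssize_t len;
	int i, n;

	len = iov_iter_get_pages_alloc(iter, &pages, bytes, &off);
	if (len < 0)
		return len;

	n = DIV_ROUND_UP(off + len, PAGE_SIZE);
	bv = kcalloc(n, sizeof(*bv), GFP_KERNEL);
	if (!bv) {
		/* drop the references iov_iter_get_pages_alloc took */
		for (i = 0; i < n; i++)
			put_page(pages[i]);
		kvfree(pages);
		return -ENOMEM;
	}

	rem = len;
	for (i = 0; i < n; i++) {
		bv[i].bv_page = pages[i];
		bv[i].bv_offset = off;
		bv[i].bv_len = min_t(size_t, rem, PAGE_SIZE - off);
		rem -= bv[i].bv_len;
		off = 0;
	}
	kvfree(pages);
	*bvecp = bv;
	return n;		/* number of bio_vecs filled */
}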

Thanks,
-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-18 12:14         ` Jeff Layton
@ 2017-01-24 15:00           ` Jeff Layton
  2017-01-24 20:51             ` Jeff Layton
  0 siblings, 1 reply; 17+ messages in thread
From: Jeff Layton @ 2017-01-24 15:00 UTC (permalink / raw)
  To: Ilya Dryomov
  Cc: Al Viro, Yan, Zheng, Sage Weil, Ceph Development, linux-fsdevel,
	linux-kernel, Zhu, Caifeng

On Wed, 2017-01-18 at 07:14 -0500, Jeff Layton wrote:
> On Thu, 2017-01-12 at 12:37 +0100, Ilya Dryomov wrote:
> > On Thu, Jan 12, 2017 at 12:27 PM, Jeff Layton <jlayton@redhat.com> wrote:
> > > On Thu, 2017-01-12 at 07:59 +0000, Al Viro wrote:
> > > > On Tue, Jan 10, 2017 at 07:57:31AM -0500, Jeff Layton wrote:
> > > > > 
> > > > > v2: fix bug in offset handling in iov_iter_pvec_size
> > > > > 
> > > > > xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> > > > > fio to drive some I/O via vmsplice and splice. Ceph then ends up trying
> > > > > to access an ITER_BVEC type iov_iter as a ITER_IOVEC one. That causes it
> > > > > to pick up a wrong offset and get stuck in an infinite loop while trying
> > > > > to populate the page array. dio_get_pagev_size has a similar problem.
> > > > > 
> > > > > To fix the first problem, add a new iov_iter helper to determine the
> > > > > offset into the page for the current segment and have ceph call that.
> > > > > I would just replace dio_get_pages_alloc with iov_iter_get_pages_alloc,
> > > > > but that will only return a single page at a time for ITER_BVEC and
> > > > > it's better to make larger requests when possible.
> > > > > 
> > > > > For the second problem, we simply replace it with a new helper that does
> > > > > what it does, but properly for all iov_iter types.
> > > > > 
> > > > > Since we're moving that into generic code, we can also utilize the
> > > > > iterate_all_kinds macro to simplify this. That means that we need to
> > > > > rework the logic a bit since we can't advance to the next vector while
> > > > > checking the current one.
> > > > 
> > > > Yecchhh...  That really looks like exposing way too low-level stuff instead
> > > > of coming up with saner primitive ;-/
> > > > 
> > > 
> > > Fair point. That said, I'm not terribly thrilled with how
> > > iov_iter_get_pages* works right now.
> > > 
> > > Note that it only ever touches the first vector. Would it not be better
> > > to keep getting page references if the bvec/iov elements are aligned
> > > properly? It seems quite plausible that they often would be, and being
> > > able to hand back a larger list of pages in most cases would be
> > > advantageous.
> > > 
> > > IOW, should we have iov_iter_get_pages basically do what
> > > dio_get_pages_alloc does -- try to build as long an array of pages as
> > > possible before returning, provided that the alignment works out?
> > > 
> > > The NFS DIO code, for instance, could also benefit there. I know we've
> > > had reports there in the past that sending down a bunch of small iovecs
> > > causes a lot of small-sized requests on the wire.
> > > 
> > > > Is page vector + offset in the first page + number of bytes really what
> > > > ceph wants?  Would e.g. an array of bio_vec be saner?  Because _that_
> > > > would make a lot more natural iov_iter_get_pages_alloc() analogue...
> > > > 
> > > > And yes, I realize that you have ->pages wired into the struct ceph_osd_request;
> > > > how painful would it be to have it switched to struct bio_vec array instead?
> > > 
> > > Actually...it looks like that might not be too hard. The low-level OSD
> > > handling code can already handle bio_vec arrays in order to service RBD.
> > > It looks like we could switch cephfs to use
> > > osd_req_op_extent_osd_data_bio instead of
> > > osd_req_op_extent_osd_data_pages. That would add a dependency in cephfs
> > > on CONFIG_BLOCK, but I think we could probably live with that.
> > 
> > Ah, just that part might be easy enough ;)
> > 
> > 
> 
> Yeah, that part doesn't look too bad. Regardless though, I think we need
> to get a fix in for this sooner rather than later as it's trivial to get
> the kernel stuck in this loop today, by any user with write access to a
> ceph mount.
> 
> Al, when you mentioned switching this over to a bio_vec based interface,
> were you planning to roll up the iov_iter->bio_vec array helper for
> this, or should I be looking into doing that?
> 

I take it back. It's trickier than it sounds. The libceph code is wired
to accept struct bio, not struct bio_vec.

That said, I think the right solution might be a patch like the one
below. This basically just changes iov_iter_get_pages_alloc to try to
return as large a page array as it can instead of bailing out on the
first entry.

With this, we can just drop dio_get_pages_alloc altogether and have ceph
just call iov_iter_get_pages_alloc.
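
For example (just a sketch: error handling and the OSD request setup are
elided, and in the real code the pages would be put on request
completion rather than inline), the ceph direct I/O loop could then
shrink to roughly:

while (iov_iter_count(iter) > 0) {
	struct page **pages;
	size_t start;
	ssize_t len;

	len = iov_iter_get_pages_alloc(iter, &pages,
				       iov_iter_count(iter), &start);
	if (len < 0)
		break;

	/* ... build and send an OSD request covering len bytes ... */

	iov_iter_advance(iter, len);
	ceph_put_page_vector(pages, calc_pages_for(start, len), false);
}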

I was also thinking this would help NFS DIO to do large I/Os as well,
but it doesn't. NFS DIO writes are still done a page at a time, even
when this function now hands back a large array of pages. It's probably
a bug in the NFS DIO code, but I haven't chased it down yet.

Still, this doesn't seem to make that any worse and we'd need this patch
or something like it to make NFS DTRT here anyway.

-------------------------8<---------------------------

iov_iter: allow iov_iter_get_pages_alloc to return more pages per call

Currently, iov_iter_get_pages_alloc will only ever operate on the first
vector that iterate_all_kinds hands back. Many of the callers however
would like to have as long a set of pages as possible, to allow for
fewer, but larger I/Os.

When the previous vector ends on a page boundary and the current one
begins on one, we can continue to add more pages.

Change the function to first scan the iov_iter to see how long an
array of pages we could create from the current position. Then,
allocate an array that large (or up to the maxsize), and fill that
many pages.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 lib/iov_iter.c | 140 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 111 insertions(+), 29 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e68604ae3ced..956d17767a3e 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -883,6 +883,58 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);
 
+/**
+ * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
+ * @i: iov_iter in which to find the size
+ *
+ * Some filesystems can stitch together multiple iovecs into a single
+ * page vector when both the previous tail and current base are page
+ * aligned. This function discovers the length that can fit in a single
+ * pagevec and returns it.
+ */
+static size_t iov_iter_pvec_size(const struct iov_iter *i)
+{
+	size_t size = i->count;
+	size_t pv_size = 0;
+	bool contig = false, first = true;
+
+	if (!size)
+		return 0;
+
+	/* Pipes are naturally aligned for this */
+	if (unlikely(i->type & ITER_PIPE))
+		return size;
+
+	/*
+	 * An iov can be page vectored when the current base and previous
+	 * tail are both page aligned. Note that we don't require that the
+	 * initial base in the first iovec also be page aligned.
+	 */
+	iterate_all_kinds(i, size, v,
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }; 0;
+		 }),
+		({
+		 if (first || (contig && v.bv_offset == 0)) {
+			pv_size += v.bv_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
+		 }
+		 }),
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }
+		 }))
+	return pv_size;
+}
+
 static inline size_t __pipe_get_pages(struct iov_iter *i,
 				size_t maxsize,
 				struct page **pages,
@@ -1006,47 +1058,77 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
 }
 
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
-		   struct page ***pages, size_t maxsize,
-		   size_t *start)
+		   struct page ***ppages, size_t maxsize,
+		   size_t *pstart)
 {
-	struct page **p;
-
-	if (maxsize > i->count)
-		maxsize = i->count;
+	struct page **p, **pc;
+	size_t start = 0;
+	ssize_t len = 0;
+	int npages, res = 0;
+	bool first = true;
 
 	if (unlikely(i->type & ITER_PIPE))
-		return pipe_get_pages_alloc(i, pages, maxsize, start);
+		return pipe_get_pages_alloc(i, ppages, maxsize, pstart);
+
+	maxsize = min(iov_iter_pvec_size(i), maxsize);
+	npages = DIV_ROUND_UP(maxsize, PAGE_SIZE);
+	p = get_pages_array(npages);
+	if (!p)
+		return -ENOMEM;
+
+	pc = p;
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
-		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
+		size_t slen = v.iov_len;
 		int n;
-		int res;
 
-		addr &= ~(PAGE_SIZE - 1);
-		n = DIV_ROUND_UP(len, PAGE_SIZE);
-		p = get_pages_array(n);
-		if (!p)
-			return -ENOMEM;
-		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
-		if (unlikely(res < 0)) {
-			kvfree(p);
-			return res;
+		if (first) {
+			start = addr & (PAGE_SIZE - 1);
+			slen += start;
+			first = false;
 		}
-		*pages = p;
-		return (res == n ? len : res * PAGE_SIZE) - *start;
+
+		n = DIV_ROUND_UP(slen, PAGE_SIZE);
+		if ((pc + n) > (p + npages)) {
+			/* Did something change the iov array?!? */
+			res = -EFAULT;
+			goto out;
+		}
+		addr &= ~(PAGE_SIZE - 1);
+		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pc);
+		if (unlikely(res < 0))
+			goto out;
+		len += (res == n ? slen : res * PAGE_SIZE) - start;
+		pc += res;
 	0;}),({
-		/* can't be more than PAGE_SIZE */
-		*start = v.bv_offset;
-		*pages = p = get_pages_array(1);
-		if (!p)
-			return -ENOMEM;
-		get_page(*p = v.bv_page);
-		return v.bv_len;
+		/* bio_vecs are limited to a single page each */
+		if (first) {
+			start = v.bv_offset;
+			first = false;
+		}
+		get_page(*pc = v.bv_page);
+		len += v.bv_len;
+		++pc;
+		BUG_ON(pc > p + npages);
 	}),({
-		return -EFAULT;
+		/* FIXME: should we handle this case? */
+		res = -EFAULT;
+		goto out;
 	})
 	)
-	return 0;
+out:
+	if (unlikely(res < 0)) {
+		struct page **i;
+
+		for (i = p; i < pc; i++)
+			put_page(*i);
+		kvfree(p);
+		return res;
+	}
+
+	*ppages = p;
+	*pstart = start;
+	return len;
 }
 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
 
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths
  2017-01-24 15:00           ` Jeff Layton
@ 2017-01-24 20:51             ` Jeff Layton
  0 siblings, 0 replies; 17+ messages in thread
From: Jeff Layton @ 2017-01-24 20:51 UTC (permalink / raw)
  To: Ilya Dryomov
  Cc: Al Viro, Yan, Zheng, Sage Weil, Ceph Development, linux-fsdevel,
	linux-kernel, Zhu, Caifeng

On Tue, 2017-01-24 at 10:00 -0500, Jeff Layton wrote:
> On Wed, 2017-01-18 at 07:14 -0500, Jeff Layton wrote:
> > On Thu, 2017-01-12 at 12:37 +0100, Ilya Dryomov wrote:
> > > On Thu, Jan 12, 2017 at 12:27 PM, Jeff Layton <jlayton@redhat.com> wrote:
> > > > On Thu, 2017-01-12 at 07:59 +0000, Al Viro wrote:
> > > > > On Tue, Jan 10, 2017 at 07:57:31AM -0500, Jeff Layton wrote:
> > > > > > 
> > > > > > v2: fix bug in offset handling in iov_iter_pvec_size
> > > > > > 
> > > > > > xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> > > > > > fio to drive some I/O via vmsplice and splice. Ceph then ends up trying
> > > > > > to access an ITER_BVEC type iov_iter as a ITER_IOVEC one. That causes it
> > > > > > to pick up a wrong offset and get stuck in an infinite loop while trying
> > > > > > to populate the page array. dio_get_pagev_size has a similar problem.
> > > > > > 
> > > > > > To fix the first problem, add a new iov_iter helper to determine the
> > > > > > offset into the page for the current segment and have ceph call that.
> > > > > > I would just replace dio_get_pages_alloc with iov_iter_get_pages_alloc,
> > > > > > but that will only return a single page at a time for ITER_BVEC and
> > > > > > it's better to make larger requests when possible.
> > > > > > 
> > > > > > For the second problem, we simply replace it with a new helper that does
> > > > > > what it does, but properly for all iov_iter types.
> > > > > > 
> > > > > > Since we're moving that into generic code, we can also utilize the
> > > > > > iterate_all_kinds macro to simplify this. That means that we need to
> > > > > > rework the logic a bit since we can't advance to the next vector while
> > > > > > checking the current one.
> > > > > 
> > > > > Yecchhh...  That really looks like exposing way too low-level stuff instead
> > > > > of coming up with saner primitive ;-/
> > > > > 
> > > > 
> > > > Fair point. That said, I'm not terribly thrilled with how
> > > > iov_iter_get_pages* works right now.
> > > > 
> > > > Note that it only ever touches the first vector. Would it not be better
> > > > to keep getting page references if the bvec/iov elements are aligned
> > > > properly? It seems quite plausible that they often would be, and being
> > > > able to hand back a larger list of pages in most cases would be
> > > > advantageous.
> > > > 
> > > > IOW, should we have iov_iter_get_pages basically do what
> > > > dio_get_pages_alloc does -- try to build as long an array of pages as
> > > > possible before returning, provided that the alignment works out?
> > > > 
> > > > The NFS DIO code, for instance, could also benefit there. I know we've
> > > > had reports there in the past that sending down a bunch of small iovecs
> > > > causes a lot of small-sized requests on the wire.
> > > > 
> > > > > Is page vector + offset in the first page + number of bytes really what
> > > > > ceph wants?  Would e.g. an array of bio_vec be saner?  Because _that_
> > > > > would make a lot more natural iov_iter_get_pages_alloc() analogue...
> > > > > 
> > > > > And yes, I realize that you have ->pages wired into the struct ceph_osd_request;
> > > > > how painful would it be to have it switched to struct bio_vec array instead?
> > > > 
> > > > Actually...it looks like that might not be too hard. The low-level OSD
> > > > handling code can already handle bio_vec arrays in order to service RBD.
> > > > It looks like we could switch cephfs to use
> > > > osd_req_op_extent_osd_data_bio instead of
> > > > osd_req_op_extent_osd_data_pages. That would add a dependency in cephfs
> > > > on CONFIG_BLOCK, but I think we could probably live with that.
> > > 
> > > Ah, just that part might be easy enough ;)
> > > 
> > > 
> > 
> > Yeah, that part doesn't look too bad. Regardless though, I think we need
> > to get a fix in for this sooner rather than later as it's trivial to get
> > the kernel stuck in this loop today, by any user with write access to a
> > ceph mount.
> > 
> > Al, when you mentioned switching this over to a bio_vec based interface,
> > were you planning to roll up the iov_iter->bio_vec array helper for
> > this, or should I be looking into doing that?
> > 
> 
> I take it back. It's trickier than it sounds. The libceph code is wired
> to accept struct bio, not struct bio_vec.
> 
> That said, I think the right solution might be a patch like the one
> below. This basically just changes iov_iter_get_pages_alloc to try to
> return as large a page array as it can instead of bailing out on the
> first entry.
> 
> With this, we can just drop dio_get_pages_alloc altogether and have ceph
> just call iov_iter_get_pages_alloc.
> 
> I was also thinking this would help NFS DIO to do large I/Os as well,
> but it doesn't. NFS DIO writes are still done a page at a time, even
> when this function now hands back a large array of pages. It's probably
> a bug in the NFS DIO code, but I haven't chased it down yet.
> 
> Still, this doesn't seem to make that any worse and we'd need this patch
> or something like it to make NFS DTRT here anyway.
> 

Ahh, found the problem with nfs and this patch. The reproducer I had
didn't dirty the pages before writing them.

It turns out that when you allocate pages with posix_memalign, you get
an allocation that is generally mapped to the same page that is set up
for CoW. That runs afoul of some of the logic in the nfs code that
coalesces page I/O requests.

When the pages are dirtied ahead of time (as they would almost always be
in a real-world scenario), it works just fine, and it does allow NFS to
better coalesce page-aligned iovecs.
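
A trivial userspace illustration of what the reproducer was missing (not
the actual fio job; the path and sizes here are made up). Without the
memset() every buffer from posix_memalign() is still backed by the
shared CoW page, which is what tripped up the coalescing logic; dirtying
the buffers first gives each iovec its own page:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	enum { NVEC = 8, SZ = 4096 };
	struct iovec iov[NVEC];
	int i, fd;

	fd = open("/mnt/nfs/testfile", O_WRONLY | O_CREAT | O_DIRECT, 0644);
	if (fd < 0)
		return 1;

	for (i = 0; i < NVEC; i++) {
		if (posix_memalign(&iov[i].iov_base, SZ, SZ))
			return 1;
		/* dirty the buffer so it gets its own page up front */
		memset(iov[i].iov_base, i, SZ);
		iov[i].iov_len = SZ;
	}

	if (writev(fd, iov, NVEC) < 0)
		return 1;
	return close(fd);
}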


> -------------------------8<---------------------------
> 
> iov_iter: allow iov_iter_get_pages_alloc to return more pages per call
> 
> Currently, iov_iter_get_pages_alloc will only ever operate on the first
> vector that iterate_all_kinds hands back. Many of the callers however
> would like to have as long a set of pages as possible, to allow for
> fewer, but larger I/Os.
> 
> When the previous vector ends on a page boundary and the current one
> begins on one, we can continue to add more pages.
> 
> Change the function to first scan the iov_iter to see how long an
> array of pages we could create from the current position. Then,
> allocate an array that large (or up to the maxsize), and fill that
> many pages.
> 
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
>  lib/iov_iter.c | 140 +++++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 111 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index e68604ae3ced..956d17767a3e 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -883,6 +883,58 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_gap_alignment);
>  
> +/**
> + * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
> + * @i: iov_iter in which to find the size
> + *
> + * Some filesystems can stitch together multiple iovecs into a single
> + * page vector when both the previous tail and current base are page
> + * aligned. This function discovers the length that can fit in a single
> + * pagevec and returns it.
> + */
> +static size_t iov_iter_pvec_size(const struct iov_iter *i)
> +{
> +	size_t size = i->count;
> +	size_t pv_size = 0;
> +	bool contig = false, first = true;
> +
> +	if (!size)
> +		return 0;
> +
> +	/* Pipes are naturally aligned for this */
> +	if (unlikely(i->type & ITER_PIPE))
> +		return size;
> +
> +	/*
> +	 * An iov can be page vectored when the current base and previous
> +	 * tail are both page aligned. Note that we don't require that the
> +	 * initial base in the first iovec also be page aligned.
> +	 */
> +	iterate_all_kinds(i, size, v,
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }; 0;
> +		 }),
> +		({
> +		 if (first || (contig && v.bv_offset == 0)) {
> +			pv_size += v.bv_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
> +		 }
> +		 }),
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }
> +		 }))
> +	return pv_size;
> +}
> +
>  static inline size_t __pipe_get_pages(struct iov_iter *i,
>  				size_t maxsize,
>  				struct page **pages,
> @@ -1006,47 +1058,77 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
>  }
>  
>  ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
> -		   struct page ***pages, size_t maxsize,
> -		   size_t *start)
> +		   struct page ***ppages, size_t maxsize,
> +		   size_t *pstart)
>  {
> -	struct page **p;
> -
> -	if (maxsize > i->count)
> -		maxsize = i->count;
> +	struct page **p, **pc;
> +	size_t start = 0;
> +	ssize_t len = 0;
> +	int npages, res = 0;
> +	bool first = true;
>  
>  	if (unlikely(i->type & ITER_PIPE))
> -		return pipe_get_pages_alloc(i, pages, maxsize, start);
> +		return pipe_get_pages_alloc(i, ppages, maxsize, pstart);
> +
> +	maxsize = min(iov_iter_pvec_size(i), maxsize);
> +	npages = DIV_ROUND_UP(maxsize, PAGE_SIZE);
> +	p = get_pages_array(npages);
> +	if (!p)
> +		return -ENOMEM;
> +
> +	pc = p;
>  	iterate_all_kinds(i, maxsize, v, ({
>  		unsigned long addr = (unsigned long)v.iov_base;
> -		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
> +		size_t slen = v.iov_len;
>  		int n;
> -		int res;
>  
> -		addr &= ~(PAGE_SIZE - 1);
> -		n = DIV_ROUND_UP(len, PAGE_SIZE);
> -		p = get_pages_array(n);
> -		if (!p)
> -			return -ENOMEM;
> -		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
> -		if (unlikely(res < 0)) {
> -			kvfree(p);
> -			return res;
> +		if (first) {
> +			start = addr & (PAGE_SIZE - 1);
> +			slen += start;
> +			first = false;
>  		}
> -		*pages = p;
> -		return (res == n ? len : res * PAGE_SIZE) - *start;
> +
> +		n = DIV_ROUND_UP(slen, PAGE_SIZE);
> +		if ((pc + n) > (p + npages)) {
> +			/* Did something change the iov array?!? */
> +			res = -EFAULT;
> +			goto out;
> +		}
> +		addr &= ~(PAGE_SIZE - 1);
> +		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pc);
> +		if (unlikely(res < 0))
> +			goto out;
> +		len += (res == n ? slen : res * PAGE_SIZE) - start;
> +		pc += res;
>  	0;}),({
> -		/* can't be more than PAGE_SIZE */
> -		*start = v.bv_offset;
> -		*pages = p = get_pages_array(1);
> -		if (!p)
> -			return -ENOMEM;
> -		get_page(*p = v.bv_page);
> -		return v.bv_len;
> +		/* bio_vecs are limited to a single page each */
> +		if (first) {
> +			start = v.bv_offset;
> +			first = false;
> +		}
> +		get_page(*pc = v.bv_page);
> +		len += v.bv_len;
> +		++pc;
> +		BUG_ON(pc > p + npages);
>  	}),({
> -		return -EFAULT;
> +		/* FIXME: should we handle this case? */
> +		res = -EFAULT;
> +		goto out;
>  	})
>  	)
> -	return 0;
> +out:
> +	if (unlikely(res < 0)) {
> +		struct page **i;
> +
> +		for (i = p; i < pc; i++)
> +			put_page(*i);
> +		kvfree(p);
> +		return res;
> +	}
> +
> +	*ppages = p;
> +	*pstart = start;
> +	return len;
>  }
>  EXPORT_SYMBOL(iov_iter_get_pages_alloc);
>  

-- 
Jeff Layton <jlayton@poochiereds.net>

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2017-01-24 20:51 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-06 18:23 [PATCH] ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths Jeff Layton
2017-01-06 20:36 ` Jeff Layton
2017-01-09 23:11 ` Jeff Layton
2017-01-10 12:57 ` [PATCH v2] " Jeff Layton
2017-01-11  2:42   ` Yan, Zheng
2017-01-12  7:59   ` Al Viro
2017-01-12 11:13     ` Ilya Dryomov
2017-01-12 11:37       ` Al Viro
2017-01-12 11:46         ` Ilya Dryomov
2017-01-12 11:53           ` Al Viro
2017-01-12 12:17             ` Ilya Dryomov
2017-01-12 11:57           ` Jeff Layton
2017-01-12 11:27     ` Jeff Layton
2017-01-12 11:37       ` Ilya Dryomov
2017-01-18 12:14         ` Jeff Layton
2017-01-24 15:00           ` Jeff Layton
2017-01-24 20:51             ` Jeff Layton
