All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] split struct kiocb
@ 2015-02-23 18:00 Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 01/12] new helper: dup_iter() Christoph Hellwig
                   ` (12 more replies)
  0 siblings, 13 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

This series cuts down the amount of fiels in the public iocb that is
allocated on stack for every synchronous I/O, both by removing fields
from it, and by adding a aio-specific iocb that is only allocated
for aio requests.

Additionally it cleans up various corner cases in the aio completion
code and allowes for adding a simple in-kernel async read/write
interface.

The first few patches are from Al's gadget branch and reposted
here because they are needed for the rest of the series.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 01/12] new helper: dup_iter()
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 02/12] move iov_iter.c from mm/ to lib/ Christoph Hellwig
                   ` (11 subsequent siblings)
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

Copy iter and kmemdup the underlying array for the copy.  Returns
a pointer to result of kmemdup() to be kfree()'d later.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/uio.h |  2 ++
 mm/iov_iter.c       | 15 +++++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 07a0226..7188029 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -98,6 +98,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
+const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
+
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
 	return i->count;
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 8277320..9d96e283 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -751,3 +751,18 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	return npages;
 }
 EXPORT_SYMBOL(iov_iter_npages);
+
+const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
+{
+	*new = *old;
+	if (new->type & ITER_BVEC)
+		return new->bvec = kmemdup(new->bvec,
+				    new->nr_segs * sizeof(struct bio_vec),
+				    flags);
+	else
+		/* iovec and kvec have identical layout */
+		return new->iov = kmemdup(new->iov,
+				   new->nr_segs * sizeof(struct iovec),
+				   flags);
+}
+EXPORT_SYMBOL(dup_iter);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 02/12] move iov_iter.c from mm/ to lib/
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 01/12] new helper: dup_iter() Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 03/12] gadget/function/f_fs.c: close leaks Christoph Hellwig
                   ` (10 subsequent siblings)
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 lib/Makefile   |   2 +-
 lib/iov_iter.c | 768 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/Makefile    |   2 +-
 mm/iov_iter.c  | 768 ---------------------------------------------------------
 4 files changed, 770 insertions(+), 770 deletions(-)
 create mode 100644 lib/iov_iter.c
 delete mode 100644 mm/iov_iter.c

diff --git a/lib/Makefile b/lib/Makefile
index 87eb3bf..58f74d2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -24,7 +24,7 @@ obj-y	+= lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
-	 gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \
+	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
new file mode 100644
index 0000000..9d96e283
--- /dev/null
+++ b/lib/iov_iter.c
@@ -0,0 +1,768 @@
+#include <linux/export.h>
+#include <linux/uio.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <net/checksum.h>
+
+#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
+	size_t left;					\
+	size_t wanted = n;				\
+	__p = i->iov;					\
+	__v.iov_len = min(n, __p->iov_len - skip);	\
+	if (likely(__v.iov_len)) {			\
+		__v.iov_base = __p->iov_base + skip;	\
+		left = (STEP);				\
+		__v.iov_len -= left;			\
+		skip += __v.iov_len;			\
+		n -= __v.iov_len;			\
+	} else {					\
+		left = 0;				\
+	}						\
+	while (unlikely(!left && n)) {			\
+		__p++;					\
+		__v.iov_len = min(n, __p->iov_len);	\
+		if (unlikely(!__v.iov_len))		\
+			continue;			\
+		__v.iov_base = __p->iov_base;		\
+		left = (STEP);				\
+		__v.iov_len -= left;			\
+		skip = __v.iov_len;			\
+		n -= __v.iov_len;			\
+	}						\
+	n = wanted - n;					\
+}
+
+#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
+	size_t wanted = n;				\
+	__p = i->kvec;					\
+	__v.iov_len = min(n, __p->iov_len - skip);	\
+	if (likely(__v.iov_len)) {			\
+		__v.iov_base = __p->iov_base + skip;	\
+		(void)(STEP);				\
+		skip += __v.iov_len;			\
+		n -= __v.iov_len;			\
+	}						\
+	while (unlikely(n)) {				\
+		__p++;					\
+		__v.iov_len = min(n, __p->iov_len);	\
+		if (unlikely(!__v.iov_len))		\
+			continue;			\
+		__v.iov_base = __p->iov_base;		\
+		(void)(STEP);				\
+		skip = __v.iov_len;			\
+		n -= __v.iov_len;			\
+	}						\
+	n = wanted;					\
+}
+
+#define iterate_bvec(i, n, __v, __p, skip, STEP) {	\
+	size_t wanted = n;				\
+	__p = i->bvec;					\
+	__v.bv_len = min_t(size_t, n, __p->bv_len - skip);	\
+	if (likely(__v.bv_len)) {			\
+		__v.bv_page = __p->bv_page;		\
+		__v.bv_offset = __p->bv_offset + skip; 	\
+		(void)(STEP);				\
+		skip += __v.bv_len;			\
+		n -= __v.bv_len;			\
+	}						\
+	while (unlikely(n)) {				\
+		__p++;					\
+		__v.bv_len = min_t(size_t, n, __p->bv_len);	\
+		if (unlikely(!__v.bv_len))		\
+			continue;			\
+		__v.bv_page = __p->bv_page;		\
+		__v.bv_offset = __p->bv_offset;		\
+		(void)(STEP);				\
+		skip = __v.bv_len;			\
+		n -= __v.bv_len;			\
+	}						\
+	n = wanted;					\
+}
+
+#define iterate_all_kinds(i, n, v, I, B, K) {			\
+	size_t skip = i->iov_offset;				\
+	if (unlikely(i->type & ITER_BVEC)) {			\
+		const struct bio_vec *bvec;			\
+		struct bio_vec v;				\
+		iterate_bvec(i, n, v, bvec, skip, (B))		\
+	} else if (unlikely(i->type & ITER_KVEC)) {		\
+		const struct kvec *kvec;			\
+		struct kvec v;					\
+		iterate_kvec(i, n, v, kvec, skip, (K))		\
+	} else {						\
+		const struct iovec *iov;			\
+		struct iovec v;					\
+		iterate_iovec(i, n, v, iov, skip, (I))		\
+	}							\
+}
+
+#define iterate_and_advance(i, n, v, I, B, K) {			\
+	size_t skip = i->iov_offset;				\
+	if (unlikely(i->type & ITER_BVEC)) {			\
+		const struct bio_vec *bvec;			\
+		struct bio_vec v;				\
+		iterate_bvec(i, n, v, bvec, skip, (B))		\
+		if (skip == bvec->bv_len) {			\
+			bvec++;					\
+			skip = 0;				\
+		}						\
+		i->nr_segs -= bvec - i->bvec;			\
+		i->bvec = bvec;					\
+	} else if (unlikely(i->type & ITER_KVEC)) {		\
+		const struct kvec *kvec;			\
+		struct kvec v;					\
+		iterate_kvec(i, n, v, kvec, skip, (K))		\
+		if (skip == kvec->iov_len) {			\
+			kvec++;					\
+			skip = 0;				\
+		}						\
+		i->nr_segs -= kvec - i->kvec;			\
+		i->kvec = kvec;					\
+	} else {						\
+		const struct iovec *iov;			\
+		struct iovec v;					\
+		iterate_iovec(i, n, v, iov, skip, (I))		\
+		if (skip == iov->iov_len) {			\
+			iov++;					\
+			skip = 0;				\
+		}						\
+		i->nr_segs -= iov - i->iov;			\
+		i->iov = iov;					\
+	}							\
+	i->count -= n;						\
+	i->iov_offset = skip;					\
+}
+
+static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+	void *kaddr, *from;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	if (!fault_in_pages_writeable(buf, copy)) {
+		kaddr = kmap_atomic(page);
+		from = kaddr + offset;
+
+		/* first chunk, usually the only one */
+		left = __copy_to_user_inatomic(buf, from, copy);
+		copy -= left;
+		skip += copy;
+		from += copy;
+		bytes -= copy;
+
+		while (unlikely(!left && bytes)) {
+			iov++;
+			buf = iov->iov_base;
+			copy = min(bytes, iov->iov_len);
+			left = __copy_to_user_inatomic(buf, from, copy);
+			copy -= left;
+			skip = copy;
+			from += copy;
+			bytes -= copy;
+		}
+		if (likely(!bytes)) {
+			kunmap_atomic(kaddr);
+			goto done;
+		}
+		offset = from - kaddr;
+		buf += copy;
+		kunmap_atomic(kaddr);
+		copy = min(bytes, iov->iov_len - skip);
+	}
+	/* Too bad - revert to non-atomic kmap */
+	kaddr = kmap(page);
+	from = kaddr + offset;
+	left = __copy_to_user(buf, from, copy);
+	copy -= left;
+	skip += copy;
+	from += copy;
+	bytes -= copy;
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __copy_to_user(buf, from, copy);
+		copy -= left;
+		skip = copy;
+		from += copy;
+		bytes -= copy;
+	}
+	kunmap(page);
+done:
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
+static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+	void *kaddr, *to;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	if (!fault_in_pages_readable(buf, copy)) {
+		kaddr = kmap_atomic(page);
+		to = kaddr + offset;
+
+		/* first chunk, usually the only one */
+		left = __copy_from_user_inatomic(to, buf, copy);
+		copy -= left;
+		skip += copy;
+		to += copy;
+		bytes -= copy;
+
+		while (unlikely(!left && bytes)) {
+			iov++;
+			buf = iov->iov_base;
+			copy = min(bytes, iov->iov_len);
+			left = __copy_from_user_inatomic(to, buf, copy);
+			copy -= left;
+			skip = copy;
+			to += copy;
+			bytes -= copy;
+		}
+		if (likely(!bytes)) {
+			kunmap_atomic(kaddr);
+			goto done;
+		}
+		offset = to - kaddr;
+		buf += copy;
+		kunmap_atomic(kaddr);
+		copy = min(bytes, iov->iov_len - skip);
+	}
+	/* Too bad - revert to non-atomic kmap */
+	kaddr = kmap(page);
+	to = kaddr + offset;
+	left = __copy_from_user(to, buf, copy);
+	copy -= left;
+	skip += copy;
+	to += copy;
+	bytes -= copy;
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __copy_from_user(to, buf, copy);
+		copy -= left;
+		skip = copy;
+		to += copy;
+		bytes -= copy;
+	}
+	kunmap(page);
+done:
+	if (skip == iov->iov_len) {
+		iov++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
+/*
+ * Fault in the first iovec of the given iov_iter, to a maximum length
+ * of bytes. Returns 0 on success, or non-zero if the memory could not be
+ * accessed (ie. because it is an invalid address).
+ *
+ * writev-intensive code may want this to prefault several iovecs -- that
+ * would be possible (callers must not rely on the fact that _only_ the
+ * first iovec will be faulted with the current implementation).
+ */
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
+{
+	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+		return fault_in_pages_readable(buf, bytes);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
+
+void iov_iter_init(struct iov_iter *i, int direction,
+			const struct iovec *iov, unsigned long nr_segs,
+			size_t count)
+{
+	/* It will get better.  Eventually... */
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		direction |= ITER_KVEC;
+		i->type = direction;
+		i->kvec = (struct kvec *)iov;
+	} else {
+		i->type = direction;
+		i->iov = iov;
+	}
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_init);
+
+static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
+{
+	char *from = kmap_atomic(page);
+	memcpy(to, from + offset, len);
+	kunmap_atomic(from);
+}
+
+static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
+{
+	char *to = kmap_atomic(page);
+	memcpy(to + offset, from, len);
+	kunmap_atomic(to);
+}
+
+static void memzero_page(struct page *page, size_t offset, size_t len)
+{
+	char *addr = kmap_atomic(page);
+	memset(addr + offset, 0, len);
+	kunmap_atomic(addr);
+}
+
+size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *from = addr;
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	iterate_and_advance(i, bytes, v,
+		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
+			       v.iov_len),
+		memcpy_to_page(v.bv_page, v.bv_offset,
+			       (from += v.bv_len) - v.bv_len, v.bv_len),
+		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
+	)
+
+	return bytes;
+}
+EXPORT_SYMBOL(copy_to_iter);
+
+size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	iterate_and_advance(i, bytes, v,
+		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
+				 v.iov_len),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+
+	return bytes;
+}
+EXPORT_SYMBOL(copy_from_iter);
+
+size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	char *to = addr;
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	iterate_and_advance(i, bytes, v,
+		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
+					 v.iov_base, v.iov_len),
+		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+
+	return bytes;
+}
+EXPORT_SYMBOL(copy_from_iter_nocache);
+
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	if (i->type & (ITER_BVEC|ITER_KVEC)) {
+		void *kaddr = kmap_atomic(page);
+		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
+		kunmap_atomic(kaddr);
+		return wanted;
+	} else
+		return copy_page_to_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_to_iter);
+
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	if (i->type & (ITER_BVEC|ITER_KVEC)) {
+		void *kaddr = kmap_atomic(page);
+		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
+		kunmap_atomic(kaddr);
+		return wanted;
+	} else
+		return copy_page_from_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_from_iter);
+
+size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
+{
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	iterate_and_advance(i, bytes, v,
+		__clear_user(v.iov_base, v.iov_len),
+		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
+		memset(v.iov_base, 0, v.iov_len)
+	)
+
+	return bytes;
+}
+EXPORT_SYMBOL(iov_iter_zero);
+
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
+	iterate_all_kinds(i, bytes, v,
+		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
+					  v.iov_base, v.iov_len),
+		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
+				 v.bv_offset, v.bv_len),
+		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+	)
+	kunmap_atomic(kaddr);
+	return bytes;
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+
+void iov_iter_advance(struct iov_iter *i, size_t size)
+{
+	iterate_and_advance(i, size, v, 0, 0, 0)
+}
+EXPORT_SYMBOL(iov_iter_advance);
+
+/*
+ * Return the count of just the current iov_iter segment.
+ */
+size_t iov_iter_single_seg_count(const struct iov_iter *i)
+{
+	if (i->nr_segs == 1)
+		return i->count;
+	else if (i->type & ITER_BVEC)
+		return min(i->count, i->bvec->bv_len - i->iov_offset);
+	else
+		return min(i->count, i->iov->iov_len - i->iov_offset);
+}
+EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+void iov_iter_kvec(struct iov_iter *i, int direction,
+			const struct kvec *kvec, unsigned long nr_segs,
+			size_t count)
+{
+	BUG_ON(!(direction & ITER_KVEC));
+	i->type = direction;
+	i->kvec = kvec;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_kvec);
+
+void iov_iter_bvec(struct iov_iter *i, int direction,
+			const struct bio_vec *bvec, unsigned long nr_segs,
+			size_t count)
+{
+	BUG_ON(!(direction & ITER_BVEC));
+	i->type = direction;
+	i->bvec = bvec;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_bvec);
+
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+	unsigned long res = 0;
+	size_t size = i->count;
+
+	if (!size)
+		return 0;
+
+	iterate_all_kinds(i, size, v,
+		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
+		res |= v.bv_offset | v.bv_len,
+		res |= (unsigned long)v.iov_base | v.iov_len
+	)
+	return res;
+}
+EXPORT_SYMBOL(iov_iter_alignment);
+
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+		   struct page **pages, size_t maxsize, unsigned maxpages,
+		   size_t *start)
+{
+	if (maxsize > i->count)
+		maxsize = i->count;
+
+	if (!maxsize)
+		return 0;
+
+	iterate_all_kinds(i, maxsize, v, ({
+		unsigned long addr = (unsigned long)v.iov_base;
+		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
+		int n;
+		int res;
+
+		if (len > maxpages * PAGE_SIZE)
+			len = maxpages * PAGE_SIZE;
+		addr &= ~(PAGE_SIZE - 1);
+		n = DIV_ROUND_UP(len, PAGE_SIZE);
+		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
+		if (unlikely(res < 0))
+			return res;
+		return (res == n ? len : res * PAGE_SIZE) - *start;
+	0;}),({
+		/* can't be more than PAGE_SIZE */
+		*start = v.bv_offset;
+		get_page(*pages = v.bv_page);
+		return v.bv_len;
+	}),({
+		return -EFAULT;
+	})
+	)
+	return 0;
+}
+EXPORT_SYMBOL(iov_iter_get_pages);
+
+static struct page **get_pages_array(size_t n)
+{
+	struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
+	if (!p)
+		p = vmalloc(n * sizeof(struct page *));
+	return p;
+}
+
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	struct page **p;
+
+	if (maxsize > i->count)
+		maxsize = i->count;
+
+	if (!maxsize)
+		return 0;
+
+	iterate_all_kinds(i, maxsize, v, ({
+		unsigned long addr = (unsigned long)v.iov_base;
+		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
+		int n;
+		int res;
+
+		addr &= ~(PAGE_SIZE - 1);
+		n = DIV_ROUND_UP(len, PAGE_SIZE);
+		p = get_pages_array(n);
+		if (!p)
+			return -ENOMEM;
+		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+		if (unlikely(res < 0)) {
+			kvfree(p);
+			return res;
+		}
+		*pages = p;
+		return (res == n ? len : res * PAGE_SIZE) - *start;
+	0;}),({
+		/* can't be more than PAGE_SIZE */
+		*start = v.bv_offset;
+		*pages = p = get_pages_array(1);
+		if (!p)
+			return -ENOMEM;
+		get_page(*p = v.bv_page);
+		return v.bv_len;
+	}),({
+		return -EFAULT;
+	})
+	)
+	return 0;
+}
+EXPORT_SYMBOL(iov_iter_get_pages_alloc);
+
+size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
+			       struct iov_iter *i)
+{
+	char *to = addr;
+	__wsum sum, next;
+	size_t off = 0;
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	sum = *csum;
+	iterate_and_advance(i, bytes, v, ({
+		int err = 0;
+		next = csum_and_copy_from_user(v.iov_base, 
+					       (to += v.iov_len) - v.iov_len,
+					       v.iov_len, 0, &err);
+		if (!err) {
+			sum = csum_block_add(sum, next, off);
+			off += v.iov_len;
+		}
+		err ? v.iov_len : 0;
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		next = csum_partial_copy_nocheck(p + v.bv_offset,
+						 (to += v.bv_len) - v.bv_len,
+						 v.bv_len, 0);
+		kunmap_atomic(p);
+		sum = csum_block_add(sum, next, off);
+		off += v.bv_len;
+	}),({
+		next = csum_partial_copy_nocheck(v.iov_base,
+						 (to += v.iov_len) - v.iov_len,
+						 v.iov_len, 0);
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+	})
+	)
+	*csum = sum;
+	return bytes;
+}
+EXPORT_SYMBOL(csum_and_copy_from_iter);
+
+size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum,
+			     struct iov_iter *i)
+{
+	char *from = addr;
+	__wsum sum, next;
+	size_t off = 0;
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	sum = *csum;
+	iterate_and_advance(i, bytes, v, ({
+		int err = 0;
+		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
+					     v.iov_base, 
+					     v.iov_len, 0, &err);
+		if (!err) {
+			sum = csum_block_add(sum, next, off);
+			off += v.iov_len;
+		}
+		err ? v.iov_len : 0;
+	}), ({
+		char *p = kmap_atomic(v.bv_page);
+		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
+						 p + v.bv_offset,
+						 v.bv_len, 0);
+		kunmap_atomic(p);
+		sum = csum_block_add(sum, next, off);
+		off += v.bv_len;
+	}),({
+		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
+						 v.iov_base,
+						 v.iov_len, 0);
+		sum = csum_block_add(sum, next, off);
+		off += v.iov_len;
+	})
+	)
+	*csum = sum;
+	return bytes;
+}
+EXPORT_SYMBOL(csum_and_copy_to_iter);
+
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+	size_t size = i->count;
+	int npages = 0;
+
+	if (!size)
+		return 0;
+
+	iterate_all_kinds(i, size, v, ({
+		unsigned long p = (unsigned long)v.iov_base;
+		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
+			- p / PAGE_SIZE;
+		if (npages >= maxpages)
+			return maxpages;
+	0;}),({
+		npages++;
+		if (npages >= maxpages)
+			return maxpages;
+	}),({
+		unsigned long p = (unsigned long)v.iov_base;
+		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
+			- p / PAGE_SIZE;
+		if (npages >= maxpages)
+			return maxpages;
+	})
+	)
+	return npages;
+}
+EXPORT_SYMBOL(iov_iter_npages);
+
+const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
+{
+	*new = *old;
+	if (new->type & ITER_BVEC)
+		return new->bvec = kmemdup(new->bvec,
+				    new->nr_segs * sizeof(struct bio_vec),
+				    flags);
+	else
+		/* iovec and kvec have identical layout */
+		return new->iov = kmemdup(new->iov,
+				   new->nr_segs * sizeof(struct iovec),
+				   flags);
+}
+EXPORT_SYMBOL(dup_iter);
diff --git a/mm/Makefile b/mm/Makefile
index 3c1caa2..15dbe99 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -21,7 +21,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
 			   compaction.o vmacache.o \
 			   interval_tree.o list_lru.o workingset.o \
-			   iov_iter.o debug.o $(mmu-y)
+			   debug.o $(mmu-y)
 
 obj-y += init-mm.o
 
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
deleted file mode 100644
index 9d96e283..0000000
--- a/mm/iov_iter.c
+++ /dev/null
@@ -1,768 +0,0 @@
-#include <linux/export.h>
-#include <linux/uio.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-
-#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
-	size_t left;					\
-	size_t wanted = n;				\
-	__p = i->iov;					\
-	__v.iov_len = min(n, __p->iov_len - skip);	\
-	if (likely(__v.iov_len)) {			\
-		__v.iov_base = __p->iov_base + skip;	\
-		left = (STEP);				\
-		__v.iov_len -= left;			\
-		skip += __v.iov_len;			\
-		n -= __v.iov_len;			\
-	} else {					\
-		left = 0;				\
-	}						\
-	while (unlikely(!left && n)) {			\
-		__p++;					\
-		__v.iov_len = min(n, __p->iov_len);	\
-		if (unlikely(!__v.iov_len))		\
-			continue;			\
-		__v.iov_base = __p->iov_base;		\
-		left = (STEP);				\
-		__v.iov_len -= left;			\
-		skip = __v.iov_len;			\
-		n -= __v.iov_len;			\
-	}						\
-	n = wanted - n;					\
-}
-
-#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
-	size_t wanted = n;				\
-	__p = i->kvec;					\
-	__v.iov_len = min(n, __p->iov_len - skip);	\
-	if (likely(__v.iov_len)) {			\
-		__v.iov_base = __p->iov_base + skip;	\
-		(void)(STEP);				\
-		skip += __v.iov_len;			\
-		n -= __v.iov_len;			\
-	}						\
-	while (unlikely(n)) {				\
-		__p++;					\
-		__v.iov_len = min(n, __p->iov_len);	\
-		if (unlikely(!__v.iov_len))		\
-			continue;			\
-		__v.iov_base = __p->iov_base;		\
-		(void)(STEP);				\
-		skip = __v.iov_len;			\
-		n -= __v.iov_len;			\
-	}						\
-	n = wanted;					\
-}
-
-#define iterate_bvec(i, n, __v, __p, skip, STEP) {	\
-	size_t wanted = n;				\
-	__p = i->bvec;					\
-	__v.bv_len = min_t(size_t, n, __p->bv_len - skip);	\
-	if (likely(__v.bv_len)) {			\
-		__v.bv_page = __p->bv_page;		\
-		__v.bv_offset = __p->bv_offset + skip; 	\
-		(void)(STEP);				\
-		skip += __v.bv_len;			\
-		n -= __v.bv_len;			\
-	}						\
-	while (unlikely(n)) {				\
-		__p++;					\
-		__v.bv_len = min_t(size_t, n, __p->bv_len);	\
-		if (unlikely(!__v.bv_len))		\
-			continue;			\
-		__v.bv_page = __p->bv_page;		\
-		__v.bv_offset = __p->bv_offset;		\
-		(void)(STEP);				\
-		skip = __v.bv_len;			\
-		n -= __v.bv_len;			\
-	}						\
-	n = wanted;					\
-}
-
-#define iterate_all_kinds(i, n, v, I, B, K) {			\
-	size_t skip = i->iov_offset;				\
-	if (unlikely(i->type & ITER_BVEC)) {			\
-		const struct bio_vec *bvec;			\
-		struct bio_vec v;				\
-		iterate_bvec(i, n, v, bvec, skip, (B))		\
-	} else if (unlikely(i->type & ITER_KVEC)) {		\
-		const struct kvec *kvec;			\
-		struct kvec v;					\
-		iterate_kvec(i, n, v, kvec, skip, (K))		\
-	} else {						\
-		const struct iovec *iov;			\
-		struct iovec v;					\
-		iterate_iovec(i, n, v, iov, skip, (I))		\
-	}							\
-}
-
-#define iterate_and_advance(i, n, v, I, B, K) {			\
-	size_t skip = i->iov_offset;				\
-	if (unlikely(i->type & ITER_BVEC)) {			\
-		const struct bio_vec *bvec;			\
-		struct bio_vec v;				\
-		iterate_bvec(i, n, v, bvec, skip, (B))		\
-		if (skip == bvec->bv_len) {			\
-			bvec++;					\
-			skip = 0;				\
-		}						\
-		i->nr_segs -= bvec - i->bvec;			\
-		i->bvec = bvec;					\
-	} else if (unlikely(i->type & ITER_KVEC)) {		\
-		const struct kvec *kvec;			\
-		struct kvec v;					\
-		iterate_kvec(i, n, v, kvec, skip, (K))		\
-		if (skip == kvec->iov_len) {			\
-			kvec++;					\
-			skip = 0;				\
-		}						\
-		i->nr_segs -= kvec - i->kvec;			\
-		i->kvec = kvec;					\
-	} else {						\
-		const struct iovec *iov;			\
-		struct iovec v;					\
-		iterate_iovec(i, n, v, iov, skip, (I))		\
-		if (skip == iov->iov_len) {			\
-			iov++;					\
-			skip = 0;				\
-		}						\
-		i->nr_segs -= iov - i->iov;			\
-		i->iov = iov;					\
-	}							\
-	i->count -= n;						\
-	i->iov_offset = skip;					\
-}
-
-static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i)
-{
-	size_t skip, copy, left, wanted;
-	const struct iovec *iov;
-	char __user *buf;
-	void *kaddr, *from;
-
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	wanted = bytes;
-	iov = i->iov;
-	skip = i->iov_offset;
-	buf = iov->iov_base + skip;
-	copy = min(bytes, iov->iov_len - skip);
-
-	if (!fault_in_pages_writeable(buf, copy)) {
-		kaddr = kmap_atomic(page);
-		from = kaddr + offset;
-
-		/* first chunk, usually the only one */
-		left = __copy_to_user_inatomic(buf, from, copy);
-		copy -= left;
-		skip += copy;
-		from += copy;
-		bytes -= copy;
-
-		while (unlikely(!left && bytes)) {
-			iov++;
-			buf = iov->iov_base;
-			copy = min(bytes, iov->iov_len);
-			left = __copy_to_user_inatomic(buf, from, copy);
-			copy -= left;
-			skip = copy;
-			from += copy;
-			bytes -= copy;
-		}
-		if (likely(!bytes)) {
-			kunmap_atomic(kaddr);
-			goto done;
-		}
-		offset = from - kaddr;
-		buf += copy;
-		kunmap_atomic(kaddr);
-		copy = min(bytes, iov->iov_len - skip);
-	}
-	/* Too bad - revert to non-atomic kmap */
-	kaddr = kmap(page);
-	from = kaddr + offset;
-	left = __copy_to_user(buf, from, copy);
-	copy -= left;
-	skip += copy;
-	from += copy;
-	bytes -= copy;
-	while (unlikely(!left && bytes)) {
-		iov++;
-		buf = iov->iov_base;
-		copy = min(bytes, iov->iov_len);
-		left = __copy_to_user(buf, from, copy);
-		copy -= left;
-		skip = copy;
-		from += copy;
-		bytes -= copy;
-	}
-	kunmap(page);
-done:
-	if (skip == iov->iov_len) {
-		iov++;
-		skip = 0;
-	}
-	i->count -= wanted - bytes;
-	i->nr_segs -= iov - i->iov;
-	i->iov = iov;
-	i->iov_offset = skip;
-	return wanted - bytes;
-}
-
-static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i)
-{
-	size_t skip, copy, left, wanted;
-	const struct iovec *iov;
-	char __user *buf;
-	void *kaddr, *to;
-
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	wanted = bytes;
-	iov = i->iov;
-	skip = i->iov_offset;
-	buf = iov->iov_base + skip;
-	copy = min(bytes, iov->iov_len - skip);
-
-	if (!fault_in_pages_readable(buf, copy)) {
-		kaddr = kmap_atomic(page);
-		to = kaddr + offset;
-
-		/* first chunk, usually the only one */
-		left = __copy_from_user_inatomic(to, buf, copy);
-		copy -= left;
-		skip += copy;
-		to += copy;
-		bytes -= copy;
-
-		while (unlikely(!left && bytes)) {
-			iov++;
-			buf = iov->iov_base;
-			copy = min(bytes, iov->iov_len);
-			left = __copy_from_user_inatomic(to, buf, copy);
-			copy -= left;
-			skip = copy;
-			to += copy;
-			bytes -= copy;
-		}
-		if (likely(!bytes)) {
-			kunmap_atomic(kaddr);
-			goto done;
-		}
-		offset = to - kaddr;
-		buf += copy;
-		kunmap_atomic(kaddr);
-		copy = min(bytes, iov->iov_len - skip);
-	}
-	/* Too bad - revert to non-atomic kmap */
-	kaddr = kmap(page);
-	to = kaddr + offset;
-	left = __copy_from_user(to, buf, copy);
-	copy -= left;
-	skip += copy;
-	to += copy;
-	bytes -= copy;
-	while (unlikely(!left && bytes)) {
-		iov++;
-		buf = iov->iov_base;
-		copy = min(bytes, iov->iov_len);
-		left = __copy_from_user(to, buf, copy);
-		copy -= left;
-		skip = copy;
-		to += copy;
-		bytes -= copy;
-	}
-	kunmap(page);
-done:
-	if (skip == iov->iov_len) {
-		iov++;
-		skip = 0;
-	}
-	i->count -= wanted - bytes;
-	i->nr_segs -= iov - i->iov;
-	i->iov = iov;
-	i->iov_offset = skip;
-	return wanted - bytes;
-}
-
-/*
- * Fault in the first iovec of the given iov_iter, to a maximum length
- * of bytes. Returns 0 on success, or non-zero if the memory could not be
- * accessed (ie. because it is an invalid address).
- *
- * writev-intensive code may want this to prefault several iovecs -- that
- * would be possible (callers must not rely on the fact that _only_ the
- * first iovec will be faulted with the current implementation).
- */
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
-{
-	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
-		char __user *buf = i->iov->iov_base + i->iov_offset;
-		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-		return fault_in_pages_readable(buf, bytes);
-	}
-	return 0;
-}
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
-
-void iov_iter_init(struct iov_iter *i, int direction,
-			const struct iovec *iov, unsigned long nr_segs,
-			size_t count)
-{
-	/* It will get better.  Eventually... */
-	if (segment_eq(get_fs(), KERNEL_DS)) {
-		direction |= ITER_KVEC;
-		i->type = direction;
-		i->kvec = (struct kvec *)iov;
-	} else {
-		i->type = direction;
-		i->iov = iov;
-	}
-	i->nr_segs = nr_segs;
-	i->iov_offset = 0;
-	i->count = count;
-}
-EXPORT_SYMBOL(iov_iter_init);
-
-static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
-{
-	char *from = kmap_atomic(page);
-	memcpy(to, from + offset, len);
-	kunmap_atomic(from);
-}
-
-static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
-{
-	char *to = kmap_atomic(page);
-	memcpy(to + offset, from, len);
-	kunmap_atomic(to);
-}
-
-static void memzero_page(struct page *page, size_t offset, size_t len)
-{
-	char *addr = kmap_atomic(page);
-	memset(addr + offset, 0, len);
-	kunmap_atomic(addr);
-}
-
-size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
-	char *from = addr;
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	iterate_and_advance(i, bytes, v,
-		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
-			       v.iov_len),
-		memcpy_to_page(v.bv_page, v.bv_offset,
-			       (from += v.bv_len) - v.bv_len, v.bv_len),
-		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
-	)
-
-	return bytes;
-}
-EXPORT_SYMBOL(copy_to_iter);
-
-size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
-{
-	char *to = addr;
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	iterate_and_advance(i, bytes, v,
-		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
-				 v.iov_len),
-		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
-				 v.bv_offset, v.bv_len),
-		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
-	)
-
-	return bytes;
-}
-EXPORT_SYMBOL(copy_from_iter);
-
-size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
-{
-	char *to = addr;
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	iterate_and_advance(i, bytes, v,
-		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
-					 v.iov_base, v.iov_len),
-		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
-				 v.bv_offset, v.bv_len),
-		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
-	)
-
-	return bytes;
-}
-EXPORT_SYMBOL(copy_from_iter_nocache);
-
-size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i)
-{
-	if (i->type & (ITER_BVEC|ITER_KVEC)) {
-		void *kaddr = kmap_atomic(page);
-		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
-		kunmap_atomic(kaddr);
-		return wanted;
-	} else
-		return copy_page_to_iter_iovec(page, offset, bytes, i);
-}
-EXPORT_SYMBOL(copy_page_to_iter);
-
-size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i)
-{
-	if (i->type & (ITER_BVEC|ITER_KVEC)) {
-		void *kaddr = kmap_atomic(page);
-		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
-		kunmap_atomic(kaddr);
-		return wanted;
-	} else
-		return copy_page_from_iter_iovec(page, offset, bytes, i);
-}
-EXPORT_SYMBOL(copy_page_from_iter);
-
-size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
-{
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	iterate_and_advance(i, bytes, v,
-		__clear_user(v.iov_base, v.iov_len),
-		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
-		memset(v.iov_base, 0, v.iov_len)
-	)
-
-	return bytes;
-}
-EXPORT_SYMBOL(iov_iter_zero);
-
-size_t iov_iter_copy_from_user_atomic(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
-	iterate_all_kinds(i, bytes, v,
-		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
-					  v.iov_base, v.iov_len),
-		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
-				 v.bv_offset, v.bv_len),
-		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
-	)
-	kunmap_atomic(kaddr);
-	return bytes;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
-
-void iov_iter_advance(struct iov_iter *i, size_t size)
-{
-	iterate_and_advance(i, size, v, 0, 0, 0)
-}
-EXPORT_SYMBOL(iov_iter_advance);
-
-/*
- * Return the count of just the current iov_iter segment.
- */
-size_t iov_iter_single_seg_count(const struct iov_iter *i)
-{
-	if (i->nr_segs == 1)
-		return i->count;
-	else if (i->type & ITER_BVEC)
-		return min(i->count, i->bvec->bv_len - i->iov_offset);
-	else
-		return min(i->count, i->iov->iov_len - i->iov_offset);
-}
-EXPORT_SYMBOL(iov_iter_single_seg_count);
-
-void iov_iter_kvec(struct iov_iter *i, int direction,
-			const struct kvec *kvec, unsigned long nr_segs,
-			size_t count)
-{
-	BUG_ON(!(direction & ITER_KVEC));
-	i->type = direction;
-	i->kvec = kvec;
-	i->nr_segs = nr_segs;
-	i->iov_offset = 0;
-	i->count = count;
-}
-EXPORT_SYMBOL(iov_iter_kvec);
-
-void iov_iter_bvec(struct iov_iter *i, int direction,
-			const struct bio_vec *bvec, unsigned long nr_segs,
-			size_t count)
-{
-	BUG_ON(!(direction & ITER_BVEC));
-	i->type = direction;
-	i->bvec = bvec;
-	i->nr_segs = nr_segs;
-	i->iov_offset = 0;
-	i->count = count;
-}
-EXPORT_SYMBOL(iov_iter_bvec);
-
-unsigned long iov_iter_alignment(const struct iov_iter *i)
-{
-	unsigned long res = 0;
-	size_t size = i->count;
-
-	if (!size)
-		return 0;
-
-	iterate_all_kinds(i, size, v,
-		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
-		res |= v.bv_offset | v.bv_len,
-		res |= (unsigned long)v.iov_base | v.iov_len
-	)
-	return res;
-}
-EXPORT_SYMBOL(iov_iter_alignment);
-
-ssize_t iov_iter_get_pages(struct iov_iter *i,
-		   struct page **pages, size_t maxsize, unsigned maxpages,
-		   size_t *start)
-{
-	if (maxsize > i->count)
-		maxsize = i->count;
-
-	if (!maxsize)
-		return 0;
-
-	iterate_all_kinds(i, maxsize, v, ({
-		unsigned long addr = (unsigned long)v.iov_base;
-		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
-		int n;
-		int res;
-
-		if (len > maxpages * PAGE_SIZE)
-			len = maxpages * PAGE_SIZE;
-		addr &= ~(PAGE_SIZE - 1);
-		n = DIV_ROUND_UP(len, PAGE_SIZE);
-		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
-		if (unlikely(res < 0))
-			return res;
-		return (res == n ? len : res * PAGE_SIZE) - *start;
-	0;}),({
-		/* can't be more than PAGE_SIZE */
-		*start = v.bv_offset;
-		get_page(*pages = v.bv_page);
-		return v.bv_len;
-	}),({
-		return -EFAULT;
-	})
-	)
-	return 0;
-}
-EXPORT_SYMBOL(iov_iter_get_pages);
-
-static struct page **get_pages_array(size_t n)
-{
-	struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
-	if (!p)
-		p = vmalloc(n * sizeof(struct page *));
-	return p;
-}
-
-ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
-		   struct page ***pages, size_t maxsize,
-		   size_t *start)
-{
-	struct page **p;
-
-	if (maxsize > i->count)
-		maxsize = i->count;
-
-	if (!maxsize)
-		return 0;
-
-	iterate_all_kinds(i, maxsize, v, ({
-		unsigned long addr = (unsigned long)v.iov_base;
-		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
-		int n;
-		int res;
-
-		addr &= ~(PAGE_SIZE - 1);
-		n = DIV_ROUND_UP(len, PAGE_SIZE);
-		p = get_pages_array(n);
-		if (!p)
-			return -ENOMEM;
-		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
-		if (unlikely(res < 0)) {
-			kvfree(p);
-			return res;
-		}
-		*pages = p;
-		return (res == n ? len : res * PAGE_SIZE) - *start;
-	0;}),({
-		/* can't be more than PAGE_SIZE */
-		*start = v.bv_offset;
-		*pages = p = get_pages_array(1);
-		if (!p)
-			return -ENOMEM;
-		get_page(*p = v.bv_page);
-		return v.bv_len;
-	}),({
-		return -EFAULT;
-	})
-	)
-	return 0;
-}
-EXPORT_SYMBOL(iov_iter_get_pages_alloc);
-
-size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
-			       struct iov_iter *i)
-{
-	char *to = addr;
-	__wsum sum, next;
-	size_t off = 0;
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	sum = *csum;
-	iterate_and_advance(i, bytes, v, ({
-		int err = 0;
-		next = csum_and_copy_from_user(v.iov_base, 
-					       (to += v.iov_len) - v.iov_len,
-					       v.iov_len, 0, &err);
-		if (!err) {
-			sum = csum_block_add(sum, next, off);
-			off += v.iov_len;
-		}
-		err ? v.iov_len : 0;
-	}), ({
-		char *p = kmap_atomic(v.bv_page);
-		next = csum_partial_copy_nocheck(p + v.bv_offset,
-						 (to += v.bv_len) - v.bv_len,
-						 v.bv_len, 0);
-		kunmap_atomic(p);
-		sum = csum_block_add(sum, next, off);
-		off += v.bv_len;
-	}),({
-		next = csum_partial_copy_nocheck(v.iov_base,
-						 (to += v.iov_len) - v.iov_len,
-						 v.iov_len, 0);
-		sum = csum_block_add(sum, next, off);
-		off += v.iov_len;
-	})
-	)
-	*csum = sum;
-	return bytes;
-}
-EXPORT_SYMBOL(csum_and_copy_from_iter);
-
-size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum,
-			     struct iov_iter *i)
-{
-	char *from = addr;
-	__wsum sum, next;
-	size_t off = 0;
-	if (unlikely(bytes > i->count))
-		bytes = i->count;
-
-	if (unlikely(!bytes))
-		return 0;
-
-	sum = *csum;
-	iterate_and_advance(i, bytes, v, ({
-		int err = 0;
-		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
-					     v.iov_base, 
-					     v.iov_len, 0, &err);
-		if (!err) {
-			sum = csum_block_add(sum, next, off);
-			off += v.iov_len;
-		}
-		err ? v.iov_len : 0;
-	}), ({
-		char *p = kmap_atomic(v.bv_page);
-		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
-						 p + v.bv_offset,
-						 v.bv_len, 0);
-		kunmap_atomic(p);
-		sum = csum_block_add(sum, next, off);
-		off += v.bv_len;
-	}),({
-		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
-						 v.iov_base,
-						 v.iov_len, 0);
-		sum = csum_block_add(sum, next, off);
-		off += v.iov_len;
-	})
-	)
-	*csum = sum;
-	return bytes;
-}
-EXPORT_SYMBOL(csum_and_copy_to_iter);
-
-int iov_iter_npages(const struct iov_iter *i, int maxpages)
-{
-	size_t size = i->count;
-	int npages = 0;
-
-	if (!size)
-		return 0;
-
-	iterate_all_kinds(i, size, v, ({
-		unsigned long p = (unsigned long)v.iov_base;
-		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
-			- p / PAGE_SIZE;
-		if (npages >= maxpages)
-			return maxpages;
-	0;}),({
-		npages++;
-		if (npages >= maxpages)
-			return maxpages;
-	}),({
-		unsigned long p = (unsigned long)v.iov_base;
-		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
-			- p / PAGE_SIZE;
-		if (npages >= maxpages)
-			return maxpages;
-	})
-	)
-	return npages;
-}
-EXPORT_SYMBOL(iov_iter_npages);
-
-const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
-{
-	*new = *old;
-	if (new->type & ITER_BVEC)
-		return new->bvec = kmemdup(new->bvec,
-				    new->nr_segs * sizeof(struct bio_vec),
-				    flags);
-	else
-		/* iovec and kvec have identical layout */
-		return new->iov = kmemdup(new->iov,
-				   new->nr_segs * sizeof(struct iovec),
-				   flags);
-}
-EXPORT_SYMBOL(dup_iter);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 03/12] gadget/function/f_fs.c: close leaks
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 01/12] new helper: dup_iter() Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 02/12] move iov_iter.c from mm/ to lib/ Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 04/12] gadget/function/f_fs.c: use put iov_iter into io_data Christoph Hellwig
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

If ffs_epfile_io() fails in AIO case, we end up leaking io_data
(and iovec_copy in case of AIO read).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/gadget/function/f_fs.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index af98b09..3ab34a2 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -970,6 +970,7 @@ static ssize_t ffs_epfile_aio_write(struct kiocb *kiocb,
 				    unsigned long nr_segs, loff_t loff)
 {
 	struct ffs_io_data *io_data;
+	ssize_t res;
 
 	ENTER();
 
@@ -989,7 +990,10 @@ static ssize_t ffs_epfile_aio_write(struct kiocb *kiocb,
 
 	kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
 
-	return ffs_epfile_io(kiocb->ki_filp, io_data);
+	res = ffs_epfile_io(kiocb->ki_filp, io_data);
+	if (res != -EIOCBQUEUED)
+		kfree(io_data);
+	return res;
 }
 
 static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb,
@@ -998,6 +1002,7 @@ static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb,
 {
 	struct ffs_io_data *io_data;
 	struct iovec *iovec_copy;
+	ssize_t res;
 
 	ENTER();
 
@@ -1025,7 +1030,12 @@ static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb,
 
 	kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
 
-	return ffs_epfile_io(kiocb->ki_filp, io_data);
+	res = ffs_epfile_io(kiocb->ki_filp, io_data);
+	if (res != -EIOCBQUEUED) {
+		kfree(io_data);
+		kfree(iovec_copy);
+	}
+	return res;
 }
 
 static int
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 04/12] gadget/function/f_fs.c: use put iov_iter into io_data
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (2 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 03/12] gadget/function/f_fs.c: close leaks Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 05/12] gadget/function/f_fs.c: switch to ->{read,write}_iter() Christoph Hellwig
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

both on aio and non-aio sides

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/gadget/function/f_fs.c | 86 +++++++++++---------------------------
 1 file changed, 25 insertions(+), 61 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 3ab34a2..98610e4 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -144,10 +144,9 @@ struct ffs_io_data {
 	bool read;
 
 	struct kiocb *kiocb;
-	const struct iovec *iovec;
-	unsigned long nr_segs;
-	char __user *buf;
-	size_t len;
+	struct iov_iter data;
+	const void *to_free;
+	char *buf;
 
 	struct mm_struct *mm;
 	struct work_struct work;
@@ -649,29 +648,10 @@ static void ffs_user_copy_worker(struct work_struct *work)
 					 io_data->req->actual;
 
 	if (io_data->read && ret > 0) {
-		int i;
-		size_t pos = 0;
-
-		/*
-		 * Since req->length may be bigger than io_data->len (after
-		 * being rounded up to maxpacketsize), we may end up with more
-		 * data then user space has space for.
-		 */
-		ret = min_t(int, ret, io_data->len);
-
 		use_mm(io_data->mm);
-		for (i = 0; i < io_data->nr_segs; i++) {
-			size_t len = min_t(size_t, ret - pos,
-					io_data->iovec[i].iov_len);
-			if (!len)
-				break;
-			if (unlikely(copy_to_user(io_data->iovec[i].iov_base,
-						 &io_data->buf[pos], len))) {
-				ret = -EFAULT;
-				break;
-			}
-			pos += len;
-		}
+		ret = copy_to_iter(io_data->buf, ret, &io_data->data);
+		if (iov_iter_count(&io_data->data))
+			ret = -EFAULT;
 		unuse_mm(io_data->mm);
 	}
 
@@ -684,7 +664,7 @@ static void ffs_user_copy_worker(struct work_struct *work)
 
 	io_data->kiocb->private = NULL;
 	if (io_data->read)
-		kfree(io_data->iovec);
+		kfree(io_data->to_free);
 	kfree(io_data->buf);
 	kfree(io_data);
 }
@@ -743,6 +723,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 		 * before the waiting completes, so do not assign to 'gadget' earlier
 		 */
 		struct usb_gadget *gadget = epfile->ffs->gadget;
+		size_t copied;
 
 		spin_lock_irq(&epfile->ffs->eps_lock);
 		/* In the meantime, endpoint got disabled or changed. */
@@ -750,34 +731,21 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 			spin_unlock_irq(&epfile->ffs->eps_lock);
 			return -ESHUTDOWN;
 		}
+		data_len = iov_iter_count(&io_data->data);
 		/*
 		 * Controller may require buffer size to be aligned to
 		 * maxpacketsize of an out endpoint.
 		 */
-		data_len = io_data->read ?
-			   usb_ep_align_maybe(gadget, ep->ep, io_data->len) :
-			   io_data->len;
+		if (io_data->read)
+			data_len = usb_ep_align_maybe(gadget, ep->ep, data_len);
 		spin_unlock_irq(&epfile->ffs->eps_lock);
 
 		data = kmalloc(data_len, GFP_KERNEL);
 		if (unlikely(!data))
 			return -ENOMEM;
-		if (io_data->aio && !io_data->read) {
-			int i;
-			size_t pos = 0;
-			for (i = 0; i < io_data->nr_segs; i++) {
-				if (unlikely(copy_from_user(&data[pos],
-					     io_data->iovec[i].iov_base,
-					     io_data->iovec[i].iov_len))) {
-					ret = -EFAULT;
-					goto error;
-				}
-				pos += io_data->iovec[i].iov_len;
-			}
-		} else {
-			if (!io_data->read &&
-			    unlikely(__copy_from_user(data, io_data->buf,
-						      io_data->len))) {
+		if (!io_data->read) {
+			copied = copy_from_iter(data, data_len, &io_data->data);
+			if (copied != data_len) {
 				ret = -EFAULT;
 				goto error;
 			}
@@ -876,10 +844,8 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
 				 */
 				ret = ep->status;
 				if (io_data->read && ret > 0) {
-					ret = min_t(size_t, ret, io_data->len);
-
-					if (unlikely(copy_to_user(io_data->buf,
-						data, ret)))
+					ret = copy_to_iter(data, ret, &io_data->data);
+					if (unlikely(iov_iter_count(&io_data->data)))
 						ret = -EFAULT;
 				}
 			}
@@ -903,13 +869,13 @@ ffs_epfile_write(struct file *file, const char __user *buf, size_t len,
 		 loff_t *ptr)
 {
 	struct ffs_io_data io_data;
+	struct iovec iov = {.iov_base = buf, .iov_len = len};
 
 	ENTER();
 
 	io_data.aio = false;
 	io_data.read = false;
-	io_data.buf = (char * __user)buf;
-	io_data.len = len;
+	iov_iter_init(&io_data.data, WRITE, &iov, 1, len);
 
 	return ffs_epfile_io(file, &io_data);
 }
@@ -918,13 +884,14 @@ static ssize_t
 ffs_epfile_read(struct file *file, char __user *buf, size_t len, loff_t *ptr)
 {
 	struct ffs_io_data io_data;
+	struct iovec iov = {.iov_base = buf, .iov_len = len};
 
 	ENTER();
 
 	io_data.aio = false;
 	io_data.read = true;
-	io_data.buf = buf;
-	io_data.len = len;
+	io_data.to_free = NULL;
+	iov_iter_init(&io_data.data, READ, &iov, 1, len);
 
 	return ffs_epfile_io(file, &io_data);
 }
@@ -981,9 +948,7 @@ static ssize_t ffs_epfile_aio_write(struct kiocb *kiocb,
 	io_data->aio = true;
 	io_data->read = false;
 	io_data->kiocb = kiocb;
-	io_data->iovec = iovec;
-	io_data->nr_segs = nr_segs;
-	io_data->len = kiocb->ki_nbytes;
+	iov_iter_init(&io_data->data, WRITE, iovec, nr_segs, kiocb->ki_nbytes);
 	io_data->mm = current->mm;
 
 	kiocb->private = io_data;
@@ -1021,9 +986,8 @@ static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb,
 	io_data->aio = true;
 	io_data->read = true;
 	io_data->kiocb = kiocb;
-	io_data->iovec = iovec_copy;
-	io_data->nr_segs = nr_segs;
-	io_data->len = kiocb->ki_nbytes;
+	io_data->to_free = iovec_copy;
+	iov_iter_init(&io_data->data, READ, iovec_copy, nr_segs, kiocb->ki_nbytes);
 	io_data->mm = current->mm;
 
 	kiocb->private = io_data;
@@ -1032,8 +996,8 @@ static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb,
 
 	res = ffs_epfile_io(kiocb->ki_filp, io_data);
 	if (res != -EIOCBQUEUED) {
+		kfree(io_data->to_free);
 		kfree(io_data);
-		kfree(iovec_copy);
 	}
 	return res;
 }
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 05/12] gadget/function/f_fs.c: switch to ->{read,write}_iter()
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (3 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 04/12] gadget/function/f_fs.c: use put iov_iter into io_data Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 06/12] gadgetfs: use-after-free in ->aio_read() Christoph Hellwig
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/gadget/function/f_fs.c | 136 ++++++++++++++++---------------------
 1 file changed, 58 insertions(+), 78 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 98610e4..175c995 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -864,38 +864,6 @@ error:
 	return ret;
 }
 
-static ssize_t
-ffs_epfile_write(struct file *file, const char __user *buf, size_t len,
-		 loff_t *ptr)
-{
-	struct ffs_io_data io_data;
-	struct iovec iov = {.iov_base = buf, .iov_len = len};
-
-	ENTER();
-
-	io_data.aio = false;
-	io_data.read = false;
-	iov_iter_init(&io_data.data, WRITE, &iov, 1, len);
-
-	return ffs_epfile_io(file, &io_data);
-}
-
-static ssize_t
-ffs_epfile_read(struct file *file, char __user *buf, size_t len, loff_t *ptr)
-{
-	struct ffs_io_data io_data;
-	struct iovec iov = {.iov_base = buf, .iov_len = len};
-
-	ENTER();
-
-	io_data.aio = false;
-	io_data.read = true;
-	io_data.to_free = NULL;
-	iov_iter_init(&io_data.data, READ, &iov, 1, len);
-
-	return ffs_epfile_io(file, &io_data);
-}
-
 static int
 ffs_epfile_open(struct inode *inode, struct file *file)
 {
@@ -932,72 +900,84 @@ static int ffs_aio_cancel(struct kiocb *kiocb)
 	return value;
 }
 
-static ssize_t ffs_epfile_aio_write(struct kiocb *kiocb,
-				    const struct iovec *iovec,
-				    unsigned long nr_segs, loff_t loff)
+static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from)
 {
-	struct ffs_io_data *io_data;
+	struct ffs_io_data io_data, *p = &io_data;
 	ssize_t res;
 
 	ENTER();
 
-	io_data = kmalloc(sizeof(*io_data), GFP_KERNEL);
-	if (unlikely(!io_data))
-		return -ENOMEM;
+	if (!is_sync_kiocb(kiocb)) {
+		p = kmalloc(sizeof(io_data), GFP_KERNEL);
+		if (unlikely(!p))
+			return -ENOMEM;
+		p->aio = true;
+	} else {
+		p->aio = false;
+	}
 
-	io_data->aio = true;
-	io_data->read = false;
-	io_data->kiocb = kiocb;
-	iov_iter_init(&io_data->data, WRITE, iovec, nr_segs, kiocb->ki_nbytes);
-	io_data->mm = current->mm;
+	p->read = false;
+	p->kiocb = kiocb;
+	p->data = *from;
+	p->mm = current->mm;
 
-	kiocb->private = io_data;
+	kiocb->private = p;
 
 	kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
 
-	res = ffs_epfile_io(kiocb->ki_filp, io_data);
-	if (res != -EIOCBQUEUED)
-		kfree(io_data);
+	res = ffs_epfile_io(kiocb->ki_filp, p);
+	if (res == -EIOCBQUEUED)
+		return res;
+	if (p->aio)
+		kfree(p);
+	else
+		*from = p->data;
 	return res;
 }
 
-static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb,
-				   const struct iovec *iovec,
-				   unsigned long nr_segs, loff_t loff)
+static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 {
-	struct ffs_io_data *io_data;
-	struct iovec *iovec_copy;
+	struct ffs_io_data io_data, *p = &io_data;
 	ssize_t res;
 
 	ENTER();
 
-	iovec_copy = kmalloc_array(nr_segs, sizeof(*iovec_copy), GFP_KERNEL);
-	if (unlikely(!iovec_copy))
-		return -ENOMEM;
-
-	memcpy(iovec_copy, iovec, sizeof(struct iovec)*nr_segs);
-
-	io_data = kmalloc(sizeof(*io_data), GFP_KERNEL);
-	if (unlikely(!io_data)) {
-		kfree(iovec_copy);
-		return -ENOMEM;
+	if (!is_sync_kiocb(kiocb)) {
+		p = kmalloc(sizeof(io_data), GFP_KERNEL);
+		if (unlikely(!p))
+			return -ENOMEM;
+		p->aio = true;
+	} else {
+		p->aio = false;
 	}
 
-	io_data->aio = true;
-	io_data->read = true;
-	io_data->kiocb = kiocb;
-	io_data->to_free = iovec_copy;
-	iov_iter_init(&io_data->data, READ, iovec_copy, nr_segs, kiocb->ki_nbytes);
-	io_data->mm = current->mm;
+	p->read = true;
+	p->kiocb = kiocb;
+	if (p->aio) {
+		p->to_free = dup_iter(&p->data, to, GFP_KERNEL);
+		if (!p->to_free) {
+			kfree(p);
+			return -ENOMEM;
+		}
+	} else {
+		p->data = *to;
+		p->to_free = NULL;
+	}
+	p->mm = current->mm;
 
-	kiocb->private = io_data;
+	kiocb->private = p;
 
 	kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
 
-	res = ffs_epfile_io(kiocb->ki_filp, io_data);
-	if (res != -EIOCBQUEUED) {
-		kfree(io_data->to_free);
-		kfree(io_data);
+	res = ffs_epfile_io(kiocb->ki_filp, p);
+	if (res == -EIOCBQUEUED)
+		return res;
+
+	if (p->aio) {
+		kfree(p->to_free);
+		kfree(p);
+	} else {
+		*to = p->data;
 	}
 	return res;
 }
@@ -1079,10 +1059,10 @@ static const struct file_operations ffs_epfile_operations = {
 	.llseek =	no_llseek,
 
 	.open =		ffs_epfile_open,
-	.write =	ffs_epfile_write,
-	.read =		ffs_epfile_read,
-	.aio_write =	ffs_epfile_aio_write,
-	.aio_read =	ffs_epfile_aio_read,
+	.write =	new_sync_write,
+	.read =		new_sync_read,
+	.write_iter =	ffs_epfile_write_iter,
+	.read_iter =	ffs_epfile_read_iter,
 	.release =	ffs_epfile_release,
 	.unlocked_ioctl =	ffs_epfile_ioctl,
 };
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 06/12] gadgetfs: use-after-free in ->aio_read()
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (4 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 05/12] gadget/function/f_fs.c: switch to ->{read,write}_iter() Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-03-08 10:07   ` Ming Lei
  2015-02-23 18:00 ` [PATCH 07/12] gadget: switch ep_io_operations to ->read_iter/->write_iter Christoph Hellwig
                   ` (6 subsequent siblings)
  12 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel, stable

From: Al Viro <viro@zeniv.linux.org.uk>

AIO_PREAD requests call ->aio_read() with iovec on caller's stack, so if
we are going to access it asynchronously, we'd better get ourselves
a copy - the one on kernel stack of aio_run_iocb() won't be there
anymore.  function/f_fs.c take care of doing that, legacy/inode.c
doesn't...

Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/gadget/legacy/inode.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index db49ec4..9fbbaa0 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -566,7 +566,6 @@ static ssize_t ep_copy_to_user(struct kiocb_priv *priv)
 		if (total == 0)
 			break;
 	}
-
 	return len;
 }
 
@@ -585,6 +584,7 @@ static void ep_user_copy_worker(struct work_struct *work)
 	aio_complete(iocb, ret, ret);
 
 	kfree(priv->buf);
+	kfree(priv->iv);
 	kfree(priv);
 }
 
@@ -605,6 +605,7 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
 	 */
 	if (priv->iv == NULL || unlikely(req->actual == 0)) {
 		kfree(req->buf);
+		kfree(priv->iv);
 		kfree(priv);
 		iocb->private = NULL;
 		/* aio_complete() reports bytes-transferred _and_ faults */
@@ -640,7 +641,7 @@ ep_aio_rwtail(
 	struct usb_request	*req;
 	ssize_t			value;
 
-	priv = kmalloc(sizeof *priv, GFP_KERNEL);
+	priv = kzalloc(sizeof *priv, GFP_KERNEL);
 	if (!priv) {
 		value = -ENOMEM;
 fail:
@@ -649,7 +650,14 @@ fail:
 	}
 	iocb->private = priv;
 	priv->iocb = iocb;
-	priv->iv = iv;
+	if (iv) {
+		priv->iv = kmemdup(iv, nr_segs * sizeof(struct iovec),
+				   GFP_KERNEL);
+		if (!priv->iv) {
+			kfree(priv);
+			goto fail;
+		}
+	}
 	priv->nr_segs = nr_segs;
 	INIT_WORK(&priv->work, ep_user_copy_worker);
 
@@ -689,6 +697,7 @@ fail:
 	mutex_unlock(&epdata->lock);
 
 	if (unlikely(value)) {
+		kfree(priv->iv);
 		kfree(priv);
 		put_ep(epdata);
 	} else
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 07/12] gadget: switch ep_io_operations to ->read_iter/->write_iter
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (5 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 06/12] gadgetfs: use-after-free in ->aio_read() Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 08/12] fs: remove ki_nbytes Christoph Hellwig
                   ` (5 subsequent siblings)
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

From: Al Viro <viro@zeniv.linux.org.uk>

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/gadget/legacy/inode.c | 355 +++++++++++++++-----------------------
 1 file changed, 141 insertions(+), 214 deletions(-)

diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 9fbbaa0..b825edc 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -363,97 +363,6 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
 	return value;
 }
 
-
-/* handle a synchronous OUT bulk/intr/iso transfer */
-static ssize_t
-ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
-{
-	struct ep_data		*data = fd->private_data;
-	void			*kbuf;
-	ssize_t			value;
-
-	if ((value = get_ready_ep (fd->f_flags, data)) < 0)
-		return value;
-
-	/* halt any endpoint by doing a "wrong direction" i/o call */
-	if (usb_endpoint_dir_in(&data->desc)) {
-		if (usb_endpoint_xfer_isoc(&data->desc)) {
-			mutex_unlock(&data->lock);
-			return -EINVAL;
-		}
-		DBG (data->dev, "%s halt\n", data->name);
-		spin_lock_irq (&data->dev->lock);
-		if (likely (data->ep != NULL))
-			usb_ep_set_halt (data->ep);
-		spin_unlock_irq (&data->dev->lock);
-		mutex_unlock(&data->lock);
-		return -EBADMSG;
-	}
-
-	/* FIXME readahead for O_NONBLOCK and poll(); careful with ZLPs */
-
-	value = -ENOMEM;
-	kbuf = kmalloc (len, GFP_KERNEL);
-	if (unlikely (!kbuf))
-		goto free1;
-
-	value = ep_io (data, kbuf, len);
-	VDEBUG (data->dev, "%s read %zu OUT, status %d\n",
-		data->name, len, (int) value);
-	if (value >= 0 && copy_to_user (buf, kbuf, value))
-		value = -EFAULT;
-
-free1:
-	mutex_unlock(&data->lock);
-	kfree (kbuf);
-	return value;
-}
-
-/* handle a synchronous IN bulk/intr/iso transfer */
-static ssize_t
-ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
-{
-	struct ep_data		*data = fd->private_data;
-	void			*kbuf;
-	ssize_t			value;
-
-	if ((value = get_ready_ep (fd->f_flags, data)) < 0)
-		return value;
-
-	/* halt any endpoint by doing a "wrong direction" i/o call */
-	if (!usb_endpoint_dir_in(&data->desc)) {
-		if (usb_endpoint_xfer_isoc(&data->desc)) {
-			mutex_unlock(&data->lock);
-			return -EINVAL;
-		}
-		DBG (data->dev, "%s halt\n", data->name);
-		spin_lock_irq (&data->dev->lock);
-		if (likely (data->ep != NULL))
-			usb_ep_set_halt (data->ep);
-		spin_unlock_irq (&data->dev->lock);
-		mutex_unlock(&data->lock);
-		return -EBADMSG;
-	}
-
-	/* FIXME writebehind for O_NONBLOCK and poll(), qlen = 1 */
-
-	value = -ENOMEM;
-	kbuf = memdup_user(buf, len);
-	if (IS_ERR(kbuf)) {
-		value = PTR_ERR(kbuf);
-		kbuf = NULL;
-		goto free1;
-	}
-
-	value = ep_io (data, kbuf, len);
-	VDEBUG (data->dev, "%s write %zu IN, status %d\n",
-		data->name, len, (int) value);
-free1:
-	mutex_unlock(&data->lock);
-	kfree (kbuf);
-	return value;
-}
-
 static int
 ep_release (struct inode *inode, struct file *fd)
 {
@@ -517,8 +426,8 @@ struct kiocb_priv {
 	struct mm_struct	*mm;
 	struct work_struct	work;
 	void			*buf;
-	const struct iovec	*iv;
-	unsigned long		nr_segs;
+	struct iov_iter		to;
+	const void		*to_free;
 	unsigned		actual;
 };
 
@@ -541,34 +450,6 @@ static int ep_aio_cancel(struct kiocb *iocb)
 	return value;
 }
 
-static ssize_t ep_copy_to_user(struct kiocb_priv *priv)
-{
-	ssize_t			len, total;
-	void			*to_copy;
-	int			i;
-
-	/* copy stuff into user buffers */
-	total = priv->actual;
-	len = 0;
-	to_copy = priv->buf;
-	for (i=0; i < priv->nr_segs; i++) {
-		ssize_t this = min((ssize_t)(priv->iv[i].iov_len), total);
-
-		if (copy_to_user(priv->iv[i].iov_base, to_copy, this)) {
-			if (len == 0)
-				len = -EFAULT;
-			break;
-		}
-
-		total -= this;
-		len += this;
-		to_copy += this;
-		if (total == 0)
-			break;
-	}
-	return len;
-}
-
 static void ep_user_copy_worker(struct work_struct *work)
 {
 	struct kiocb_priv *priv = container_of(work, struct kiocb_priv, work);
@@ -577,14 +458,16 @@ static void ep_user_copy_worker(struct work_struct *work)
 	size_t ret;
 
 	use_mm(mm);
-	ret = ep_copy_to_user(priv);
+	ret = copy_to_iter(priv->buf, priv->actual, &priv->to);
 	unuse_mm(mm);
+	if (!ret)
+		ret = -EFAULT;
 
 	/* completing the iocb can drop the ctx and mm, don't touch mm after */
 	aio_complete(iocb, ret, ret);
 
 	kfree(priv->buf);
-	kfree(priv->iv);
+	kfree(priv->to_free);
 	kfree(priv);
 }
 
@@ -603,9 +486,9 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
 	 * don't need to copy anything to userspace, so we can
 	 * complete the aio request immediately.
 	 */
-	if (priv->iv == NULL || unlikely(req->actual == 0)) {
+	if (priv->to_free == NULL || unlikely(req->actual == 0)) {
 		kfree(req->buf);
-		kfree(priv->iv);
+		kfree(priv->to_free);
 		kfree(priv);
 		iocb->private = NULL;
 		/* aio_complete() reports bytes-transferred _and_ faults */
@@ -619,6 +502,7 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
 
 		priv->buf = req->buf;
 		priv->actual = req->actual;
+		INIT_WORK(&priv->work, ep_user_copy_worker);
 		schedule_work(&priv->work);
 	}
 	spin_unlock(&epdata->dev->lock);
@@ -627,45 +511,17 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
 	put_ep(epdata);
 }
 
-static ssize_t
-ep_aio_rwtail(
-	struct kiocb	*iocb,
-	char		*buf,
-	size_t		len,
-	struct ep_data	*epdata,
-	const struct iovec *iv,
-	unsigned long	nr_segs
-)
+static ssize_t ep_aio(struct kiocb *iocb,
+		      struct kiocb_priv *priv,
+		      struct ep_data *epdata,
+		      char *buf,
+		      size_t len)
 {
-	struct kiocb_priv	*priv;
-	struct usb_request	*req;
-	ssize_t			value;
+	struct usb_request *req;
+	ssize_t value;
 
-	priv = kzalloc(sizeof *priv, GFP_KERNEL);
-	if (!priv) {
-		value = -ENOMEM;
-fail:
-		kfree(buf);
-		return value;
-	}
 	iocb->private = priv;
 	priv->iocb = iocb;
-	if (iv) {
-		priv->iv = kmemdup(iv, nr_segs * sizeof(struct iovec),
-				   GFP_KERNEL);
-		if (!priv->iv) {
-			kfree(priv);
-			goto fail;
-		}
-	}
-	priv->nr_segs = nr_segs;
-	INIT_WORK(&priv->work, ep_user_copy_worker);
-
-	value = get_ready_ep(iocb->ki_filp->f_flags, epdata);
-	if (unlikely(value < 0)) {
-		kfree(priv);
-		goto fail;
-	}
 
 	kiocb_set_cancel_fn(iocb, ep_aio_cancel);
 	get_ep(epdata);
@@ -677,76 +533,147 @@ fail:
 	 * allocate or submit those if the host disconnected.
 	 */
 	spin_lock_irq(&epdata->dev->lock);
-	if (likely(epdata->ep)) {
-		req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC);
-		if (likely(req)) {
-			priv->req = req;
-			req->buf = buf;
-			req->length = len;
-			req->complete = ep_aio_complete;
-			req->context = iocb;
-			value = usb_ep_queue(epdata->ep, req, GFP_ATOMIC);
-			if (unlikely(0 != value))
-				usb_ep_free_request(epdata->ep, req);
-		} else
-			value = -EAGAIN;
-	} else
-		value = -ENODEV;
-	spin_unlock_irq(&epdata->dev->lock);
+	value = -ENODEV;
+	if (unlikely(epdata->ep))
+		goto fail;
 
-	mutex_unlock(&epdata->lock);
+	req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC);
+	value = -ENOMEM;
+	if (unlikely(!req))
+		goto fail;
 
-	if (unlikely(value)) {
-		kfree(priv->iv);
-		kfree(priv);
-		put_ep(epdata);
-	} else
-		value = -EIOCBQUEUED;
+	priv->req = req;
+	req->buf = buf;
+	req->length = len;
+	req->complete = ep_aio_complete;
+	req->context = iocb;
+	value = usb_ep_queue(epdata->ep, req, GFP_ATOMIC);
+	if (unlikely(0 != value)) {
+		usb_ep_free_request(epdata->ep, req);
+		goto fail;
+	}
+	spin_unlock_irq(&epdata->dev->lock);
+	return -EIOCBQUEUED;
+
+fail:
+	spin_unlock_irq(&epdata->dev->lock);
+	kfree(priv->to_free);
+	kfree(priv);
+	put_ep(epdata);
 	return value;
 }
 
 static ssize_t
-ep_aio_read(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t o)
+ep_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-	struct ep_data		*epdata = iocb->ki_filp->private_data;
-	char			*buf;
+	struct file *file = iocb->ki_filp;
+	struct ep_data *epdata = file->private_data;
+	size_t len = iov_iter_count(to);
+	ssize_t value;
+	char *buf;
 
-	if (unlikely(usb_endpoint_dir_in(&epdata->desc)))
-		return -EINVAL;
+	if ((value = get_ready_ep(file->f_flags, epdata)) < 0)
+		return value;
 
-	buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL);
-	if (unlikely(!buf))
-		return -ENOMEM;
+	/* halt any endpoint by doing a "wrong direction" i/o call */
+	if (usb_endpoint_dir_in(&epdata->desc)) {
+		if (usb_endpoint_xfer_isoc(&epdata->desc) ||
+		    !is_sync_kiocb(iocb)) {
+			mutex_unlock(&epdata->lock);
+			return -EINVAL;
+		}
+		DBG (epdata->dev, "%s halt\n", epdata->name);
+		spin_lock_irq(&epdata->dev->lock);
+		if (likely(epdata->ep != NULL))
+			usb_ep_set_halt(epdata->ep);
+		spin_unlock_irq(&epdata->dev->lock);
+		mutex_unlock(&epdata->lock);
+		return -EBADMSG;
+	}
 
-	return ep_aio_rwtail(iocb, buf, iocb->ki_nbytes, epdata, iov, nr_segs);
+	buf = kmalloc(len, GFP_KERNEL);
+	if (unlikely(!buf)) {
+		mutex_unlock(&epdata->lock);
+		return -ENOMEM;
+	}
+	if (is_sync_kiocb(iocb)) {
+		value = ep_io(epdata, buf, len);
+		if (value >= 0 && copy_to_iter(buf, value, to))
+			value = -EFAULT;
+	} else {
+		struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL);
+		value = -ENOMEM;
+		if (!priv)
+			goto fail;
+		priv->to_free = dup_iter(&priv->to, to, GFP_KERNEL);
+		if (!priv->to_free) {
+			kfree(priv);
+			goto fail;
+		}
+		value = ep_aio(iocb, priv, epdata, buf, len);
+		if (value == -EIOCBQUEUED)
+			buf = NULL;
+	}
+fail:
+	kfree(buf);
+	mutex_unlock(&epdata->lock);
+	return value;
 }
 
 static ssize_t
-ep_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t o)
+ep_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	struct ep_data		*epdata = iocb->ki_filp->private_data;
-	char			*buf;
-	size_t			len = 0;
-	int			i = 0;
+	struct file *file = iocb->ki_filp;
+	struct ep_data *epdata = file->private_data;
+	size_t len = iov_iter_count(from);
+	ssize_t value;
+	char *buf;
 
-	if (unlikely(!usb_endpoint_dir_in(&epdata->desc)))
-		return -EINVAL;
+	if ((value = get_ready_ep(file->f_flags, epdata)) < 0)
+		return value;
 
-	buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL);
-	if (unlikely(!buf))
+	/* halt any endpoint by doing a "wrong direction" i/o call */
+	if (!usb_endpoint_dir_in(&epdata->desc)) {
+		if (usb_endpoint_xfer_isoc(&epdata->desc) ||
+		    !is_sync_kiocb(iocb)) {
+			mutex_unlock(&epdata->lock);
+			return -EINVAL;
+		}
+		DBG (epdata->dev, "%s halt\n", epdata->name);
+		spin_lock_irq(&epdata->dev->lock);
+		if (likely(epdata->ep != NULL))
+			usb_ep_set_halt(epdata->ep);
+		spin_unlock_irq(&epdata->dev->lock);
+		mutex_unlock(&epdata->lock);
+		return -EBADMSG;
+	}
+
+	buf = kmalloc(len, GFP_KERNEL);
+	if (unlikely(!buf)) {
+		mutex_unlock(&epdata->lock);
 		return -ENOMEM;
+	}
 
-	for (i=0; i < nr_segs; i++) {
-		if (unlikely(copy_from_user(&buf[len], iov[i].iov_base,
-				iov[i].iov_len) != 0)) {
-			kfree(buf);
-			return -EFAULT;
+	if (unlikely(copy_from_iter(buf, len, from) != len)) {
+		value = -EFAULT;
+		goto out;
+	}
+
+	if (is_sync_kiocb(iocb)) {
+		value = ep_io(epdata, buf, len);
+	} else {
+		struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL);
+		value = -ENOMEM;
+		if (priv) {
+			value = ep_aio(iocb, priv, epdata, buf, len);
+			if (value == -EIOCBQUEUED)
+				buf = NULL;
 		}
-		len += iov[i].iov_len;
 	}
-	return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0);
+out:
+	kfree(buf);
+	mutex_unlock(&epdata->lock);
+	return value;
 }
 
 /*----------------------------------------------------------------------*/
@@ -756,13 +683,13 @@ static const struct file_operations ep_io_operations = {
 	.owner =	THIS_MODULE,
 	.llseek =	no_llseek,
 
-	.read =		ep_read,
-	.write =	ep_write,
+	.read =		new_sync_read,
+	.write =	new_sync_write,
 	.unlocked_ioctl = ep_ioctl,
 	.release =	ep_release,
 
-	.aio_read =	ep_aio_read,
-	.aio_write =	ep_aio_write,
+	.read_iter =	ep_read_iter,
+	.write_iter =	ep_write_iter,
 };
 
 /* ENDPOINT INITIALIZATION
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 08/12] fs: remove ki_nbytes
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (6 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 07/12] gadget: switch ep_io_operations to ->read_iter/->write_iter Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 09/12] fuse: handle synchronous iocbs internally Christoph Hellwig
                   ` (4 subsequent siblings)
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

There is no need to pass the total request length in the kiocb, as
we already get passed in through the iov_iter argument.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/aio.c               | 34 ++++++++++++++++++----------------
 fs/ceph/file.c         |  2 +-
 fs/nfs/direct.c        |  2 +-
 fs/ocfs2/file.c        |  8 +++-----
 fs/read_write.c        |  8 --------
 fs/udf/file.c          |  2 +-
 include/linux/aio.h    |  1 -
 kernel/printk/printk.c |  2 +-
 mm/page_io.c           |  1 -
 net/socket.c           |  6 +++---
 10 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 118a2e0..667054c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1344,12 +1344,13 @@ typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
 				     int rw, char __user *buf,
 				     unsigned long *nr_segs,
+				     size_t *len,
 				     struct iovec **iovec,
 				     bool compat)
 {
 	ssize_t ret;
 
-	*nr_segs = kiocb->ki_nbytes;
+	*nr_segs = *len;
 
 #ifdef CONFIG_COMPAT
 	if (compat)
@@ -1364,21 +1365,22 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
 	if (ret < 0)
 		return ret;
 
-	/* ki_nbytes now reflect bytes instead of segs */
-	kiocb->ki_nbytes = ret;
+	/* len now reflect bytes instead of segs */
+	*len = ret;
 	return 0;
 }
 
 static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
 				       int rw, char __user *buf,
 				       unsigned long *nr_segs,
+				       size_t len,
 				       struct iovec *iovec)
 {
-	if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
+	if (unlikely(!access_ok(!rw, buf, len)))
 		return -EFAULT;
 
 	iovec->iov_base = buf;
-	iovec->iov_len = kiocb->ki_nbytes;
+	iovec->iov_len = len;
 	*nr_segs = 1;
 	return 0;
 }
@@ -1388,7 +1390,7 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
  *	Performs the initial checks and io submission.
  */
 static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
-			    char __user *buf, bool compat)
+			    char __user *buf, size_t len, bool compat)
 {
 	struct file *file = req->ki_filp;
 	ssize_t ret;
@@ -1423,21 +1425,21 @@ rw_common:
 		if (!rw_op && !iter_op)
 			return -EINVAL;
 
-		ret = (opcode == IOCB_CMD_PREADV ||
-		       opcode == IOCB_CMD_PWRITEV)
-			? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
-						&iovec, compat)
-			: aio_setup_single_vector(req, rw, buf, &nr_segs,
-						  iovec);
+		if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
+			ret = aio_setup_vectored_rw(req, rw, buf, &nr_segs,
+						&len, &iovec, compat);
+		else
+			ret = aio_setup_single_vector(req, rw, buf, &nr_segs,
+						  len, iovec);
 		if (!ret)
-			ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+			ret = rw_verify_area(rw, file, &req->ki_pos, len);
 		if (ret < 0) {
 			if (iovec != inline_vecs)
 				kfree(iovec);
 			return ret;
 		}
 
-		req->ki_nbytes = ret;
+		len = ret;
 
 		/* XXX: move/kill - rw_verify_area()? */
 		/* This matches the pread()/pwrite() logic */
@@ -1450,7 +1452,7 @@ rw_common:
 			file_start_write(file);
 
 		if (iter_op) {
-			iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+			iov_iter_init(&iter, rw, iovec, nr_segs, len);
 			ret = iter_op(req, &iter);
 		} else {
 			ret = rw_op(req, iovec, nr_segs, req->ki_pos);
@@ -1553,10 +1555,10 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_obj.user = user_iocb;
 	req->ki_user_data = iocb->aio_data;
 	req->ki_pos = iocb->aio_offset;
-	req->ki_nbytes = iocb->aio_nbytes;
 
 	ret = aio_run_iocb(req, iocb->aio_lio_opcode,
 			   (char __user *)(unsigned long)iocb->aio_buf,
+			   iocb->aio_nbytes,
 			   compat);
 	if (ret)
 		goto out_put_req;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index a3d774b..0a1d0f3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -808,7 +808,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *filp = iocb->ki_filp;
 	struct ceph_file_info *fi = filp->private_data;
-	size_t len = iocb->ki_nbytes;
+	size_t len = iov_iter_count(to);
 	struct inode *inode = file_inode(filp);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct page *pinned_page = NULL;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e907c8c..8d6f3f5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -265,7 +265,7 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
 
 	return -EINVAL;
 #else
-	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+	VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
 
 	if (rw == READ)
 		return nfs_file_direct_read(iocb, iter, pos);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 46e0d4e..266845d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2280,7 +2280,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 		file->f_path.dentry->d_name.name,
 		(unsigned int)from->nr_segs);	/* GRRRRR */
 
-	if (iocb->ki_nbytes == 0)
+	if (count == 0)
 		return 0;
 
 	appending = file->f_flags & O_APPEND ? 1 : 0;
@@ -2330,8 +2330,7 @@ relock:
 	}
 
 	can_do_direct = direct_io;
-	ret = ocfs2_prepare_inode_for_write(file, ppos,
-					    iocb->ki_nbytes, appending,
+	ret = ocfs2_prepare_inode_for_write(file, ppos, count, appending,
 					    &can_do_direct, &has_refcount);
 	if (ret < 0) {
 		mlog_errno(ret);
@@ -2339,8 +2338,7 @@ relock:
 	}
 
 	if (direct_io && !is_sync_kiocb(iocb))
-		unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes,
-						      *ppos);
+		unaligned_dio = ocfs2_is_io_unaligned(inode, count, *ppos);
 
 	/*
 	 * We can't complete the direct I/O as requested, fall back to
diff --git a/fs/read_write.c b/fs/read_write.c
index 8e1b687..f8b8fc1 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -343,7 +343,6 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
 
 	init_sync_kiocb(&kiocb, file);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = iov_iter_count(iter);
 
 	iter->type |= READ;
 	ret = file->f_op->read_iter(&kiocb, iter);
@@ -366,7 +365,6 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
 
 	init_sync_kiocb(&kiocb, file);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = iov_iter_count(iter);
 
 	iter->type |= WRITE;
 	ret = file->f_op->write_iter(&kiocb, iter);
@@ -426,7 +424,6 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 
 	ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
 	if (-EIOCBQUEUED == ret)
@@ -446,7 +443,6 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 	iov_iter_init(&iter, READ, &iov, 1, len);
 
 	ret = filp->f_op->read_iter(&kiocb, &iter);
@@ -510,7 +506,6 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 
 	ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
 	if (-EIOCBQUEUED == ret)
@@ -530,7 +525,6 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 	iov_iter_init(&iter, WRITE, &iov, 1, len);
 
 	ret = filp->f_op->write_iter(&kiocb, &iter);
@@ -719,7 +713,6 @@ static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iove
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 
 	iov_iter_init(&iter, rw, iov, nr_segs, len);
 	ret = fn(&kiocb, &iter);
@@ -737,7 +730,6 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = *ppos;
-	kiocb.ki_nbytes = len;
 
 	ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
 	if (ret == -EIOCBQUEUED)
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 08f3555..9c0b6da 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	int err, pos;
-	size_t count = iocb->ki_nbytes;
+	size_t count = iov_iter_count(from);
 	struct udf_inode_info *iinfo = UDF_I(inode);
 
 	mutex_lock(&inode->i_mutex);
diff --git a/include/linux/aio.h b/include/linux/aio.h
index d9c92da..132d1ec 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -42,7 +42,6 @@ struct kiocb {
 
 	__u64			ki_user_data;	/* user's data for completion */
 	loff_t			ki_pos;
-	size_t			ki_nbytes;	/* copy of iocb->aio_nbytes */
 
 	struct list_head	ki_list;	/* the aio core uses this
 						 * for cancellation */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 01cfd69..fdd296c 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -521,7 +521,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
 	int i;
 	int level = default_message_loglevel;
 	int facility = 1;	/* LOG_USER */
-	size_t len = iocb->ki_nbytes;
+	size_t len = iov_iter_count(from);
 	ssize_t ret = len;
 
 	if (len > LOG_LINE_MAX)
diff --git a/mm/page_io.c b/mm/page_io.c
index e604580..7ef2157 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -274,7 +274,6 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
-		kiocb.ki_nbytes = PAGE_SIZE;
 
 		set_page_writeback(page);
 		unlock_page(page);
diff --git a/net/socket.c b/net/socket.c
index bbedbfc..f921455 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -858,11 +858,11 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (iocb->ki_pos != 0)
 		return -ESPIPE;
 
-	if (iocb->ki_nbytes == 0)	/* Match SYS5 behaviour */
+	if (!iov_iter_count(to))	/* Match SYS5 behaviour */
 		return 0;
 
 	res = __sock_recvmsg(iocb, sock, &msg,
-			     iocb->ki_nbytes, msg.msg_flags);
+			     iov_iter_count(to), msg.msg_flags);
 	*to = msg.msg_iter;
 	return res;
 }
@@ -883,7 +883,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (sock->type == SOCK_SEQPACKET)
 		msg.msg_flags |= MSG_EOR;
 
-	res = __sock_sendmsg(iocb, sock, &msg, iocb->ki_nbytes);
+	res = __sock_sendmsg(iocb, sock, &msg, iov_iter_count(from));
 	*from = msg.msg_iter;
 	return res;
 }
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 09/12] fuse: handle synchronous iocbs internally
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (7 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 08/12] fs: remove ki_nbytes Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-03-06  2:54   ` Maxim Patlasov
  2015-02-23 18:00 ` [PATCH 10/12] fs: don't allow to complete sync iocbs through aio_complete Christoph Hellwig
                   ` (3 subsequent siblings)
  12 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

Based on a patch from Maxim Patlasov <MPatlasov@parallels.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/fuse/file.c   | 51 +++++++++++++++++++++++++++++++--------------------
 fs/fuse/fuse_i.h |  1 +
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c01ec3b..f81d83e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
 	}
 }
 
+static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
+{
+	if (io->err)
+		return io->err;
+
+	if (io->bytes >= 0 && io->write)
+		return -EIO;
+
+	return io->bytes < 0 ? io->size : io->bytes;
+}
+
 /**
  * In case of short read, the caller sets 'pos' to the position of
  * actual end of fuse request in IO request. Otherwise, if bytes_requested
@@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
  */
 static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 {
+	bool is_sync = is_sync_kiocb(io->iocb);
 	int left;
 
 	spin_lock(&io->lock);
@@ -555,27 +567,21 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 		io->bytes = pos;
 
 	left = --io->reqs;
+	if (!left && is_sync)
+		complete(io->done);
 	spin_unlock(&io->lock);
 
-	if (!left) {
-		long res;
+	if (!left && !is_sync) {
+		ssize_t res = fuse_get_res_by_io(io);
 
-		if (io->err)
-			res = io->err;
-		else if (io->bytes >= 0 && io->write)
-			res = -EIO;
-		else {
-			res = io->bytes < 0 ? io->size : io->bytes;
+		if (res >= 0) {
+			struct inode *inode = file_inode(io->iocb->ki_filp);
+			struct fuse_conn *fc = get_fuse_conn(inode);
+			struct fuse_inode *fi = get_fuse_inode(inode);
 
-			if (!is_sync_kiocb(io->iocb)) {
-				struct inode *inode = file_inode(io->iocb->ki_filp);
-				struct fuse_conn *fc = get_fuse_conn(inode);
-				struct fuse_inode *fi = get_fuse_inode(inode);
-
-				spin_lock(&fc->lock);
-				fi->attr_version = ++fc->attr_version;
-				spin_unlock(&fc->lock);
-			}
+			spin_lock(&fc->lock);
+			fi->attr_version = ++fc->attr_version;
+			spin_unlock(&fc->lock);
 		}
 
 		aio_complete(io->iocb, res, 0);
@@ -2801,6 +2807,7 @@ static ssize_t
 fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 			loff_t offset)
 {
+	DECLARE_COMPLETION_ONSTACK(wait);
 	ssize_t ret = 0;
 	struct file *file = iocb->ki_filp;
 	struct fuse_file *ff = file->private_data;
@@ -2852,6 +2859,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
 		io->async = false;
 
+	if (io->async && is_sync_kiocb(iocb))
+		io->done = &wait;
+
 	if (rw == WRITE)
 		ret = __fuse_direct_write(io, iter, &pos);
 	else
@@ -2864,11 +2874,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 		if (!is_sync_kiocb(iocb))
 			return -EIOCBQUEUED;
 
-		ret = wait_on_sync_kiocb(iocb);
-	} else {
-		kfree(io);
+		wait_for_completion(&wait);
+		ret = fuse_get_res_by_io(io);
 	}
 
+	kfree(io);
+
 	if (rw == WRITE) {
 		if (ret > 0)
 			fuse_write_update_size(inode, pos);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 1cdfb07..7354dc1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -263,6 +263,7 @@ struct fuse_io_priv {
 	int err;
 	struct kiocb *iocb;
 	struct file *file;
+	struct completion *done;
 };
 
 /**
-- 
1.9.1

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 10/12] fs: don't allow to complete sync iocbs through aio_complete
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (8 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 09/12] fuse: handle synchronous iocbs internally Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 11/12] fs: split generic and aio kiocb Christoph Hellwig
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

The AIO interface is fairly complex because it tries to allow
filesystems to always work async and then wakeup a synchronous
caller through aio_complete.  It turns out that basically no one
was doing this to avoid the complexity and context switches,
and we've already fixed up the remaining users and can now
get rid of this case.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/aio.c            | 24 +-----------------------
 fs/ecryptfs/file.c  |  6 ------
 fs/read_write.c     | 26 ++++++++------------------
 include/linux/aio.h |  4 ----
 net/socket.c        |  9 +++------
 5 files changed, 12 insertions(+), 57 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 667054c..8ca8df1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -778,22 +778,6 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 	return 0;
 }
 
-/* wait_on_sync_kiocb:
- *	Waits on the given sync kiocb to complete.
- */
-ssize_t wait_on_sync_kiocb(struct kiocb *req)
-{
-	while (!req->ki_ctx) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (req->ki_ctx)
-			break;
-		io_schedule();
-	}
-	__set_current_state(TASK_RUNNING);
-	return req->ki_user_data;
-}
-EXPORT_SYMBOL(wait_on_sync_kiocb);
-
 /*
  * exit_aio: called when the last user of mm goes away.  At this point, there is
  * no way for any new requests to be submited or any of the io_* syscalls to be
@@ -1025,13 +1009,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	 *    ref, no other paths have a way to get another ref
 	 *  - the sync task helpfully left a reference to itself in the iocb
 	 */
-	if (is_sync_kiocb(iocb)) {
-		iocb->ki_user_data = res;
-		smp_wmb();
-		iocb->ki_ctx = ERR_PTR(-EXDEV);
-		wake_up_process(iocb->ki_obj.tsk);
-		return;
-	}
+	BUG_ON(is_sync_kiocb(iocb));
 
 	if (iocb->ki_list.next) {
 		unsigned long flags;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 6f4e659..a36da88 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -52,12 +52,6 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb,
 	struct file *file = iocb->ki_filp;
 
 	rc = generic_file_read_iter(iocb, to);
-	/*
-	 * Even though this is a async interface, we need to wait
-	 * for IO to finish to update atime
-	 */
-	if (-EIOCBQUEUED == rc)
-		rc = wait_on_sync_kiocb(iocb);
 	if (rc >= 0) {
 		path = ecryptfs_dentry_to_lower_path(file->f_path.dentry);
 		touch_atime(path);
diff --git a/fs/read_write.c b/fs/read_write.c
index f8b8fc1..76e324e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -346,9 +346,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
 
 	iter->type |= READ;
 	ret = file->f_op->read_iter(&kiocb, iter);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
-
+	BUG_ON(ret == -EIOCBQUEUED);
 	if (ret > 0)
 		*ppos = kiocb.ki_pos;
 	return ret;
@@ -368,9 +366,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
 
 	iter->type |= WRITE;
 	ret = file->f_op->write_iter(&kiocb, iter);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
-
+	BUG_ON(ret == -EIOCBQUEUED);
 	if (ret > 0)
 		*ppos = kiocb.ki_pos;
 	return ret;
@@ -426,8 +422,7 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 	kiocb.ki_pos = *ppos;
 
 	ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@@ -446,8 +441,7 @@ ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *p
 	iov_iter_init(&iter, READ, &iov, 1, len);
 
 	ret = filp->f_op->read_iter(&kiocb, &iter);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@@ -508,8 +502,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 	kiocb.ki_pos = *ppos;
 
 	ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@@ -528,8 +521,7 @@ ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, lo
 	iov_iter_init(&iter, WRITE, &iov, 1, len);
 
 	ret = filp->f_op->write_iter(&kiocb, &iter);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@@ -716,8 +708,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iove
 
 	iov_iter_init(&iter, rw, iov, nr_segs, len);
 	ret = fn(&kiocb, &iter);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
@@ -732,8 +723,7 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 	kiocb.ki_pos = *ppos;
 
 	ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
-	if (ret == -EIOCBQUEUED)
-		ret = wait_on_sync_kiocb(&kiocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	*ppos = kiocb.ki_pos;
 	return ret;
 }
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 132d1ec..f851643 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -37,7 +37,6 @@ struct kiocb {
 
 	union {
 		void __user		*user;
-		struct task_struct	*tsk;
 	} ki_obj;
 
 	__u64			ki_user_data;	/* user's data for completion */
@@ -63,13 +62,11 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 	*kiocb = (struct kiocb) {
 			.ki_ctx = NULL,
 			.ki_filp = filp,
-			.ki_obj.tsk = current,
 		};
 }
 
 /* prototypes */
 #ifdef CONFIG_AIO
-extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
 extern void aio_complete(struct kiocb *iocb, long res, long res2);
 struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
@@ -77,7 +74,6 @@ extern long do_io_submit(aio_context_t ctx_id, long nr,
 			 struct iocb __user *__user *iocbpp, bool compat);
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
-static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
 static inline void aio_complete(struct kiocb *iocb, long res, long res2) { }
 struct mm_struct;
 static inline void exit_aio(struct mm_struct *mm) { }
diff --git a/net/socket.c b/net/socket.c
index f921455..f6c519d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -633,8 +633,7 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 	init_sync_kiocb(&iocb, NULL);
 	ret = nosec ? __sock_sendmsg_nosec(&iocb, sock, msg, size) :
 		      __sock_sendmsg(&iocb, sock, msg, size);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }
 
@@ -766,8 +765,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 
 	init_sync_kiocb(&iocb, NULL);
 	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }
 EXPORT_SYMBOL(sock_recvmsg);
@@ -780,8 +778,7 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 
 	init_sync_kiocb(&iocb, NULL);
 	ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
-	if (-EIOCBQUEUED == ret)
-		ret = wait_on_sync_kiocb(&iocb);
+	BUG_ON(ret == -EIOCBQUEUED);
 	return ret;
 }
 
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 11/12] fs: split generic and aio kiocb
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (9 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 10/12] fs: don't allow to complete sync iocbs through aio_complete Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 18:00 ` [PATCH 12/12] fs: move struct kiocb to fs.h Christoph Hellwig
  2015-02-23 21:20 ` [RFC] split struct kiocb Al Viro
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

Most callers in the kernel want to perform synchronous file I/O, but
still have to bloat the stack with a full struct kiocb.  Split out
the parts needed in filesystem code from those in the aio code, and
only allocate those needed to pass down argument on the stack.  The
aio code embedds the generic iocb in the one it allocates and can
easily get back to it by using container_of.

Also add a ->ki_complete method to struct kiocb, this is used to call
into the aio code and thus removes the dependency on aio for filesystems
impementing asynchronous operations.  It will also allow other callers
to substitute their own completion callback.

We also add a new ->ki_flags field to work around the nasty layering
violation recently introduced in commit 5e33f6 ("usb: gadget: ffs: add
eventfd notification about ffs events").

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/usb/gadget/function/f_fs.c |  5 +-
 drivers/usb/gadget/legacy/inode.c  |  5 +-
 fs/aio.c                           | 94 ++++++++++++++++++++++++++------------
 fs/direct-io.c                     |  4 +-
 fs/fuse/file.c                     |  2 +-
 fs/nfs/direct.c                    |  2 +-
 include/linux/aio.h                | 46 +++----------------
 7 files changed, 81 insertions(+), 77 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 175c995..b64538b 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -655,9 +655,10 @@ static void ffs_user_copy_worker(struct work_struct *work)
 		unuse_mm(io_data->mm);
 	}
 
-	aio_complete(io_data->kiocb, ret, ret);
+	io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);
 
-	if (io_data->ffs->ffs_eventfd && !io_data->kiocb->ki_eventfd)
+	if (io_data->ffs->ffs_eventfd &&
+	    !(io_data->kiocb->ki_flags & IOCB_EVENTFD))
 		eventfd_signal(io_data->ffs->ffs_eventfd, 1);
 
 	usb_ep_free_request(io_data->ep, io_data->req);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index b825edc..0487284 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -464,7 +464,7 @@ static void ep_user_copy_worker(struct work_struct *work)
 		ret = -EFAULT;
 
 	/* completing the iocb can drop the ctx and mm, don't touch mm after */
-	aio_complete(iocb, ret, ret);
+	iocb->ki_complete(iocb, ret, ret);
 
 	kfree(priv->buf);
 	kfree(priv->to_free);
@@ -492,7 +492,8 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
 		kfree(priv);
 		iocb->private = NULL;
 		/* aio_complete() reports bytes-transferred _and_ faults */
-		aio_complete(iocb, req->actual ? req->actual : req->status,
+
+		iocb->ki_complete(iocb, req->actual ? req->actual : req->status,
 				req->status);
 	} else {
 		/* ep_copy_to_user() won't report both; we hide some faults */
diff --git a/fs/aio.c b/fs/aio.c
index 8ca8df1..9582865 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -151,6 +151,38 @@ struct kioctx {
 	unsigned		id;
 };
 
+/*
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
+ */
+#define KIOCB_CANCELLED		((void *) (~0ULL))
+
+struct aio_kiocb {
+	struct kiocb		common;
+
+	struct kioctx		*ki_ctx;
+	kiocb_cancel_fn		*ki_cancel;
+
+	struct iocb __user	*ki_user_iocb;	/* user's aiocb */
+	__u64			ki_user_data;	/* user's data for completion */
+
+	struct list_head	ki_list;	/* the aio core uses this
+						 * for cancellation */
+
+	/*
+	 * If the aio_resfd field of the userspace iocb is not zero,
+	 * this is the underlying eventfd context to deliver events to.
+	 */
+	struct eventfd_ctx	*ki_eventfd;
+};
+
 /*------ sysctl variables----*/
 static DEFINE_SPINLOCK(aio_nr_lock);
 unsigned long aio_nr;		/* current system wide number of aio requests */
@@ -220,7 +252,7 @@ static int __init aio_setup(void)
 	if (IS_ERR(aio_mnt))
 		panic("Failed to create aio fs mount.");
 
-	kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
+	kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 	kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
 
 	pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
@@ -480,8 +512,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
 {
+	struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
 	struct kioctx *ctx = req->ki_ctx;
 	unsigned long flags;
 
@@ -496,7 +529,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kiocb *kiocb)
+static int kiocb_cancel(struct aio_kiocb *kiocb)
 {
 	kiocb_cancel_fn *old, *cancel;
 
@@ -514,7 +547,7 @@ static int kiocb_cancel(struct kiocb *kiocb)
 		cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
 	} while (cancel != old);
 
-	return cancel(kiocb);
+	return cancel(&kiocb->common);
 }
 
 static void free_ioctx(struct work_struct *work)
@@ -550,13 +583,13 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 static void free_ioctx_users(struct percpu_ref *ref)
 {
 	struct kioctx *ctx = container_of(ref, struct kioctx, users);
-	struct kiocb *req;
+	struct aio_kiocb *req;
 
 	spin_lock_irq(&ctx->ctx_lock);
 
 	while (!list_empty(&ctx->active_reqs)) {
 		req = list_first_entry(&ctx->active_reqs,
-				       struct kiocb, ki_list);
+				       struct aio_kiocb, ki_list);
 
 		list_del_init(&req->ki_list);
 		kiocb_cancel(req);
@@ -932,9 +965,9 @@ static void user_refill_reqs_available(struct kioctx *ctx)
  *	Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
  */
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;
 
 	if (!get_reqs_available(ctx)) {
 		user_refill_reqs_available(ctx);
@@ -955,10 +988,10 @@ out_put:
 	return NULL;
 }
 
-static void kiocb_free(struct kiocb *req)
+static void kiocb_free(struct aio_kiocb *req)
 {
-	if (req->ki_filp)
-		fput(req->ki_filp);
+	if (req->common.ki_filp)
+		fput(req->common.ki_filp);
 	if (req->ki_eventfd != NULL)
 		eventfd_ctx_put(req->ki_eventfd);
 	kmem_cache_free(kiocb_cachep, req);
@@ -994,8 +1027,9 @@ out:
 /* aio_complete
  *	Called when the io request on the given iocb is complete.
  */
-void aio_complete(struct kiocb *iocb, long res, long res2)
+static void aio_complete(struct kiocb *kiocb, long res, long res2)
 {
+	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
 	struct kioctx	*ctx = iocb->ki_ctx;
 	struct aio_ring	*ring;
 	struct io_event	*ev_page, *event;
@@ -1009,7 +1043,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	 *    ref, no other paths have a way to get another ref
 	 *  - the sync task helpfully left a reference to itself in the iocb
 	 */
-	BUG_ON(is_sync_kiocb(iocb));
+	BUG_ON(is_sync_kiocb(kiocb));
 
 	if (iocb->ki_list.next) {
 		unsigned long flags;
@@ -1035,7 +1069,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 	event = ev_page + pos % AIO_EVENTS_PER_PAGE;
 
-	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
+	event->obj = (u64)(unsigned long)iocb->ki_user_iocb;
 	event->data = iocb->ki_user_data;
 	event->res = res;
 	event->res2 = res2;
@@ -1044,7 +1078,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
 
 	pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
-		 ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
+		 ctx, tail, iocb, iocb->ki_user_iocb, iocb->ki_user_data,
 		 res, res2);
 
 	/* after flagging the request as done, we
@@ -1091,7 +1125,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 
 	percpu_ref_put(&ctx->reqs);
 }
-EXPORT_SYMBOL(aio_complete);
 
 /* aio_read_events_ring
  *	Pull an event off of the ioctx's event ring.  Returns the number of
@@ -1480,7 +1513,7 @@ rw_common:
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			 struct iocb *iocb, bool compat)
 {
-	struct kiocb *req;
+	struct aio_kiocb *req;
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
@@ -1503,11 +1536,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	if (unlikely(!req))
 		return -EAGAIN;
 
-	req->ki_filp = fget(iocb->aio_fildes);
-	if (unlikely(!req->ki_filp)) {
+	req->common.ki_filp = fget(iocb->aio_fildes);
+	if (unlikely(!req->common.ki_filp)) {
 		ret = -EBADF;
 		goto out_put_req;
 	}
+	req->common.ki_pos = iocb->aio_offset;
+	req->common.ki_complete = aio_complete;
+	req->common.ki_flags = 0;
 
 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
@@ -1522,6 +1558,8 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 			req->ki_eventfd = NULL;
 			goto out_put_req;
 		}
+
+		req->common.ki_flags |= IOCB_EVENTFD;
 	}
 
 	ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1530,11 +1568,10 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 	}
 
-	req->ki_obj.user = user_iocb;
+	req->ki_user_iocb = user_iocb;
 	req->ki_user_data = iocb->aio_data;
-	req->ki_pos = iocb->aio_offset;
 
-	ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+	ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
 			   (char __user *)(unsigned long)iocb->aio_buf,
 			   iocb->aio_nbytes,
 			   compat);
@@ -1623,10 +1660,10 @@ SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
 /* lookup_kiocb
  *	Finds a given iocb for cancellation.
  */
-static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
-				  u32 key)
+static struct aio_kiocb *
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
 {
-	struct list_head *pos;
+	struct aio_kiocb *kiocb;
 
 	assert_spin_locked(&ctx->ctx_lock);
 
@@ -1634,9 +1671,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
 		return NULL;
 
 	/* TODO: use a hash or array, this sucks. */
-	list_for_each(pos, &ctx->active_reqs) {
-		struct kiocb *kiocb = list_kiocb(pos);
-		if (kiocb->ki_obj.user == iocb)
+	list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
+		if (kiocb->ki_user_iocb == iocb)
 			return kiocb;
 	}
 	return NULL;
@@ -1656,7 +1692,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 		struct io_event __user *, result)
 {
 	struct kioctx *ctx;
-	struct kiocb *kiocb;
+	struct aio_kiocb *kiocb;
 	u32 key;
 	int ret;
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e181b6b..c38b460 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -265,7 +265,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
 				ret = err;
 		}
 
-		aio_complete(dio->iocb, ret, 0);
+		dio->iocb->ki_complete(dio->iocb, ret, 0);
 	}
 
 	kmem_cache_free(dio_cache, dio);
@@ -1056,7 +1056,7 @@ static inline int drop_refcount(struct dio *dio)
 	 * operation.  AIO can if it was a broken operation described above or
 	 * in fact if all the bios race to complete before we get here.  In
 	 * that case dio_complete() translates the EIOCBQUEUED into the proper
-	 * return code that the caller will hand to aio_complete().
+	 * return code that the caller will hand to ->complete().
 	 *
 	 * This is managed by the bio_lock instead of being an atomic_t so that
 	 * completion paths can drop their ref and use the remaining count to
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f81d83e..a5c5e38 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -584,7 +584,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 			spin_unlock(&fc->lock);
 		}
 
-		aio_complete(io->iocb, res, 0);
+		io->iocb->ki_complete(io->iocb, res, 0);
 		kfree(io);
 	}
 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8d6f3f5..c3929fb 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -393,7 +393,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
 		long res = (long) dreq->error;
 		if (!res)
 			res = (long) dreq->count;
-		aio_complete(dreq->iocb, res, 0);
+		dreq->iocb->ki_complete(dreq->iocb, res, 0);
 	}
 
 	complete_all(&dreq->completion);
diff --git a/include/linux/aio.h b/include/linux/aio.h
index f851643..5c40b61 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -14,67 +14,38 @@ struct kiocb;
 
 #define KIOCB_KEY		0
 
-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED		((void *) (~0ULL))
-
 typedef int (kiocb_cancel_fn)(struct kiocb *);
 
+#define IOCB_EVENTFD		(1 << 0)
+
 struct kiocb {
 	struct file		*ki_filp;
-	struct kioctx		*ki_ctx;	/* NULL for sync ops */
-	kiocb_cancel_fn		*ki_cancel;
-	void			*private;
-
-	union {
-		void __user		*user;
-	} ki_obj;
-
-	__u64			ki_user_data;	/* user's data for completion */
 	loff_t			ki_pos;
-
-	struct list_head	ki_list;	/* the aio core uses this
-						 * for cancellation */
-
-	/*
-	 * If the aio_resfd field of the userspace iocb is not zero,
-	 * this is the underlying eventfd context to deliver events to.
-	 */
-	struct eventfd_ctx	*ki_eventfd;
+	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
+	void			*private;
+	int			ki_flags;
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
 {
-	return kiocb->ki_ctx == NULL;
+	return kiocb->ki_complete == NULL;
 }
 
 static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 {
 	*kiocb = (struct kiocb) {
-			.ki_ctx = NULL,
 			.ki_filp = filp,
 		};
 }
 
 /* prototypes */
 #ifdef CONFIG_AIO
-extern void aio_complete(struct kiocb *iocb, long res, long res2);
 struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
 extern long do_io_submit(aio_context_t ctx_id, long nr,
 			 struct iocb __user *__user *iocbpp, bool compat);
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
-static inline void aio_complete(struct kiocb *iocb, long res, long res2) { }
 struct mm_struct;
 static inline void exit_aio(struct mm_struct *mm) { }
 static inline long do_io_submit(aio_context_t ctx_id, long nr,
@@ -84,11 +55,6 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req,
 				       kiocb_cancel_fn *cancel) { }
 #endif /* CONFIG_AIO */
 
-static inline struct kiocb *list_kiocb(struct list_head *h)
-{
-	return list_entry(h, struct kiocb, ki_list);
-}
-
 /* for sysctl: */
 extern unsigned long aio_nr;
 extern unsigned long aio_max_nr;
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 12/12] fs: move struct kiocb to fs.h
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (10 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 11/12] fs: split generic and aio kiocb Christoph Hellwig
@ 2015-02-23 18:00 ` Christoph Hellwig
  2015-02-23 21:20 ` [RFC] split struct kiocb Al Viro
  12 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 18:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

struct kiocb now is a generic I/O container, so move it to fs.h.
Also do a #include diet for aio.h while we're at it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/s390/hypfs/inode.c                      |  2 +-
 drivers/char/mem.c                           |  2 +-
 drivers/char/tile-srom.c                     |  1 -
 drivers/infiniband/hw/ipath/ipath_file_ops.c |  1 -
 drivers/infiniband/hw/qib/qib_file_ops.c     |  1 -
 drivers/misc/mei/amthif.c                    |  1 -
 drivers/misc/mei/main.c                      |  1 -
 drivers/misc/mei/pci-me.c                    |  1 -
 drivers/scsi/sg.c                            |  2 +-
 drivers/staging/unisys/include/timskmod.h    |  1 -
 drivers/usb/gadget/function/f_fs.c           |  1 +
 drivers/usb/gadget/legacy/inode.c            |  1 +
 fs/9p/vfs_addr.c                             |  2 +-
 fs/affs/file.c                               |  2 +-
 fs/afs/write.c                               |  1 -
 fs/bfs/inode.c                               |  1 +
 fs/block_dev.c                               |  1 -
 fs/btrfs/file.c                              |  2 +-
 fs/btrfs/inode.c                             |  1 -
 fs/ceph/file.c                               |  1 -
 fs/direct-io.c                               |  1 -
 fs/ecryptfs/file.c                           |  1 -
 fs/ext2/inode.c                              |  2 +-
 fs/ext3/inode.c                              |  2 +-
 fs/ext4/file.c                               |  2 +-
 fs/ext4/indirect.c                           |  1 -
 fs/ext4/inode.c                              |  1 -
 fs/ext4/page-io.c                            |  1 -
 fs/f2fs/data.c                               |  2 +-
 fs/fat/inode.c                               |  1 -
 fs/fuse/cuse.c                               |  2 +-
 fs/fuse/dev.c                                |  1 -
 fs/fuse/file.c                               |  2 +-
 fs/gfs2/aops.c                               |  2 +-
 fs/gfs2/file.c                               |  1 -
 fs/hfs/inode.c                               |  2 +-
 fs/hfsplus/inode.c                           |  2 +-
 fs/jfs/inode.c                               |  2 +-
 fs/nfs/file.c                                |  1 -
 fs/nilfs2/inode.c                            |  2 +-
 fs/ntfs/file.c                               |  1 -
 fs/ntfs/inode.c                              |  1 -
 fs/ocfs2/aops.c                              |  1 +
 fs/ocfs2/aops.h                              |  2 +-
 fs/pipe.c                                    |  1 -
 fs/read_write.c                              |  1 -
 fs/reiserfs/inode.c                          |  2 +-
 fs/splice.c                                  |  1 -
 fs/ubifs/file.c                              |  1 -
 fs/udf/file.c                                |  2 +-
 fs/udf/inode.c                               |  2 +-
 fs/xfs/xfs_aops.c                            |  1 -
 fs/xfs/xfs_file.c                            |  1 -
 include/linux/aio.h                          | 31 +---------------------------
 include/linux/fs.h                           | 22 ++++++++++++++++++++
 include/net/sock.h                           |  1 -
 kernel/printk/printk.c                       |  2 +-
 kernel/sysctl.c                              |  1 +
 mm/filemap.c                                 |  1 -
 mm/page_io.c                                 |  2 +-
 mm/shmem.c                                   |  2 +-
 net/ipv4/raw.c                               |  1 -
 sound/core/pcm_native.c                      |  2 +-
 63 files changed, 53 insertions(+), 86 deletions(-)

diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 4c8008d..ad66b07 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -21,7 +21,7 @@
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <asm/ebcdic.h>
 #include "hypfs.h"
 
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 297110c..9c4fd7a 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -26,7 +26,7 @@
 #include <linux/pfn.h>
 #include <linux/export.h>
 #include <linux/io.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include <linux/uaccess.h>
 
diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c
index 02e76ac..69f6b4a 100644
--- a/drivers/char/tile-srom.c
+++ b/drivers/char/tile-srom.c
@@ -27,7 +27,6 @@
 #include <linux/types.h>	/* size_t */
 #include <linux/proc_fs.h>
 #include <linux/fcntl.h>	/* O_ACCMODE */
-#include <linux/aio.h>
 #include <linux/pagemap.h>
 #include <linux/hugetlb.h>
 #include <linux/uaccess.h>
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 6d7f453..aed8afe 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -40,7 +40,6 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
-#include <linux/aio.h>
 #include <linux/jiffies.h>
 #include <linux/cpu.h>
 #include <asm/pgtable.h>
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 41937c6..14046f5 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -39,7 +39,6 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/io.h>
-#include <linux/aio.h>
 #include <linux/jiffies.h>
 #include <asm/pgtable.h>
 #include <linux/delay.h>
diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
index c4cb9a9..40ea639 100644
--- a/drivers/misc/mei/amthif.c
+++ b/drivers/misc/mei/amthif.c
@@ -19,7 +19,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/ioctl.h>
 #include <linux/cdev.h>
 #include <linux/list.h>
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 3c019c0..47680c8 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -22,7 +22,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/poll.h>
 #include <linux/init.h>
 #include <linux/ioctl.h>
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index bd3039a..af44ee2 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -21,7 +21,6 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/pci.h>
 #include <linux/poll.h>
 #include <linux/ioctl.h>
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 2270bd5..d383f84 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -33,7 +33,6 @@ static int sg_version_num = 30536;	/* 2 digits for each component */
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/mm.h>
-#include <linux/aio.h>
 #include <linux/errno.h>
 #include <linux/mtio.h>
 #include <linux/ioctl.h>
@@ -51,6 +50,7 @@ static int sg_version_num = 30536;	/* 2 digits for each component */
 #include <linux/mutex.h>
 #include <linux/atomic.h>
 #include <linux/ratelimit.h>
+#include <linux/uio.h>
 
 #include "scsi.h"
 #include <scsi/scsi_dbg.h>
diff --git a/drivers/staging/unisys/include/timskmod.h b/drivers/staging/unisys/include/timskmod.h
index 4019a0d..52648d4 100644
--- a/drivers/staging/unisys/include/timskmod.h
+++ b/drivers/staging/unisys/include/timskmod.h
@@ -46,7 +46,6 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index b64538b..a12315a78 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -23,6 +23,7 @@
 #include <linux/export.h>
 #include <linux/hid.h>
 #include <linux/module.h>
+#include <linux/uio.h>
 #include <asm/unaligned.h>
 
 #include <linux/usb/composite.h>
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 0487284..f0669fd 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -26,6 +26,7 @@
 #include <linux/poll.h>
 #include <linux/mmu_context.h>
 #include <linux/aio.h>
+#include <linux/uio.h>
 
 #include <linux/device.h>
 #include <linux/moduleparam.h>
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index eb14e05..ff1a5ba 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,7 +33,7 @@
 #include <linux/pagemap.h>
 #include <linux/idr.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d2468bf..33eaa67 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -12,7 +12,7 @@
  *  affs regular file handling primitives
  */
 
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "affs.h"
 
 static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c13cb08..0714abc 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,7 +14,6 @@
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
-#include <linux/aio.h>
 #include "internal.h"
 
 static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 90bc079..fdcb4d6 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -15,6 +15,7 @@
 #include <linux/buffer_head.h>
 #include <linux/vfs.h>
 #include <linux/writeback.h>
+#include <linux/uio.h>
 #include <asm/uaccess.h>
 #include "bfs.h"
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 975266b..2e522ae 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,7 +27,6 @@
 #include <linux/namei.h>
 #include <linux/log2.h>
 #include <linux/cleancache.h>
-#include <linux/aio.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b78bbba..69c9508 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,7 +24,6 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
@@ -32,6 +31,7 @@
 #include <linux/compat.h>
 #include <linux/slab.h>
 #include <linux/btrfs.h>
+#include <linux/uio.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a85c23d..46de23e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,6 @@
 #include <linux/writeback.h>
 #include <linux/statfs.h>
 #include <linux/compat.h>
-#include <linux/aio.h>
 #include <linux/bit_spinlock.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 0a1d0f3..829d9ff 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,7 +7,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
 
 #include "super.h"
diff --git a/fs/direct-io.c b/fs/direct-io.c
index c38b460..6fb00e3 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
 #include <linux/uio.h>
 #include <linux/atomic.h>
 #include <linux/prefetch.h>
-#include <linux/aio.h>
 
 /*
  * How many user pages to map in one call to get_user_pages().  This determines
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index a36da88..273d36e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,7 +31,6 @@
 #include <linux/security.h>
 #include <linux/compat.h>
 #include <linux/fs_stack.h>
-#include <linux/aio.h>
 #include "ecryptfs_kernel.h"
 
 /**
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 6434bc0..df9d6af 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,7 +31,7 @@
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "acl.h"
 #include "xattr.h"
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2c6ccc4..db07ffb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,7 +27,7 @@
 #include <linux/writeback.h>
 #include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "ext3.h"
 #include "xattr.h"
 #include "acl.h"
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 33a09da..598abbb 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,9 +23,9 @@
 #include <linux/jbd2.h>
 #include <linux/mount.h>
 #include <linux/path.h>
-#include <linux/aio.h>
 #include <linux/quotaops.h>
 #include <linux/pagevec.h>
+#include <linux/uio.h>
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 6b9878a..62d621f 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,7 +20,6 @@
  *	(sct@redhat.com), 1993, 1998
  */
 
-#include <linux/aio.h>
 #include "ext4_jbd2.h"
 #include "truncate.h"
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 85404f1..6325d2c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,7 +37,6 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
-#include <linux/aio.h>
 #include <linux/bitops.h>
 
 #include "ext4_jbd2.h"
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index b24a254..4649842 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -18,7 +18,6 @@
 #include <linux/pagevec.h>
 #include <linux/mpage.h>
 #include <linux/namei.h>
-#include <linux/aio.h>
 #include <linux/uio.h>
 #include <linux/bio.h>
 #include <linux/workqueue.h>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 985ed02..497f851 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,12 +12,12 @@
 #include <linux/f2fs_fs.h>
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/prefetch.h>
+#include <linux/uio.h>
 
 #include "f2fs.h"
 #include "node.h"
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 497c7c5..8521207 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -19,7 +19,6 @@
 #include <linux/mpage.h>
 #include <linux/buffer_head.h>
 #include <linux/mount.h>
-#include <linux/aio.h>
 #include <linux/vfs.h>
 #include <linux/parser.h>
 #include <linux/uio.h>
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 28d0c7a..b3fa050 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,7 +38,6 @@
 #include <linux/device.h>
 #include <linux/file.h>
 #include <linux/fs.h>
-#include <linux/aio.h>
 #include <linux/kdev_t.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
@@ -48,6 +47,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/module.h>
+#include <linux/uio.h>
 
 #include "fuse_i.h"
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ed19a7d..8c92c72 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,7 +19,6 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/swap.h>
 #include <linux/splice.h>
-#include <linux/aio.h>
 
 MODULE_ALIAS_MISCDEV(FUSE_MINOR);
 MODULE_ALIAS("devname:fuse");
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a5c5e38..ff102cb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,8 +15,8 @@
 #include <linux/module.h>
 #include <linux/compat.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
 #include <linux/falloc.h>
+#include <linux/uio.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4ad4f94..fe6634d 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,7 +20,7 @@
 #include <linux/swap.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/backing-dev.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include <trace/events/writeback.h>
 
 #include "gfs2.h"
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 3e32bb8..f6fc412 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -25,7 +25,6 @@
 #include <asm/uaccess.h>
 #include <linux/dlm.h>
 #include <linux/dlm_plock.h>
-#include <linux/aio.h>
 #include <linux/delay.h>
 
 #include "gfs2.h"
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index d0929bc..98d4ea4 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "hfs_fs.h"
 #include "btree.h"
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 0cf786f..f541196 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,7 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
 #include <linux/sched.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index bd3df1c..3197aed 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -22,8 +22,8 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/quotaops.h>
+#include <linux/uio.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 #include "jfs_incore.h"
 #include "jfs_inode.h"
 #include "jfs_filsys.h"
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 94712fc..5d8b89c 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -26,7 +26,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
-#include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/swap.h>
 
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 8b59695..ab4987b 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -26,7 +26,7 @@
 #include <linux/mpage.h>
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 #include "nilfs.h"
 #include "btnode.h"
 #include "segment.h"
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 1da9b2d..f16f2d8 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -28,7 +28,6 @@
 #include <linux/swap.h>
 #include <linux/uio.h>
 #include <linux/writeback.h>
-#include <linux/aio.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 898b994..1d0c21d 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,7 +28,6 @@
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/log2.h>
-#include <linux/aio.h>
 
 #include "aops.h"
 #include "attrib.h"
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 44db180..e1bf18c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -29,6 +29,7 @@
 #include <linux/mpage.h>
 #include <linux/quotaops.h>
 #include <linux/blkdev.h>
+#include <linux/uio.h>
 
 #include <cluster/masklog.h>
 
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 6cae155..dd59599 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,7 +22,7 @@
 #ifndef OCFS2_AOPS_H
 #define OCFS2_AOPS_H
 
-#include <linux/aio.h>
+#include <linux/fs.h>
 
 handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
 							 struct page *page,
diff --git a/fs/pipe.c b/fs/pipe.c
index 21981e5..2d084f2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,7 +21,6 @@
 #include <linux/audit.h>
 #include <linux/syscalls.h>
 #include <linux/fcntl.h>
-#include <linux/aio.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
diff --git a/fs/read_write.c b/fs/read_write.c
index 76e324e..99a6ef9 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
 #include <linux/fcntl.h>
 #include <linux/file.h>
 #include <linux/uio.h>
-#include <linux/aio.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
 #include <linux/export.h>
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index e72401e..9312b78 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,7 +18,7 @@
 #include <linux/writeback.h>
 #include <linux/quotaops.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 int reiserfs_commit_write(struct file *f, struct page *page,
 			  unsigned from, unsigned to);
diff --git a/fs/splice.c b/fs/splice.c
index 7968da9..4bbfa95 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,7 +32,6 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/compat.h>
-#include <linux/aio.h>
 #include "internal.h"
 
 /*
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index e627c0a..c3d15fe 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -50,7 +50,6 @@
  */
 
 #include "ubifs.h"
-#include <linux/aio.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/slab.h>
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 9c0b6da..7f885cc 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -34,7 +34,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/buffer_head.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index a445d59..9c1fbd2 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -38,7 +38,7 @@
 #include <linux/slab.h>
 #include <linux/crc-itu-t.h>
 #include <linux/mpage.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 3a9b7a1..4f8cdc5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,7 +31,6 @@
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
-#include <linux/aio.h>
 #include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ce615d1..88f5cb5 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -38,7 +38,6 @@
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
 
-#include <linux/aio.h>
 #include <linux/dcache.h>
 #include <linux/falloc.h>
 #include <linux/pagevec.h>
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 5c40b61..9eb42db 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -1,52 +1,23 @@
 #ifndef __LINUX__AIO_H
 #define __LINUX__AIO_H
 
-#include <linux/list.h>
-#include <linux/workqueue.h>
 #include <linux/aio_abi.h>
-#include <linux/uio.h>
-#include <linux/rcupdate.h>
-
-#include <linux/atomic.h>
 
 struct kioctx;
 struct kiocb;
+struct mm_struct;
 
 #define KIOCB_KEY		0
 
 typedef int (kiocb_cancel_fn)(struct kiocb *);
 
-#define IOCB_EVENTFD		(1 << 0)
-
-struct kiocb {
-	struct file		*ki_filp;
-	loff_t			ki_pos;
-	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
-	void			*private;
-	int			ki_flags;
-};
-
-static inline bool is_sync_kiocb(struct kiocb *kiocb)
-{
-	return kiocb->ki_complete == NULL;
-}
-
-static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
-{
-	*kiocb = (struct kiocb) {
-			.ki_filp = filp,
-		};
-}
-
 /* prototypes */
 #ifdef CONFIG_AIO
-struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
 extern long do_io_submit(aio_context_t ctx_id, long nr,
 			 struct iocb __user *__user *iocbpp, bool compat);
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
-struct mm_struct;
 static inline void exit_aio(struct mm_struct *mm) { }
 static inline long do_io_submit(aio_context_t ctx_id, long nr,
 				struct iocb __user * __user *iocbpp,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b4d71b5..748e8b3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -314,6 +314,28 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+#define IOCB_EVENTFD		(1 << 0)
+
+struct kiocb {
+	struct file		*ki_filp;
+	loff_t			ki_pos;
+	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
+	void			*private;
+	int			ki_flags;
+};
+
+static inline bool is_sync_kiocb(struct kiocb *kiocb)
+{
+	return kiocb->ki_complete == NULL;
+}
+
+static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
+{
+	*kiocb = (struct kiocb) {
+		.ki_filp = filp,
+	};
+}
+
 /*
  * "descriptor" for what we're up to with a read.
  * This allows us to use the same read code yet
diff --git a/include/net/sock.h b/include/net/sock.h
index ab186b1..71c1300 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -57,7 +57,6 @@
 #include <linux/page_counter.h>
 #include <linux/memcontrol.h>
 #include <linux/static_key.h>
-#include <linux/aio.h>
 #include <linux/sched.h>
 
 #include <linux/filter.h>
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index fdd296c..e366d18 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -32,7 +32,6 @@
 #include <linux/security.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
-#include <linux/aio.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
 #include <linux/kdb.h>
@@ -46,6 +45,7 @@
 #include <linux/irq_work.h>
 #include <linux/utsname.h>
 #include <linux/ctype.h>
+#include <linux/uio.h>
 
 #include <asm/uaccess.h>
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 88ea2d6..83d907a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/aio.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
diff --git a/mm/filemap.c b/mm/filemap.c
index ad72420..876f4e6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -13,7 +13,6 @@
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
-#include <linux/aio.h>
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
 #include <linux/gfp.h>
diff --git a/mm/page_io.c b/mm/page_io.c
index 7ef2157..a96c856 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -20,8 +20,8 @@
 #include <linux/buffer_head.h>
 #include <linux/writeback.h>
 #include <linux/frontswap.h>
-#include <linux/aio.h>
 #include <linux/blkdev.h>
+#include <linux/uio.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
diff --git a/mm/shmem.c b/mm/shmem.c
index a63031f..944b940 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -31,7 +31,7 @@
 #include <linux/mm.h>
 #include <linux/export.h>
 #include <linux/swap.h>
-#include <linux/aio.h>
+#include <linux/uio.h>
 
 static struct vfsmount *shm_mnt;
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index f027a70..4a356b7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -46,7 +46,6 @@
 #include <linux/stddef.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
-#include <linux/aio.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index b03a638..9ecff24 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -25,7 +25,6 @@
 #include <linux/slab.h>
 #include <linux/time.h>
 #include <linux/pm_qos.h>
-#include <linux/aio.h>
 #include <linux/io.h>
 #include <linux/dma-mapping.h>
 #include <sound/core.h>
@@ -35,6 +34,7 @@
 #include <sound/pcm_params.h>
 #include <sound/timer.h>
 #include <sound/minors.h>
+#include <linux/uio.h>
 
 /*
  *  Compatibility
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [RFC] split struct kiocb
  2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
                   ` (11 preceding siblings ...)
  2015-02-23 18:00 ` [PATCH 12/12] fs: move struct kiocb to fs.h Christoph Hellwig
@ 2015-02-23 21:20 ` Al Viro
  2015-02-23 21:22   ` Christoph Hellwig
                     ` (2 more replies)
  12 siblings, 3 replies; 25+ messages in thread
From: Al Viro @ 2015-02-23 21:20 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

On Mon, Feb 23, 2015 at 10:00:24AM -0800, Christoph Hellwig wrote:
> This series cuts down the amount of fiels in the public iocb that is
> allocated on stack for every synchronous I/O, both by removing fields
> from it, and by adding a aio-specific iocb that is only allocated
> for aio requests.
> 
> Additionally it cleans up various corner cases in the aio completion
> code and allowes for adding a simple in-kernel async read/write
> interface.
> 
> The first few patches are from Al's gadget branch and reposted
> here because they are needed for the rest of the series.

FWIW, I would really like to hear from USB folks concerning those patches
(gadgetfs ones, that is).  I don't have any way to test them beyond "does
it compile" - no hardware that could run Linux and act as USB slave and
no idea if there are any sane emulator setups (e.g. qemu doesn't seem to
emulate anything drivers/usb/gadget/udc/* stuff would understand).

I'm not happy about the idea of having it merged into vfs.git#for-next
with zero testing and no comments from the people actually using the
drivers in question, _especially_ if it becomes a never-rebase branch
used as prereq for other development.

Now that the merge window is closed, could USB folks review and comment
on the stuff in
git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git gadget
?

PS: I would prefer to rebase #iov_iter and #gadget to -rc1, actually...

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC] split struct kiocb
  2015-02-23 21:20 ` [RFC] split struct kiocb Al Viro
@ 2015-02-23 21:22   ` Christoph Hellwig
  2015-02-23 21:39     ` Al Viro
  2015-02-23 21:23   ` Felipe Balbi
  2015-02-25 17:13   ` Felipe Balbi
  2 siblings, 1 reply; 25+ messages in thread
From: Christoph Hellwig @ 2015-02-23 21:22 UTC (permalink / raw)
  To: Al Viro
  Cc: Christoph Hellwig, Maxim Patlasov, Robert Baldyga,
	Michal Nazarewicz, Felipe Balbi, linux-aio, linux-fsdevel

On Mon, Feb 23, 2015 at 09:20:59PM +0000, Al Viro wrote:
> PS: I would prefer to rebase #iov_iter and #gadget to -rc1, actually...

Btw, any chance to sneak dup_iter to Linus for this release?  I ended
up adding an opencoded version to the block layer for this merge window,
and I'd prefer to get these consolidated ASAP.

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC] split struct kiocb
  2015-02-23 21:20 ` [RFC] split struct kiocb Al Viro
  2015-02-23 21:22   ` Christoph Hellwig
@ 2015-02-23 21:23   ` Felipe Balbi
  2015-02-23 21:42     ` Al Viro
  2015-02-25 17:13   ` Felipe Balbi
  2 siblings, 1 reply; 25+ messages in thread
From: Felipe Balbi @ 2015-02-23 21:23 UTC (permalink / raw)
  To: Al Viro
  Cc: Christoph Hellwig, Maxim Patlasov, Robert Baldyga,
	Michal Nazarewicz, Felipe Balbi, linux-aio, linux-fsdevel

[-- Attachment #1: Type: text/plain, Size: 1629 bytes --]

Hi,

On Mon, Feb 23, 2015 at 09:20:59PM +0000, Al Viro wrote:
> On Mon, Feb 23, 2015 at 10:00:24AM -0800, Christoph Hellwig wrote:
> > This series cuts down the amount of fiels in the public iocb that is
> > allocated on stack for every synchronous I/O, both by removing fields
> > from it, and by adding a aio-specific iocb that is only allocated
> > for aio requests.
> > 
> > Additionally it cleans up various corner cases in the aio completion
> > code and allowes for adding a simple in-kernel async read/write
> > interface.
> > 
> > The first few patches are from Al's gadget branch and reposted
> > here because they are needed for the rest of the series.
> 
> FWIW, I would really like to hear from USB folks concerning those patches
> (gadgetfs ones, that is).  I don't have any way to test them beyond "does
> it compile" - no hardware that could run Linux and act as USB slave and
> no idea if there are any sane emulator setups (e.g. qemu doesn't seem to
> emulate anything drivers/usb/gadget/udc/* stuff would understand).
> 
> I'm not happy about the idea of having it merged into vfs.git#for-next
> with zero testing and no comments from the people actually using the
> drivers in question, _especially_ if it becomes a never-rebase branch
> used as prereq for other development.
> 
> Now that the merge window is closed, could USB folks review and comment
> on the stuff in
> git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git gadget
> ?
> 
> PS: I would prefer to rebase #iov_iter and #gadget to -rc1, actually...

give me a couple days, quite busy myself ;-)

-- 
balbi

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC] split struct kiocb
  2015-02-23 21:22   ` Christoph Hellwig
@ 2015-02-23 21:39     ` Al Viro
  2015-02-24  3:47       ` Al Viro
  0 siblings, 1 reply; 25+ messages in thread
From: Al Viro @ 2015-02-23 21:39 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

On Mon, Feb 23, 2015 at 10:22:24PM +0100, Christoph Hellwig wrote:
> On Mon, Feb 23, 2015 at 09:20:59PM +0000, Al Viro wrote:
> > PS: I would prefer to rebase #iov_iter and #gadget to -rc1, actually...
> 
> Btw, any chance to sneak dup_iter to Linus for this release?  I ended
> up adding an opencoded version to the block layer for this merge window,
> and I'd prefer to get these consolidated ASAP.

Hmm...  Let's wait for USB folks to review those fixes - dup_iter() is
helper needed for a fix of easily triggered use-after-free bug, after
all, and beginning of the cycle is obviously OK for such.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC] split struct kiocb
  2015-02-23 21:23   ` Felipe Balbi
@ 2015-02-23 21:42     ` Al Viro
  2015-02-23 21:50       ` Felipe Balbi
  0 siblings, 1 reply; 25+ messages in thread
From: Al Viro @ 2015-02-23 21:42 UTC (permalink / raw)
  To: Felipe Balbi
  Cc: Christoph Hellwig, Maxim Patlasov, Robert Baldyga,
	Michal Nazarewicz, linux-aio, linux-fsdevel

On Mon, Feb 23, 2015 at 03:23:43PM -0600, Felipe Balbi wrote:
> > FWIW, I would really like to hear from USB folks concerning those patches
> > (gadgetfs ones, that is).  I don't have any way to test them beyond "does
> > it compile" - no hardware that could run Linux and act as USB slave and
> > no idea if there are any sane emulator setups (e.g. qemu doesn't seem to
> > emulate anything drivers/usb/gadget/udc/* stuff would understand).
> > 
> > I'm not happy about the idea of having it merged into vfs.git#for-next
> > with zero testing and no comments from the people actually using the
> > drivers in question, _especially_ if it becomes a never-rebase branch
> > used as prereq for other development.
> > 
> > Now that the merge window is closed, could USB folks review and comment
> > on the stuff in
> > git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git gadget
> > ?
> > 
> > PS: I would prefer to rebase #iov_iter and #gadget to -rc1, actually...
> 
> give me a couple days, quite busy myself ;-)

Sure, no problem...  BTW, how does one normally test in that area?  IOW,
which emulators and/or real hardware that could be found on e.g. ebay can
handle the current kernels?

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC] split struct kiocb
  2015-02-23 21:42     ` Al Viro
@ 2015-02-23 21:50       ` Felipe Balbi
  0 siblings, 0 replies; 25+ messages in thread
From: Felipe Balbi @ 2015-02-23 21:50 UTC (permalink / raw)
  To: Al Viro
  Cc: Felipe Balbi, Christoph Hellwig, Maxim Patlasov, Robert Baldyga,
	Michal Nazarewicz, linux-aio, linux-fsdevel

[-- Attachment #1: Type: text/plain, Size: 2141 bytes --]

Hi,

On Mon, Feb 23, 2015 at 09:42:13PM +0000, Al Viro wrote:
> On Mon, Feb 23, 2015 at 03:23:43PM -0600, Felipe Balbi wrote:
> > > FWIW, I would really like to hear from USB folks concerning those patches
> > > (gadgetfs ones, that is).  I don't have any way to test them beyond "does
> > > it compile" - no hardware that could run Linux and act as USB slave and
> > > no idea if there are any sane emulator setups (e.g. qemu doesn't seem to
> > > emulate anything drivers/usb/gadget/udc/* stuff would understand).
> > > 
> > > I'm not happy about the idea of having it merged into vfs.git#for-next
> > > with zero testing and no comments from the people actually using the
> > > drivers in question, _especially_ if it becomes a never-rebase branch
> > > used as prereq for other development.
> > > 
> > > Now that the merge window is closed, could USB folks review and comment
> > > on the stuff in
> > > git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git gadget
> > > ?
> > > 
> > > PS: I would prefer to rebase #iov_iter and #gadget to -rc1, actually...
> > 
> > give me a couple days, quite busy myself ;-)
> 
> Sure, no problem...  BTW, how does one normally test in that area?  IOW,
> which emulators and/or real hardware that could be found on e.g. ebay can
> handle the current kernels?

well, you can use dummy_hcd, for example. It's completely done in SW.

If you want HW, there are quite a few you can use: beaglebone black is
cheap (but MUSB is a terrible controller), OMAP5 uEVM ain't that cheap,
but a better controller, BeagleBoard X15 is also good, but not that
cheap either. There's also gumstix overo which is the same controller as
Beaglebone Black.

AM437x SK (which is not yet available for purchase unfortunately), has
a good USB controller, works just fine with mainline (since 3.17) and
it's one of the boards I use daily. Unfortunately, not yet released for
public consumption :-(

There are other boards like raspberry pi (*real* cheap), or minnowboard
but I don't have those and I can't really say whether they "Just
Work(TM)" with upstream.

cheers

-- 
balbi

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC] split struct kiocb
  2015-02-23 21:39     ` Al Viro
@ 2015-02-24  3:47       ` Al Viro
  0 siblings, 0 replies; 25+ messages in thread
From: Al Viro @ 2015-02-24  3:47 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Maxim Patlasov, Robert Baldyga, Michal Nazarewicz, Felipe Balbi,
	linux-aio, linux-fsdevel

On Mon, Feb 23, 2015 at 09:39:47PM +0000, Al Viro wrote:
> On Mon, Feb 23, 2015 at 10:22:24PM +0100, Christoph Hellwig wrote:
> > On Mon, Feb 23, 2015 at 09:20:59PM +0000, Al Viro wrote:
> > > PS: I would prefer to rebase #iov_iter and #gadget to -rc1, actually...
> > 
> > Btw, any chance to sneak dup_iter to Linus for this release?  I ended
> > up adding an opencoded version to the block layer for this merge window,
> > and I'd prefer to get these consolidated ASAP.
> 
> Hmm...  Let's wait for USB folks to review those fixes - dup_iter() is
> helper needed for a fix of easily triggered use-after-free bug, after
> all, and beginning of the cycle is obviously OK for such.

While we are at it - once the situation with USB patches gets resolved,
I'm going to put that into vfs.git.  If nothing else, O_DIRECT, O_APPEND and
probably O_NDELAY as well, need to be mirrored into ->ki_flags, to deal with
races on flipping those suckers during ->{read,write}_iter().  That, and
followup patches to affected filesystems out to go through vfs.git.

Another thing is that rw_copy_check_uvector() should be split in two helpers -
one to have iov_iter_init() folded into it, another being equivalent to
passing CHECK_IOVEC_ONLY (sg_io() and foreign address space treatment in
process_vm_readv()).  Also vfs.git fodder, obviously, and should be on
top of that stuff.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC] split struct kiocb
  2015-02-23 21:20 ` [RFC] split struct kiocb Al Viro
  2015-02-23 21:22   ` Christoph Hellwig
  2015-02-23 21:23   ` Felipe Balbi
@ 2015-02-25 17:13   ` Felipe Balbi
  2 siblings, 0 replies; 25+ messages in thread
From: Felipe Balbi @ 2015-02-25 17:13 UTC (permalink / raw)
  To: Al Viro, David Cohen
  Cc: Christoph Hellwig, Maxim Patlasov, Robert Baldyga,
	Michal Nazarewicz, Felipe Balbi, linux-aio, linux-fsdevel

[-- Attachment #1: Type: text/plain, Size: 1744 bytes --]

Hi,

On Mon, Feb 23, 2015 at 09:20:59PM +0000, Al Viro wrote:
> On Mon, Feb 23, 2015 at 10:00:24AM -0800, Christoph Hellwig wrote:
> > This series cuts down the amount of fiels in the public iocb that is
> > allocated on stack for every synchronous I/O, both by removing fields
> > from it, and by adding a aio-specific iocb that is only allocated
> > for aio requests.
> > 
> > Additionally it cleans up various corner cases in the aio completion
> > code and allowes for adding a simple in-kernel async read/write
> > interface.
> > 
> > The first few patches are from Al's gadget branch and reposted
> > here because they are needed for the rest of the series.
> 
> FWIW, I would really like to hear from USB folks concerning those patches
> (gadgetfs ones, that is).  I don't have any way to test them beyond "does
> it compile" - no hardware that could run Linux and act as USB slave and
> no idea if there are any sane emulator setups (e.g. qemu doesn't seem to
> emulate anything drivers/usb/gadget/udc/* stuff would understand).
> 
> I'm not happy about the idea of having it merged into vfs.git#for-next
> with zero testing and no comments from the people actually using the
> drivers in question, _especially_ if it becomes a never-rebase branch
> used as prereq for other development.
> 
> Now that the merge window is closed, could USB folks review and comment
> on the stuff in
> git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git gadget
> ?
> 
> PS: I would prefer to rebase #iov_iter and #gadget to -rc1, actually...

I very much like those patches.

David, any chance you can test the branch above on your adb setup just
to make sure we're not breaking anything ?

cheers

-- 
balbi

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 09/12] fuse: handle synchronous iocbs internally
  2015-02-23 18:00 ` [PATCH 09/12] fuse: handle synchronous iocbs internally Christoph Hellwig
@ 2015-03-06  2:54   ` Maxim Patlasov
  0 siblings, 0 replies; 25+ messages in thread
From: Maxim Patlasov @ 2015-03-06  2:54 UTC (permalink / raw)
  To: Christoph Hellwig, Al Viro
  Cc: Robert Baldyga, Michal Nazarewicz, Felipe Balbi, linux-aio,
	linux-fsdevel

Reviewed-by: Maxim Patlasov <MPatlasov@parallels.com>

On 02/23/2015 10:00 AM, Christoph Hellwig wrote:
> Based on a patch from Maxim Patlasov <MPatlasov@parallels.com>.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   fs/fuse/file.c   | 51 +++++++++++++++++++++++++++++++--------------------
>   fs/fuse/fuse_i.h |  1 +
>   2 files changed, 32 insertions(+), 20 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index c01ec3b..f81d83e 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -528,6 +528,17 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
>   	}
>   }
>   
> +static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
> +{
> +	if (io->err)
> +		return io->err;
> +
> +	if (io->bytes >= 0 && io->write)
> +		return -EIO;
> +
> +	return io->bytes < 0 ? io->size : io->bytes;
> +}
> +
>   /**
>    * In case of short read, the caller sets 'pos' to the position of
>    * actual end of fuse request in IO request. Otherwise, if bytes_requested
> @@ -546,6 +557,7 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
>    */
>   static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
>   {
> +	bool is_sync = is_sync_kiocb(io->iocb);
>   	int left;
>   
>   	spin_lock(&io->lock);
> @@ -555,27 +567,21 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
>   		io->bytes = pos;
>   
>   	left = --io->reqs;
> +	if (!left && is_sync)
> +		complete(io->done);
>   	spin_unlock(&io->lock);
>   
> -	if (!left) {
> -		long res;
> +	if (!left && !is_sync) {
> +		ssize_t res = fuse_get_res_by_io(io);
>   
> -		if (io->err)
> -			res = io->err;
> -		else if (io->bytes >= 0 && io->write)
> -			res = -EIO;
> -		else {
> -			res = io->bytes < 0 ? io->size : io->bytes;
> +		if (res >= 0) {
> +			struct inode *inode = file_inode(io->iocb->ki_filp);
> +			struct fuse_conn *fc = get_fuse_conn(inode);
> +			struct fuse_inode *fi = get_fuse_inode(inode);
>   
> -			if (!is_sync_kiocb(io->iocb)) {
> -				struct inode *inode = file_inode(io->iocb->ki_filp);
> -				struct fuse_conn *fc = get_fuse_conn(inode);
> -				struct fuse_inode *fi = get_fuse_inode(inode);
> -
> -				spin_lock(&fc->lock);
> -				fi->attr_version = ++fc->attr_version;
> -				spin_unlock(&fc->lock);
> -			}
> +			spin_lock(&fc->lock);
> +			fi->attr_version = ++fc->attr_version;
> +			spin_unlock(&fc->lock);
>   		}
>   
>   		aio_complete(io->iocb, res, 0);
> @@ -2801,6 +2807,7 @@ static ssize_t
>   fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
>   			loff_t offset)
>   {
> +	DECLARE_COMPLETION_ONSTACK(wait);
>   	ssize_t ret = 0;
>   	struct file *file = iocb->ki_filp;
>   	struct fuse_file *ff = file->private_data;
> @@ -2852,6 +2859,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
>   	if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
>   		io->async = false;
>   
> +	if (io->async && is_sync_kiocb(iocb))
> +		io->done = &wait;
> +
>   	if (rw == WRITE)
>   		ret = __fuse_direct_write(io, iter, &pos);
>   	else
> @@ -2864,11 +2874,12 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
>   		if (!is_sync_kiocb(iocb))
>   			return -EIOCBQUEUED;
>   
> -		ret = wait_on_sync_kiocb(iocb);
> -	} else {
> -		kfree(io);
> +		wait_for_completion(&wait);
> +		ret = fuse_get_res_by_io(io);
>   	}
>   
> +	kfree(io);
> +
>   	if (rw == WRITE) {
>   		if (ret > 0)
>   			fuse_write_update_size(inode, pos);
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 1cdfb07..7354dc1 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -263,6 +263,7 @@ struct fuse_io_priv {
>   	int err;
>   	struct kiocb *iocb;
>   	struct file *file;
> +	struct completion *done;
>   };
>   
>   /**

--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org.  For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 06/12] gadgetfs: use-after-free in ->aio_read()
  2015-02-23 18:00 ` [PATCH 06/12] gadgetfs: use-after-free in ->aio_read() Christoph Hellwig
@ 2015-03-08 10:07   ` Ming Lei
  2015-03-09 15:18     ` Michal Nazarewicz
  0 siblings, 1 reply; 25+ messages in thread
From: Ming Lei @ 2015-03-08 10:07 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Al Viro, Maxim Patlasov, Robert Baldyga, Michal Nazarewicz,
	Felipe Balbi, open list:AIO, Linux FS Devel, stable

On Tue, Feb 24, 2015 at 2:00 AM, Christoph Hellwig <hch@lst.de> wrote:
> From: Al Viro <viro@zeniv.linux.org.uk>
>
> AIO_PREAD requests call ->aio_read() with iovec on caller's stack, so if
> we are going to access it asynchronously, we'd better get ourselves
> a copy - the one on kernel stack of aio_run_iocb() won't be there
> anymore.  function/f_fs.c take care of doing that, legacy/inode.c
> doesn't...
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---
>  drivers/usb/gadget/legacy/inode.c | 15 ++++++++++++---
>  1 file changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
> index db49ec4..9fbbaa0 100644
> --- a/drivers/usb/gadget/legacy/inode.c
> +++ b/drivers/usb/gadget/legacy/inode.c
> @@ -566,7 +566,6 @@ static ssize_t ep_copy_to_user(struct kiocb_priv *priv)
>                 if (total == 0)
>                         break;
>         }
> -
>         return len;
>  }
>
> @@ -585,6 +584,7 @@ static void ep_user_copy_worker(struct work_struct *work)
>         aio_complete(iocb, ret, ret);
>
>         kfree(priv->buf);
> +       kfree(priv->iv);
>         kfree(priv);
>  }
>
> @@ -605,6 +605,7 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
>          */
>         if (priv->iv == NULL || unlikely(req->actual == 0)) {
>                 kfree(req->buf);
> +               kfree(priv->iv);
>                 kfree(priv);
>                 iocb->private = NULL;
>                 /* aio_complete() reports bytes-transferred _and_ faults */
> @@ -640,7 +641,7 @@ ep_aio_rwtail(
>         struct usb_request      *req;
>         ssize_t                 value;
>
> -       priv = kmalloc(sizeof *priv, GFP_KERNEL);
> +       priv = kzalloc(sizeof *priv, GFP_KERNEL);
>         if (!priv) {
>                 value = -ENOMEM;
>  fail:
> @@ -649,7 +650,14 @@ fail:
>         }
>         iocb->private = priv;
>         priv->iocb = iocb;
> -       priv->iv = iv;
> +       if (iv) {
> +               priv->iv = kmemdup(iv, nr_segs * sizeof(struct iovec),
> +                                  GFP_KERNEL);
> +               if (!priv->iv) {
> +                       kfree(priv);
> +                       goto fail;
> +               }
> +       }

It should be simpler and more efficient to allocate 'iv' piggyback
inside 'priv'.

>         priv->nr_segs = nr_segs;
>         INIT_WORK(&priv->work, ep_user_copy_worker);
>
> @@ -689,6 +697,7 @@ fail:
>         mutex_unlock(&epdata->lock);
>
>         if (unlikely(value)) {
> +               kfree(priv->iv);
>                 kfree(priv);
>                 put_ep(epdata);
>         } else
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



Thanks,
Ming Lei

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 06/12] gadgetfs: use-after-free in ->aio_read()
  2015-03-08 10:07   ` Ming Lei
@ 2015-03-09 15:18     ` Michal Nazarewicz
  0 siblings, 0 replies; 25+ messages in thread
From: Michal Nazarewicz @ 2015-03-09 15:18 UTC (permalink / raw)
  To: Ming Lei, Christoph Hellwig
  Cc: Al Viro, Maxim Patlasov, Robert Baldyga, Felipe Balbi,
	open list:AIO, Linux FS Devel, stable

On Sun, Mar 08 2015, Ming Lei <tom.leiming@gmail.com> wrote:
> On Tue, Feb 24, 2015 at 2:00 AM, Christoph Hellwig <hch@lst.de> wrote:
>> From: Al Viro <viro@zeniv.linux.org.uk>
>>
>> AIO_PREAD requests call ->aio_read() with iovec on caller's stack, so if
>> we are going to access it asynchronously, we'd better get ourselves
>> a copy - the one on kernel stack of aio_run_iocb() won't be there
>> anymore.  function/f_fs.c take care of doing that, legacy/inode.c
>> doesn't...
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

Acked-by: Michal Nazarewicz <mina86@mina86.com>

but at the same time:

>> @@ -649,7 +650,14 @@ fail:
>>         }
>>         iocb->private = priv;
>>         priv->iocb = iocb;
>> -       priv->iv = iv;
>> +       if (iv) {
>> +               priv->iv = kmemdup(iv, nr_segs * sizeof(struct iovec),
>> +                                  GFP_KERNEL);
>> +               if (!priv->iv) {
>> +                       kfree(priv);
>> +                       goto fail;
>> +               }
>> +       }
>
> It should be simpler and more efficient to allocate 'iv' piggyback
> inside 'priv'.

+1

	priv = kmalloc(sizeof *priv + (iv ? nr_segs * sizeof *iv : 0),
		       GFP_KERNEL);
	…
	priv->iv = iv ? (void*)(priv + 1) : NULL;

>>         priv->nr_segs = nr_segs;
>>         INIT_WORK(&priv->work, ep_user_copy_worker);
>>

-- 
Best regards,                                         _     _
.o. | Liege of Serenely Enlightened Majesty of      o' \,=./ `o
..o | Computer Science,  Michał “mina86” Nazarewicz    (o o)
ooo +--<mpn@google.com>--<xmpp:mina86@jabber.org>--ooO--(_)--Ooo--

^ permalink raw reply	[flat|nested] 25+ messages in thread

* [RFC] split struct kiocb
@ 2015-01-27 17:55 Christoph Hellwig
  0 siblings, 0 replies; 25+ messages in thread
From: Christoph Hellwig @ 2015-01-27 17:55 UTC (permalink / raw)
  To: Al Viro; +Cc: Miklos Szeredi, linux-aio, linux-fsdevel

This series cuts down the amount of fiels in the public iocb that is
allocated on stack for every synchronous I/O, both by removing fields
from it, and by adding a aio-specific iocb that is only allocated
for aio requests.

Additionally it cleans up various corner cases in the aio completion
code and adds a simple in-kernel async read/write interface.

Note that there still are some issues with fuse, see the first patch
for details.


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2015-03-09 15:18 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-23 18:00 [RFC] split struct kiocb Christoph Hellwig
2015-02-23 18:00 ` [PATCH 01/12] new helper: dup_iter() Christoph Hellwig
2015-02-23 18:00 ` [PATCH 02/12] move iov_iter.c from mm/ to lib/ Christoph Hellwig
2015-02-23 18:00 ` [PATCH 03/12] gadget/function/f_fs.c: close leaks Christoph Hellwig
2015-02-23 18:00 ` [PATCH 04/12] gadget/function/f_fs.c: use put iov_iter into io_data Christoph Hellwig
2015-02-23 18:00 ` [PATCH 05/12] gadget/function/f_fs.c: switch to ->{read,write}_iter() Christoph Hellwig
2015-02-23 18:00 ` [PATCH 06/12] gadgetfs: use-after-free in ->aio_read() Christoph Hellwig
2015-03-08 10:07   ` Ming Lei
2015-03-09 15:18     ` Michal Nazarewicz
2015-02-23 18:00 ` [PATCH 07/12] gadget: switch ep_io_operations to ->read_iter/->write_iter Christoph Hellwig
2015-02-23 18:00 ` [PATCH 08/12] fs: remove ki_nbytes Christoph Hellwig
2015-02-23 18:00 ` [PATCH 09/12] fuse: handle synchronous iocbs internally Christoph Hellwig
2015-03-06  2:54   ` Maxim Patlasov
2015-02-23 18:00 ` [PATCH 10/12] fs: don't allow to complete sync iocbs through aio_complete Christoph Hellwig
2015-02-23 18:00 ` [PATCH 11/12] fs: split generic and aio kiocb Christoph Hellwig
2015-02-23 18:00 ` [PATCH 12/12] fs: move struct kiocb to fs.h Christoph Hellwig
2015-02-23 21:20 ` [RFC] split struct kiocb Al Viro
2015-02-23 21:22   ` Christoph Hellwig
2015-02-23 21:39     ` Al Viro
2015-02-24  3:47       ` Al Viro
2015-02-23 21:23   ` Felipe Balbi
2015-02-23 21:42     ` Al Viro
2015-02-23 21:50       ` Felipe Balbi
2015-02-25 17:13   ` Felipe Balbi
  -- strict thread matches above, loose matches on Subject: below --
2015-01-27 17:55 Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.