* [RFC PATCH 02/37] generic_perform_write()/iomap_write_actor(): saner logics for short copy
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 03/37] fuse_fill_write_pages(): don't bother with iov_iter_single_seg_count() Al Viro
` (34 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
if we run into a short copy and ->write_end() refuses to advance at all,
use the amount we'd managed to copy for the next iteration to handle.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
fs/iomap/buffered-io.c | 25 ++++++++++---------------
mm/filemap.c | 24 +++++++++---------------
2 files changed, 19 insertions(+), 30 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index f2cd2034a87b..354b41d20e5d 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -771,10 +771,6 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
* Otherwise there's a nasty deadlock on copying from the
* same page as we're writing to, without it being marked
* up-to-date.
- *
- * Not only is this an optimisation, but it is also required
- * to check that the address is actually valid, when atomic
- * usercopies are used, below.
*/
if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;
@@ -791,25 +787,24 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
- copied = iomap_write_end(inode, pos, bytes, copied, page, iomap,
+ status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
srcmap);
cond_resched();
- iov_iter_advance(i, copied);
- if (unlikely(copied == 0)) {
+ if (unlikely(status == 0)) {
/*
- * If we were unable to copy any data at all, we must
- * fall back to a single segment length write.
- *
- * If we didn't fallback here, we could livelock
- * because not all segments in the iov can be copied at
- * once without a pagefault.
+ * A short copy made iomap_write_end() reject the
+ * thing entirely. Might be memory poisoning
+ * halfway through, might be a race with munmap,
+ * might be severe memory pressure.
*/
- bytes = min_t(unsigned long, PAGE_SIZE - offset,
- iov_iter_single_seg_count(i));
+ if (copied)
+ bytes = copied;
goto again;
}
+ copied = status;
+ iov_iter_advance(i, copied);
pos += copied;
written += copied;
length -= copied;
diff --git a/mm/filemap.c b/mm/filemap.c
index 66f7e9fdfbc4..0be24942bf8e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3642,10 +3642,6 @@ ssize_t generic_perform_write(struct file *file,
* Otherwise there's a nasty deadlock on copying from the
* same page as we're writing to, without it being marked
* up-to-date.
- *
- * Not only is this an optimisation, but it is also required
- * to check that the address is actually valid, when atomic
- * usercopies are used, below.
*/
if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
status = -EFAULT;
@@ -3672,24 +3668,22 @@ ssize_t generic_perform_write(struct file *file,
page, fsdata);
if (unlikely(status < 0))
break;
- copied = status;
cond_resched();
- iov_iter_advance(i, copied);
- if (unlikely(copied == 0)) {
+ if (unlikely(status == 0)) {
/*
- * If we were unable to copy any data at all, we must
- * fall back to a single segment length write.
- *
- * If we didn't fallback here, we could livelock
- * because not all segments in the iov can be copied at
- * once without a pagefault.
+ * A short copy made ->write_end() reject the
+ * thing entirely. Might be memory poisoning
+ * halfway through, might be a race with munmap,
+ * might be severe memory pressure.
*/
- bytes = min_t(unsigned long, PAGE_SIZE - offset,
- iov_iter_single_seg_count(i));
+ if (copied)
+ bytes = copied;
goto again;
}
+ copied = status;
+ iov_iter_advance(i, copied);
pos += copied;
written += copied;
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 03/37] fuse_fill_write_pages(): don't bother with iov_iter_single_seg_count()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
2021-06-06 19:10 ` [RFC PATCH 02/37] generic_perform_write()/iomap_write_actor(): saner logics for short copy Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 04/37] iov_iter: Remove iov_iter_for_each_range() Al Viro
` (33 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
another rudiment of fault-in originally having been limited to the
first segment, same as in generic_perform_write() and friends.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
fs/fuse/file.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 09ef2a4d25ed..44bd301fa4fb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1178,7 +1178,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
if (!tmp) {
unlock_page(page);
put_page(page);
- bytes = min(bytes, iov_iter_single_seg_count(ii));
goto again;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 04/37] iov_iter: Remove iov_iter_for_each_range()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
2021-06-06 19:10 ` [RFC PATCH 02/37] generic_perform_write()/iomap_write_actor(): saner logics for short copy Al Viro
2021-06-06 19:10 ` [RFC PATCH 03/37] fuse_fill_write_pages(): don't bother with iov_iter_single_seg_count() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 05/37] teach copy_page_to_iter() to handle compound pages Al Viro
` (32 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
From: David Howells <dhowells@redhat.com>
Remove iov_iter_for_each_range() as it's no longer used with the removal of
lustre.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
include/linux/uio.h | 4 ----
lib/iov_iter.c | 27 ---------------------------
2 files changed, 31 deletions(-)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index d3ec87706d75..74a401f04bd3 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -294,8 +294,4 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
int import_single_range(int type, void __user *buf, size_t len,
struct iovec *iov, struct iov_iter *i);
-int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
- int (*f)(struct kvec *vec, void *context),
- void *context);
-
#endif
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index c701b7a187f2..8f5ce5b1ff91 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -2093,30 +2093,3 @@ int import_single_range(int rw, void __user *buf, size_t len,
return 0;
}
EXPORT_SYMBOL(import_single_range);
-
-int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
- int (*f)(struct kvec *vec, void *context),
- void *context)
-{
- struct kvec w;
- int err = -EINVAL;
- if (!bytes)
- return 0;
-
- iterate_all_kinds(i, bytes, v, -EINVAL, ({
- w.iov_base = kmap(v.bv_page) + v.bv_offset;
- w.iov_len = v.bv_len;
- err = f(&w, context);
- kunmap(v.bv_page);
- err;}), ({
- w = v;
- err = f(&w, context);}), ({
- w.iov_base = kmap(v.bv_page) + v.bv_offset;
- w.iov_len = v.bv_len;
- err = f(&w, context);
- kunmap(v.bv_page);
- err;})
- )
- return err;
-}
-EXPORT_SYMBOL(iov_iter_for_each_range);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 05/37] teach copy_page_to_iter() to handle compound pages
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (2 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 04/37] iov_iter: Remove iov_iter_for_each_range() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 06/37] copy_page_to_iter(): fix ITER_DISCARD case Al Viro
` (31 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
In a situation when copy_page_to_iter() got a compound page the current
code would only work on systems with no CONFIG_HIGHMEM. It *is* the majority
of real-world setups, or we would've drowned in bug reports by now. Still needs
fixing.
Current variant works for solitary page; rename that to
__copy_page_to_iter() and turn the handling of compound pages into a loop over
subpages.
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 8f5ce5b1ff91..12fb04b23143 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -957,11 +957,9 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
return false;
}
-size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i)
{
- if (unlikely(!page_copy_sane(page, offset, bytes)))
- return 0;
if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
void *kaddr = kmap_atomic(page);
size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
@@ -974,6 +972,30 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
else
return copy_page_to_iter_pipe(page, offset, bytes, i);
}
+
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+ struct iov_iter *i)
+{
+ size_t res = 0;
+ if (unlikely(!page_copy_sane(page, offset, bytes)))
+ return 0;
+ page += offset / PAGE_SIZE; // first subpage
+ offset %= PAGE_SIZE;
+ while (1) {
+ size_t n = __copy_page_to_iter(page, offset,
+ min(bytes, (size_t)PAGE_SIZE - offset), i);
+ res += n;
+ bytes -= n;
+ if (!bytes || !n)
+ break;
+ offset += n;
+ if (offset == PAGE_SIZE) {
+ page++;
+ offset = 0;
+ }
+ }
+ return res;
+}
EXPORT_SYMBOL(copy_page_to_iter);
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 06/37] copy_page_to_iter(): fix ITER_DISCARD case
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (3 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 05/37] teach copy_page_to_iter() to handle compound pages Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 07/37] [xarray] iov_iter_fault_in_readable() should do nothing in xarray case Al Viro
` (30 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
we need to advance the iterator...
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 12fb04b23143..c8877cffb7bc 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -965,9 +965,12 @@ static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes
size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
kunmap_atomic(kaddr);
return wanted;
- } else if (unlikely(iov_iter_is_discard(i)))
+ } else if (unlikely(iov_iter_is_discard(i))) {
+ if (unlikely(i->count < bytes))
+ bytes = i->count;
+ i->count -= bytes;
return bytes;
- else if (likely(!iov_iter_is_pipe(i)))
+ } else if (likely(!iov_iter_is_pipe(i)))
return copy_page_to_iter_iovec(page, offset, bytes, i);
else
return copy_page_to_iter_pipe(page, offset, bytes, i);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 07/37] [xarray] iov_iter_fault_in_readable() should do nothing in xarray case
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (4 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 06/37] copy_page_to_iter(): fix ITER_DISCARD case Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 08/37] iov_iter_advance(): use consistent semantics for move past the end Al Viro
` (29 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
... and actually should just check it's given an iovec-backed iterator
in the first place.
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index c8877cffb7bc..a3aabeda945b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -476,7 +476,7 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
int err;
struct iovec v;
- if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
+ if (iter_is_iovec(i)) {
iterate_iovec(i, bytes, v, iov, skip, ({
err = fault_in_pages_readable(v.iov_base, v.iov_len);
if (unlikely(err))
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 08/37] iov_iter_advance(): use consistent semantics for move past the end
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (5 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 07/37] [xarray] iov_iter_fault_in_readable() should do nothing in xarray case Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 09/37] iov_iter: switch ..._full() variants of primitives to use of iov_iter_revert() Al Viro
` (28 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
asking to advance by more than we have left in the iov_iter should
move to the very end; it should *not* leave negative i->count and
it should not spew into syslog, etc. - it's a legitimate operation.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index a3aabeda945b..bdbe6691457d 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1117,8 +1117,6 @@ static inline void pipe_truncate(struct iov_iter *i)
static void pipe_advance(struct iov_iter *i, size_t size)
{
struct pipe_inode_info *pipe = i->pipe;
- if (unlikely(i->count < size))
- size = i->count;
if (size) {
struct pipe_buffer *buf;
unsigned int p_mask = pipe->ring_size - 1;
@@ -1159,6 +1157,8 @@ static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
void iov_iter_advance(struct iov_iter *i, size_t size)
{
+ if (unlikely(i->count < size))
+ size = i->count;
if (unlikely(iov_iter_is_pipe(i))) {
pipe_advance(i, size);
return;
@@ -1168,7 +1168,6 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
return;
}
if (unlikely(iov_iter_is_xarray(i))) {
- size = min(size, i->count);
i->iov_offset += size;
i->count -= size;
return;
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 09/37] iov_iter: switch ..._full() variants of primitives to use of iov_iter_revert()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (6 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 08/37] iov_iter_advance(): use consistent semantics for move past the end Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 10/37] iov_iter: reorder handling of flavours in primitives Al Viro
` (27 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Use corresponding plain variants, revert on short copy. That's the way it
should've been done from the very beginning, except that we didn't have
iov_iter_revert() back then...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
include/linux/uio.h | 32 ++++++++++------
lib/iov_iter.c | 104 ----------------------------------------------------
2 files changed, 21 insertions(+), 115 deletions(-)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 74a401f04bd3..c697c23138b5 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -132,9 +132,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
-bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
-bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
static __always_inline __must_check
size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
@@ -157,10 +155,11 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
static __always_inline __must_check
bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
- if (unlikely(!check_copy_size(addr, bytes, false)))
- return false;
- else
- return _copy_from_iter_full(addr, bytes, i);
+ size_t copied = copy_from_iter(addr, bytes, i);
+ if (likely(copied == bytes))
+ return true;
+ iov_iter_revert(i, bytes - copied);
+ return false;
}
static __always_inline __must_check
@@ -175,10 +174,11 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
static __always_inline __must_check
bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
- if (unlikely(!check_copy_size(addr, bytes, false)))
- return false;
- else
- return _copy_from_iter_full_nocache(addr, bytes, i);
+ size_t copied = copy_from_iter_nocache(addr, bytes, i);
+ if (likely(copied == bytes))
+ return true;
+ iov_iter_revert(i, bytes - copied);
+ return false;
}
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
@@ -278,7 +278,17 @@ struct csum_state {
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csstate, struct iov_iter *i);
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
-bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
+
+static __always_inline __must_check
+bool csum_and_copy_from_iter_full(void *addr, size_t bytes,
+ __wsum *csum, struct iov_iter *i)
+{
+ size_t copied = csum_and_copy_from_iter(addr, bytes, csum, i);
+ if (likely(copied == bytes))
+ return true;
+ iov_iter_revert(i, bytes - copied);
+ return false;
+}
size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
struct iov_iter *i);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index bdbe6691457d..ce9f8b9168ea 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -819,35 +819,6 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(_copy_from_iter);
-bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
-{
- char *to = addr;
- if (unlikely(iov_iter_is_pipe(i))) {
- WARN_ON(1);
- return false;
- }
- if (unlikely(i->count < bytes))
- return false;
-
- if (iter_is_iovec(i))
- might_fault();
- iterate_all_kinds(i, bytes, v, ({
- if (copyin((to += v.iov_len) - v.iov_len,
- v.iov_base, v.iov_len))
- return false;
- 0;}),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len)
- )
-
- iov_iter_advance(i, bytes);
- return true;
-}
-EXPORT_SYMBOL(_copy_from_iter_full);
-
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
char *to = addr;
@@ -907,32 +878,6 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif
-bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
-{
- char *to = addr;
- if (unlikely(iov_iter_is_pipe(i))) {
- WARN_ON(1);
- return false;
- }
- if (unlikely(i->count < bytes))
- return false;
- iterate_all_kinds(i, bytes, v, ({
- if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
- v.iov_base, v.iov_len))
- return false;
- 0;}),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len)
- )
-
- iov_iter_advance(i, bytes);
- return true;
-}
-EXPORT_SYMBOL(_copy_from_iter_full_nocache);
-
static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
struct page *head;
@@ -1740,55 +1685,6 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
}
EXPORT_SYMBOL(csum_and_copy_from_iter);
-bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
- struct iov_iter *i)
-{
- char *to = addr;
- __wsum sum, next;
- size_t off = 0;
- sum = *csum;
- if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
- WARN_ON(1);
- return false;
- }
- if (unlikely(i->count < bytes))
- return false;
- iterate_all_kinds(i, bytes, v, ({
- next = csum_and_copy_from_user(v.iov_base,
- (to += v.iov_len) - v.iov_len,
- v.iov_len);
- if (!next)
- return false;
- sum = csum_block_add(sum, next, off);
- off += v.iov_len;
- 0;
- }), ({
- char *p = kmap_atomic(v.bv_page);
- sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
- p + v.bv_offset, v.bv_len,
- sum, off);
- kunmap_atomic(p);
- off += v.bv_len;
- }),({
- sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
- v.iov_base, v.iov_len,
- sum, off);
- off += v.iov_len;
- }), ({
- char *p = kmap_atomic(v.bv_page);
- sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
- p + v.bv_offset, v.bv_len,
- sum, off);
- kunmap_atomic(p);
- off += v.bv_len;
- })
- )
- *csum = sum;
- iov_iter_advance(i, bytes);
- return true;
-}
-EXPORT_SYMBOL(csum_and_copy_from_iter_full);
-
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
struct iov_iter *i)
{
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 10/37] iov_iter: reorder handling of flavours in primitives
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (7 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 09/37] iov_iter: switch ..._full() variants of primitives to use of iov_iter_revert() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 11/37] iov_iter_advance(): don't modify ->iov_offset for ITER_DISCARD Al Viro
` (26 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
iovec is the most common one; test it first and test explicitly,
rather than "not anything else". Replace all flavour checks with
use of iov_iter_is_...() helpers.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 91 +++++++++++++++++++++++++++++-----------------------------
1 file changed, 45 insertions(+), 46 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ce9f8b9168ea..bdcf1fbeb2db 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -117,22 +117,21 @@
#define iterate_all_kinds(i, n, v, I, B, K, X) { \
if (likely(n)) { \
size_t skip = i->iov_offset; \
- if (unlikely(i->type & ITER_BVEC)) { \
+ if (likely(iter_is_iovec(i))) { \
+ const struct iovec *iov; \
+ struct iovec v; \
+ iterate_iovec(i, n, v, iov, skip, (I)) \
+ } else if (iov_iter_is_bvec(i)) { \
struct bio_vec v; \
struct bvec_iter __bi; \
iterate_bvec(i, n, v, __bi, skip, (B)) \
- } else if (unlikely(i->type & ITER_KVEC)) { \
+ } else if (iov_iter_is_kvec(i)) { \
const struct kvec *kvec; \
struct kvec v; \
iterate_kvec(i, n, v, kvec, skip, (K)) \
- } else if (unlikely(i->type & ITER_DISCARD)) { \
- } else if (unlikely(i->type & ITER_XARRAY)) { \
+ } else if (iov_iter_is_xarray(i)) { \
struct bio_vec v; \
iterate_xarray(i, n, v, skip, (X)); \
- } else { \
- const struct iovec *iov; \
- struct iovec v; \
- iterate_iovec(i, n, v, iov, skip, (I)) \
} \
} \
}
@@ -142,7 +141,17 @@
n = i->count; \
if (i->count) { \
size_t skip = i->iov_offset; \
- if (unlikely(i->type & ITER_BVEC)) { \
+ if (likely(iter_is_iovec(i))) { \
+ const struct iovec *iov; \
+ struct iovec v; \
+ iterate_iovec(i, n, v, iov, skip, (I)) \
+ if (skip == iov->iov_len) { \
+ iov++; \
+ skip = 0; \
+ } \
+ i->nr_segs -= iov - i->iov; \
+ i->iov = iov; \
+ } else if (iov_iter_is_bvec(i)) { \
const struct bio_vec *bvec = i->bvec; \
struct bio_vec v; \
struct bvec_iter __bi; \
@@ -150,7 +159,7 @@
i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
i->nr_segs -= i->bvec - bvec; \
skip = __bi.bi_bvec_done; \
- } else if (unlikely(i->type & ITER_KVEC)) { \
+ } else if (iov_iter_is_kvec(i)) { \
const struct kvec *kvec; \
struct kvec v; \
iterate_kvec(i, n, v, kvec, skip, (K)) \
@@ -160,21 +169,11 @@
} \
i->nr_segs -= kvec - i->kvec; \
i->kvec = kvec; \
- } else if (unlikely(i->type & ITER_DISCARD)) { \
- skip += n; \
- } else if (unlikely(i->type & ITER_XARRAY)) { \
+ } else if (iov_iter_is_xarray(i)) { \
struct bio_vec v; \
iterate_xarray(i, n, v, skip, (X)) \
- } else { \
- const struct iovec *iov; \
- struct iovec v; \
- iterate_iovec(i, n, v, iov, skip, (I)) \
- if (skip == iov->iov_len) { \
- iov++; \
- skip = 0; \
- } \
- i->nr_segs -= iov - i->iov; \
- i->iov = iov; \
+ } else if (iov_iter_is_discard(i)) { \
+ skip += n; \
} \
i->count -= n; \
i->iov_offset = skip; \
@@ -905,20 +904,24 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i)
{
- if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
+ if (likely(iter_is_iovec(i)))
+ return copy_page_to_iter_iovec(page, offset, bytes, i);
+ if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
void *kaddr = kmap_atomic(page);
size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
kunmap_atomic(kaddr);
return wanted;
- } else if (unlikely(iov_iter_is_discard(i))) {
+ }
+ if (iov_iter_is_pipe(i))
+ return copy_page_to_iter_pipe(page, offset, bytes, i);
+ if (unlikely(iov_iter_is_discard(i))) {
if (unlikely(i->count < bytes))
bytes = i->count;
i->count -= bytes;
return bytes;
- } else if (likely(!iov_iter_is_pipe(i)))
- return copy_page_to_iter_iovec(page, offset, bytes, i);
- else
- return copy_page_to_iter_pipe(page, offset, bytes, i);
+ }
+ WARN_ON(1);
+ return 0;
}
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
@@ -951,17 +954,16 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
{
if (unlikely(!page_copy_sane(page, offset, bytes)))
return 0;
- if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
- WARN_ON(1);
- return 0;
- }
- if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
+ if (likely(iter_is_iovec(i)))
+ return copy_page_from_iter_iovec(page, offset, bytes, i);
+ if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
void *kaddr = kmap_atomic(page);
size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
kunmap_atomic(kaddr);
return wanted;
- } else
- return copy_page_from_iter_iovec(page, offset, bytes, i);
+ }
+ WARN_ON(1);
+ return 0;
}
EXPORT_SYMBOL(copy_page_from_iter);
@@ -1203,16 +1205,13 @@ EXPORT_SYMBOL(iov_iter_revert);
*/
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
- if (unlikely(iov_iter_is_pipe(i)))
- return i->count; // it is a silly place, anyway
- if (i->nr_segs == 1)
- return i->count;
- if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i)))
- return i->count;
- if (iov_iter_is_bvec(i))
- return min(i->count, i->bvec->bv_len - i->iov_offset);
- else
- return min(i->count, i->iov->iov_len - i->iov_offset);
+ if (i->nr_segs > 1) {
+ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
+ return min(i->count, i->iov->iov_len - i->iov_offset);
+ if (iov_iter_is_bvec(i))
+ return min(i->count, i->bvec->bv_len - i->iov_offset);
+ }
+ return i->count;
}
EXPORT_SYMBOL(iov_iter_single_seg_count);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 11/37] iov_iter_advance(): don't modify ->iov_offset for ITER_DISCARD
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (8 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 10/37] iov_iter: reorder handling of flavours in primitives Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 12/37] iov_iter: separate direction from flavour Al Viro
` (25 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
the field is not used for that flavour
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index bdcf1fbeb2db..e6c5834da32d 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -172,8 +172,6 @@
} else if (iov_iter_is_xarray(i)) { \
struct bio_vec v; \
iterate_xarray(i, n, v, skip, (X)) \
- } else if (iov_iter_is_discard(i)) { \
- skip += n; \
} \
i->count -= n; \
i->iov_offset = skip; \
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 12/37] iov_iter: separate direction from flavour
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (9 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 11/37] iov_iter_advance(): don't modify ->iov_offset for ITER_DISCARD Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 13/37] iov_iter: optimize iov_iter_advance() for iovec and kvec Al Viro
` (24 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Instead of having them mixed in iter->type, use separate ->iter_type
and ->data_source (u8 and bool resp.) And don't bother with (pseudo-)
bitmap for the former - microoptimizations from being able to check
if the flavour is one of two values are not worth the confusion for
optimizer. It can't prove that we never get e.g. ITER_IOVEC | ITER_PIPE,
so we end up with extra headache.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
include/linux/uio.h | 24 ++++++--------
lib/iov_iter.c | 92 +++++++++++++++++++++++++++++++++--------------------
2 files changed, 67 insertions(+), 49 deletions(-)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index c697c23138b5..56b6ff235281 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -19,21 +19,17 @@ struct kvec {
enum iter_type {
/* iter types */
- ITER_IOVEC = 4,
- ITER_KVEC = 8,
- ITER_BVEC = 16,
- ITER_PIPE = 32,
- ITER_DISCARD = 64,
- ITER_XARRAY = 128,
+ ITER_IOVEC,
+ ITER_KVEC,
+ ITER_BVEC,
+ ITER_PIPE,
+ ITER_XARRAY,
+ ITER_DISCARD,
};
struct iov_iter {
- /*
- * Bit 0 is the read/write bit, set if we're writing.
- * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
- * the caller isn't expecting to drop a page reference when done.
- */
- unsigned int type;
+ u8 iter_type;
+ bool data_source;
size_t iov_offset;
size_t count;
union {
@@ -55,7 +51,7 @@ struct iov_iter {
static inline enum iter_type iov_iter_type(const struct iov_iter *i)
{
- return i->type & ~(READ | WRITE);
+ return i->iter_type;
}
static inline bool iter_is_iovec(const struct iov_iter *i)
@@ -90,7 +86,7 @@ static inline bool iov_iter_is_xarray(const struct iov_iter *i)
static inline unsigned char iov_iter_rw(const struct iov_iter *i)
{
- return i->type & (READ | WRITE);
+ return i->data_source ? WRITE : READ;
}
/*
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e6c5834da32d..5a02c94a51ab 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -489,19 +489,26 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
size_t count)
{
WARN_ON(direction & ~(READ | WRITE));
- direction &= READ | WRITE;
/* It will get better. Eventually... */
- if (uaccess_kernel()) {
- i->type = ITER_KVEC | direction;
- i->kvec = (struct kvec *)iov;
- } else {
- i->type = ITER_IOVEC | direction;
- i->iov = iov;
- }
- i->nr_segs = nr_segs;
- i->iov_offset = 0;
- i->count = count;
+ if (uaccess_kernel())
+ *i = (struct iov_iter) {
+ .iter_type = ITER_KVEC,
+ .data_source = direction,
+ .kvec = (struct kvec *)iov,
+ .nr_segs = nr_segs,
+ .iov_offset = 0,
+ .count = count
+ };
+ else
+ *i = (struct iov_iter) {
+ .iter_type = ITER_IOVEC,
+ .data_source = direction,
+ .iov = iov,
+ .nr_segs = nr_segs,
+ .iov_offset = 0,
+ .count = count
+ };
}
EXPORT_SYMBOL(iov_iter_init);
@@ -1218,11 +1225,14 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
size_t count)
{
WARN_ON(direction & ~(READ | WRITE));
- i->type = ITER_KVEC | (direction & (READ | WRITE));
- i->kvec = kvec;
- i->nr_segs = nr_segs;
- i->iov_offset = 0;
- i->count = count;
+ *i = (struct iov_iter){
+ .iter_type = ITER_KVEC,
+ .data_source = direction,
+ .kvec = kvec,
+ .nr_segs = nr_segs,
+ .iov_offset = 0,
+ .count = count
+ };
}
EXPORT_SYMBOL(iov_iter_kvec);
@@ -1231,11 +1241,14 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
size_t count)
{
WARN_ON(direction & ~(READ | WRITE));
- i->type = ITER_BVEC | (direction & (READ | WRITE));
- i->bvec = bvec;
- i->nr_segs = nr_segs;
- i->iov_offset = 0;
- i->count = count;
+ *i = (struct iov_iter){
+ .iter_type = ITER_BVEC,
+ .data_source = direction,
+ .bvec = bvec,
+ .nr_segs = nr_segs,
+ .iov_offset = 0,
+ .count = count
+ };
}
EXPORT_SYMBOL(iov_iter_bvec);
@@ -1245,12 +1258,15 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
{
BUG_ON(direction != READ);
WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
- i->type = ITER_PIPE | READ;
- i->pipe = pipe;
- i->head = pipe->head;
- i->iov_offset = 0;
- i->count = count;
- i->start_head = i->head;
+ *i = (struct iov_iter){
+ .iter_type = ITER_PIPE,
+ .data_source = false,
+ .pipe = pipe,
+ .head = pipe->head,
+ .start_head = pipe->head,
+ .iov_offset = 0,
+ .count = count
+ };
}
EXPORT_SYMBOL(iov_iter_pipe);
@@ -1271,11 +1287,14 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
struct xarray *xarray, loff_t start, size_t count)
{
BUG_ON(direction & ~1);
- i->type = ITER_XARRAY | (direction & (READ | WRITE));
- i->xarray = xarray;
- i->xarray_start = start;
- i->count = count;
- i->iov_offset = 0;
+ *i = (struct iov_iter) {
+ .iter_type = ITER_XARRAY,
+ .data_source = direction,
+ .xarray = xarray,
+ .xarray_start = start,
+ .count = count,
+ .iov_offset = 0
+ };
}
EXPORT_SYMBOL(iov_iter_xarray);
@@ -1291,9 +1310,12 @@ EXPORT_SYMBOL(iov_iter_xarray);
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
BUG_ON(direction != READ);
- i->type = ITER_DISCARD | READ;
- i->count = count;
- i->iov_offset = 0;
+ *i = (struct iov_iter){
+ .iter_type = ITER_DISCARD,
+ .data_source = false,
+ .count = count,
+ .iov_offset = 0
+ };
}
EXPORT_SYMBOL(iov_iter_discard);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 13/37] iov_iter: optimize iov_iter_advance() for iovec and kvec
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (10 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 12/37] iov_iter: separate direction from flavour Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 14/37] sanitize iov_iter_fault_in_readable() Al Viro
` (23 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
We can do better than generic iterate_and_advance() for this one;
inspired by bvec_iter_advance() (and massaged into that form by
equivalent transformations).
[fixed a braino caught by kernel test robot <oliver.sang@intel.com>]
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 42 ++++++++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5a02c94a51ab..5621a3457118 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1107,28 +1107,42 @@ static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
i->iov_offset = bi.bi_bvec_done;
}
+static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
+{
+ const struct iovec *iov, *end;
+
+ if (!i->count)
+ return;
+ i->count -= size;
+
+ size += i->iov_offset; // from beginning of current segment
+ for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
+ if (likely(size < iov->iov_len))
+ break;
+ size -= iov->iov_len;
+ }
+ i->iov_offset = size;
+ i->nr_segs -= iov - i->iov;
+ i->iov = iov;
+}
+
void iov_iter_advance(struct iov_iter *i, size_t size)
{
if (unlikely(i->count < size))
size = i->count;
- if (unlikely(iov_iter_is_pipe(i))) {
+ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
+ /* iovec and kvec have identical layouts */
+ iov_iter_iovec_advance(i, size);
+ } else if (iov_iter_is_bvec(i)) {
+ iov_iter_bvec_advance(i, size);
+ } else if (iov_iter_is_pipe(i)) {
pipe_advance(i, size);
- return;
- }
- if (unlikely(iov_iter_is_discard(i))) {
- i->count -= size;
- return;
- }
- if (unlikely(iov_iter_is_xarray(i))) {
+ } else if (unlikely(iov_iter_is_xarray(i))) {
i->iov_offset += size;
i->count -= size;
- return;
- }
- if (iov_iter_is_bvec(i)) {
- iov_iter_bvec_advance(i, size);
- return;
+ } else if (iov_iter_is_discard(i)) {
+ i->count -= size;
}
- iterate_and_advance(i, size, v, 0, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 14/37] sanitize iov_iter_fault_in_readable()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (11 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 13/37] iov_iter: optimize iov_iter_advance() for iovec and kvec Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 15/37] iov_iter_alignment(): don't bother with iterate_all_kinds() Al Viro
` (22 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
1) constify iov_iter argument; we are not advancing it in this primitive.
2) cap the amount requested by the amount of data in iov_iter. All
existing callers should've been safe, but the check is really cheap and
doing it here makes for easier analysis, as well as more consistent
semantics among the primitives.
3) don't bother with iterate_iovec(). Explicit loop is not any harder
to follow, and we get rid of standalone iterate_iovec() users - it's
only used by iterate_and_advance() and (soon to be gone) iterate_all_kinds().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
include/linux/uio.h | 2 +-
lib/iov_iter.c | 26 ++++++++++++++++----------
2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 56b6ff235281..18b4e0a8e3bf 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -119,7 +119,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
struct iov_iter *i, unsigned long offset, size_t bytes);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
+int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes);
size_t iov_iter_single_seg_count(const struct iov_iter *i);
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
struct iov_iter *i);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5621a3457118..21b3e253b766 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -466,19 +466,25 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
* Return 0 on success, or non-zero if the memory could not be accessed (i.e.
* because it is an invalid address).
*/
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
+int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
{
- size_t skip = i->iov_offset;
- const struct iovec *iov;
- int err;
- struct iovec v;
-
if (iter_is_iovec(i)) {
- iterate_iovec(i, bytes, v, iov, skip, ({
- err = fault_in_pages_readable(v.iov_base, v.iov_len);
+ const struct iovec *p;
+ size_t skip;
+
+ if (bytes > i->count)
+ bytes = i->count;
+ for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
+ size_t len = min(bytes, p->iov_len - skip);
+ int err;
+
+ if (unlikely(!len))
+ continue;
+ err = fault_in_pages_readable(p->iov_base + skip, len);
if (unlikely(err))
- return err;
- 0;}))
+ return err;
+ bytes -= len;
+ }
}
return 0;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 15/37] iov_iter_alignment(): don't bother with iterate_all_kinds()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (12 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 14/37] sanitize iov_iter_fault_in_readable() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 16/37] iov_iter_gap_alignment(): get rid of iterate_all_kinds() Al Viro
` (21 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
It's easier to go over the array manually. We need to watch out
for a truncated iov_iter, though - the iovec array might cover more
than i->count.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 53 insertions(+), 10 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 21b3e253b766..bb7089cd0cf7 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1339,27 +1339,70 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
}
EXPORT_SYMBOL(iov_iter_discard);
-unsigned long iov_iter_alignment(const struct iov_iter *i)
+static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
{
unsigned long res = 0;
size_t size = i->count;
+ size_t skip = i->iov_offset;
+ unsigned k;
+
+ for (k = 0; k < i->nr_segs; k++, skip = 0) {
+ size_t len = i->iov[k].iov_len - skip;
+ if (len) {
+ res |= (unsigned long)i->iov[k].iov_base + skip;
+ if (len > size)
+ len = size;
+ res |= len;
+ size -= len;
+ if (!size)
+ break;
+ }
+ }
+ return res;
+}
- if (unlikely(iov_iter_is_pipe(i))) {
+static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
+{
+ unsigned res = 0;
+ size_t size = i->count;
+ unsigned skip = i->iov_offset;
+ unsigned k;
+
+ for (k = 0; k < i->nr_segs; k++, skip = 0) {
+ size_t len = i->bvec[k].bv_len - skip;
+ res |= (unsigned long)i->bvec[k].bv_offset + skip;
+ if (len > size)
+ len = size;
+ res |= len;
+ size -= len;
+ if (!size)
+ break;
+ }
+ return res;
+}
+
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+ /* iovec and kvec have identical layouts */
+ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
+ return iov_iter_alignment_iovec(i);
+
+ if (iov_iter_is_bvec(i))
+ return iov_iter_alignment_bvec(i);
+
+ if (iov_iter_is_pipe(i)) {
unsigned int p_mask = i->pipe->ring_size - 1;
+ size_t size = i->count;
if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
return size | i->iov_offset;
return size;
}
- if (unlikely(iov_iter_is_xarray(i)))
+
+ if (iov_iter_is_xarray(i))
return (i->xarray_start + i->iov_offset) | i->count;
- iterate_all_kinds(i, size, v,
- (res |= (unsigned long)v.iov_base | v.iov_len, 0),
- res |= v.bv_offset | v.bv_len,
- res |= (unsigned long)v.iov_base | v.iov_len,
- res |= v.bv_offset | v.bv_len
- )
- return res;
+
+ return 0;
}
EXPORT_SYMBOL(iov_iter_alignment);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 16/37] iov_iter_gap_alignment(): get rid of iterate_all_kinds()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (13 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 15/37] iov_iter_alignment(): don't bother with iterate_all_kinds() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-09 13:01 ` Qian Cai
2021-06-06 19:10 ` [RFC PATCH 17/37] get rid of iterate_all_kinds() in iov_iter_get_pages()/iov_iter_get_pages_alloc() Al Viro
` (20 subsequent siblings)
35 siblings, 1 reply; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
For one thing, it's only used for iovec (and makes sense only for those).
For another, here we don't care about iov_offset, since the beginning of
the first segment and the end of the last one are ignored. So it makes
a lot more sense to just walk through the iovec array...
We need to deal with the case of truncated iov_iter, but unlike the
situation with iov_iter_alignment() we don't care where the last
segment ends - just which segment is the last one.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index bb7089cd0cf7..a6947301b9a0 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1409,23 +1409,26 @@ EXPORT_SYMBOL(iov_iter_alignment);
unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
unsigned long res = 0;
+ unsigned long v = 0;
size_t size = i->count;
+ unsigned k;
- if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
+ if (unlikely(iter_is_iovec(i))) {
WARN_ON(1);
return ~0U;
}
- iterate_all_kinds(i, size, v,
- (res |= (!res ? 0 : (unsigned long)v.iov_base) |
- (size != v.iov_len ? size : 0), 0),
- (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
- (size != v.bv_len ? size : 0)),
- (res |= (!res ? 0 : (unsigned long)v.iov_base) |
- (size != v.iov_len ? size : 0)),
- (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
- (size != v.bv_len ? size : 0))
- );
+ for (k = 0; k < i->nr_segs; k++) {
+ if (i->iov[k].iov_len) {
+ unsigned long base = (unsigned long)i->iov[k].iov_base;
+ if (v) // if not the first one
+ res |= base | v; // this start | previous end
+ v = base + i->iov[k].iov_len;
+ if (size <= i->iov[k].iov_len)
+ break;
+ size -= i->iov[k].iov_len;
+ }
+ }
return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* Re: [RFC PATCH 16/37] iov_iter_gap_alignment(): get rid of iterate_all_kinds()
2021-06-06 19:10 ` [RFC PATCH 16/37] iov_iter_gap_alignment(): get rid of iterate_all_kinds() Al Viro
@ 2021-06-09 13:01 ` Qian Cai
2021-06-09 18:06 ` Al Viro
0 siblings, 1 reply; 57+ messages in thread
From: Qian Cai @ 2021-06-09 13:01 UTC (permalink / raw)
To: Al Viro, Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
On 6/6/2021 3:10 PM, Al Viro wrote:
> For one thing, it's only used for iovec (and makes sense only for those).
> For another, here we don't care about iov_offset, since the beginning of
> the first segment and the end of the last one are ignored. So it makes
> a lot more sense to just walk through the iovec array...
>
> We need to deal with the case of truncated iov_iter, but unlike the
> situation with iov_iter_alignment() we don't care where the last
> segment ends - just which segment is the last one.
>
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Al, today's linux-next started to give a boot warning. I looked at the code and noticed this series is new. Any thoughts?
[ 70.060822][ T1286] WARNING: CPU: 26 PID: 1286 at lib/iov_iter.c:1320 iov_iter_gap_alignment+0x144/0x1a8
[ 70.070323][ T1286] Modules linked in: loop cppc_cpufreq processor efivarfs ip_tables x_tables ext4 mbcache jbd2 dm_mod igb i2c_algo_bit nvme mlx5_core i2c_core nvme_core firmware_class
[ 70.086922][ T1286] CPU: 26 PID: 1286 Comm: fwupd Tainted: G W 5.13.0-rc5-next-20210609+ #19
[ 70.096668][ T1286] Hardware name: MiTAC RAPTOR EV-883832-X3-0001/RAPTOR, BIOS 1.6 06/28/2020
[ 70.105196][ T1286] pstate: 10000005 (nzcV daif -PAN -UAO -TCO BTYPE=--)
[ 70.111902][ T1286] pc : iov_iter_gap_alignment+0x144/0x1a8
[ 70.117485][ T1286] lr : blk_rq_map_user_iov+0xd00/0xff0
[ 70.122808][ T1286] sp : ffff80002360f680
[ 70.126821][ T1286] x29: ffff80002360f680 x28: 0000000000000000 x27: ffff00005dc8d620
[ 70.134670][ T1286] x26: 0000000000001000 x25: ffff80002360f980 x24: 0000000000001000
[ 70.142518][ T1286] x23: 0000000000000000 x22: 0000000000000007 x21: 1ffff000046c1f30
[ 70.150365][ T1286] x20: 0000000000000fff x19: ffff80002360f980 x18: 0000000000000000
[ 70.158214][ T1286] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[ 70.166067][ T1286] x14: 0000000000000045 x13: 0000000000000000 x12: 000000000000f1f1
[ 70.173915][ T1286] x11: ffff00093d700040 x10: 000000000000f2f2 x9 : 00000000f3000000
[ 70.181763][ T1286] x8 : dfff800000000000 x7 : 00000000f3f3f3f3 x6 : 00000000f2000000
[ 70.189610][ T1286] x5 : 0000000000000000 x4 : 0000000000000cc0 x3 : 1ffff000046c1f30
[ 70.197459][ T1286] x2 : 1ffff000046c1f30 x1 : 0000000000000000 x0 : 0000000000000000
[ 70.205307][ T1286] Call trace:
[ 70.208453][ T1286] iov_iter_gap_alignment+0x144/0x1a8
[ 70.213693][ T1286] blk_rq_map_user_iov+0xd00/0xff0
[ 70.218668][ T1286] blk_rq_map_user+0xf4/0x190
[ 70.223209][ T1286] nvme_submit_user_cmd.isra.0+0x150/0x4e0 [nvme_core]
[ 70.229943][ T1286] nvme_user_cmd.isra.0+0x298/0x3d0 [nvme_core]
[ 70.236063][ T1286] nvme_dev_ioctl+0x19c/0x3c8 [nvme_core]
[ 70.241661][ T1286] __arm64_sys_ioctl+0x114/0x180
[ 70.246463][ T1286] invoke_syscall.constprop.0+0xdc/0x1d8
[ 70.251959][ T1286] do_el0_svc+0x1f8/0x298
[ 70.256151][ T1286] el0_svc+0x20/0x30
[ 70.259910][ T1286] el0t_64_sync_handler+0xb0/0xb8
[ 70.264796][ T1286] el0t_64_sync+0x178/0x17c
[ 70.269161][ T1286] irq event stamp: 254526
[ 70.273348][ T1286] hardirqs last enabled at (254525): [<ffff8000102cae90>] seqcount_lockdep_reader_access.constprop.0+0x138/0x190
[ 70.285183][ T1286] hardirqs last disabled at (254526): [<ffff8000111371c0>] el1_dbg+0x28/0x80
[ 70.293805][ T1286] softirqs last enabled at (250668): [<ffff800010010a90>] _stext+0xa90/0x1114
[ 70.302597][ T1286] softirqs last disabled at (250661): [<ffff800010114f64>] irq_exit+0x53c/0x610
> ---
> lib/iov_iter.c | 25 ++++++++++++++-----------
> 1 file changed, 14 insertions(+), 11 deletions(-)
>
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index bb7089cd0cf7..a6947301b9a0 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -1409,23 +1409,26 @@ EXPORT_SYMBOL(iov_iter_alignment);
> unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
> {
> unsigned long res = 0;
> + unsigned long v = 0;
> size_t size = i->count;
> + unsigned k;
>
> - if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
> + if (unlikely(iter_is_iovec(i))) {
> WARN_ON(1);
> return ~0U;
> }
>
> - iterate_all_kinds(i, size, v,
> - (res |= (!res ? 0 : (unsigned long)v.iov_base) |
> - (size != v.iov_len ? size : 0), 0),
> - (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
> - (size != v.bv_len ? size : 0)),
> - (res |= (!res ? 0 : (unsigned long)v.iov_base) |
> - (size != v.iov_len ? size : 0)),
> - (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
> - (size != v.bv_len ? size : 0))
> - );
> + for (k = 0; k < i->nr_segs; k++) {
> + if (i->iov[k].iov_len) {
> + unsigned long base = (unsigned long)i->iov[k].iov_base;
> + if (v) // if not the first one
> + res |= base | v; // this start | previous end
> + v = base + i->iov[k].iov_len;
> + if (size <= i->iov[k].iov_len)
> + break;
> + size -= i->iov[k].iov_len;
> + }
> + }
> return res;
> }
> EXPORT_SYMBOL(iov_iter_gap_alignment);
>
^ permalink raw reply [flat|nested] 57+ messages in thread
* Re: [RFC PATCH 16/37] iov_iter_gap_alignment(): get rid of iterate_all_kinds()
2021-06-09 13:01 ` Qian Cai
@ 2021-06-09 18:06 ` Al Viro
0 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-09 18:06 UTC (permalink / raw)
To: Qian Cai
Cc: Linus Torvalds, linux-fsdevel, linux-kernel, David Sterba,
Miklos Szeredi, Anton Altaparmakov, David Howells,
Matthew Wilcox, Pavel Begunkov
On Wed, Jun 09, 2021 at 09:01:36AM -0400, Qian Cai wrote:
> On 6/6/2021 3:10 PM, Al Viro wrote:
> > For one thing, it's only used for iovec (and makes sense only for those).
^^^^^^^^^^^^^^^^^^^
[snip]
> > - if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
> > + if (unlikely(iter_is_iovec(i))) {
^^^^^^^^^^^^^^^^
This. A nice demonstration of a braino repeatedly overlooked on read-through,
especially when the change described in the commit message is obvious and
looks similar to the change done in the patch.
Happens without any deliberate attacks involved - as a matter of fact,
it's easier to spot that kind of crap in somebody else's patch...
Anyway, the obvious fix (
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 3a68f578695f..6569e3f5d01d 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1402,10 +1402,8 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
size_t size = i->count;
unsigned k;
- if (unlikely(iter_is_iovec(i))) {
- WARN_ON(1);
+ if (WARN_ON(!iter_is_iovec(i)))
return ~0U;
- }
for (k = 0; k < i->nr_segs; k++) {
if (i->iov[k].iov_len) {
) folded in and pushed...
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 17/37] get rid of iterate_all_kinds() in iov_iter_get_pages()/iov_iter_get_pages_alloc()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (14 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 16/37] iov_iter_gap_alignment(): get rid of iterate_all_kinds() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 18/37] iov_iter_npages(): don't bother with iterate_all_kinds() Al Viro
` (19 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Here iterate_all_kinds() is used just to find the first (non-empty, in
case of iovec) segment. Which can be easily done explicitly.
Note that in the bvec case we can now get more than PAGE_SIZE worth of them,
in the case when we have a compound page in the bvec and a range that crosses
a subpage boundary. The older behaviour had been to stop on that boundary;
we used to get the right first page (for_each_bvec() took care of that),
but that was all we'd get.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 147 +++++++++++++++++++++++++++++++++++----------------------
1 file changed, 91 insertions(+), 56 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index a6947301b9a0..5e8d5e4ee92d 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1463,9 +1463,6 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
unsigned int iter_head, npages;
size_t capacity;
- if (!maxsize)
- return 0;
-
if (!sanity(i))
return -EFAULT;
@@ -1546,29 +1543,67 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
return actual;
}
+/* must be done on non-empty ITER_IOVEC one */
+static unsigned long first_iovec_segment(const struct iov_iter *i,
+ size_t *size, size_t *start,
+ size_t maxsize, unsigned maxpages)
+{
+ size_t skip;
+ long k;
+
+ for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
+ unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
+ size_t len = i->iov[k].iov_len - skip;
+
+ if (unlikely(!len))
+ continue;
+ if (len > maxsize)
+ len = maxsize;
+ len += (*start = addr % PAGE_SIZE);
+ if (len > maxpages * PAGE_SIZE)
+ len = maxpages * PAGE_SIZE;
+ *size = len;
+ return addr & PAGE_MASK;
+ }
+ BUG(); // if it had been empty, we wouldn't get called
+}
+
+/* must be done on non-empty ITER_BVEC one */
+static struct page *first_bvec_segment(const struct iov_iter *i,
+ size_t *size, size_t *start,
+ size_t maxsize, unsigned maxpages)
+{
+ struct page *page;
+ size_t skip = i->iov_offset, len;
+
+ len = i->bvec->bv_len - skip;
+ if (len > maxsize)
+ len = maxsize;
+ skip += i->bvec->bv_offset;
+ page = i->bvec->bv_page + skip / PAGE_SIZE;
+ len += (*start = skip % PAGE_SIZE);
+ if (len > maxpages * PAGE_SIZE)
+ len = maxpages * PAGE_SIZE;
+ *size = len;
+ return page;
+}
+
ssize_t iov_iter_get_pages(struct iov_iter *i,
struct page **pages, size_t maxsize, unsigned maxpages,
size_t *start)
{
+ size_t len;
+ int n, res;
+
if (maxsize > i->count)
maxsize = i->count;
+ if (!maxsize)
+ return 0;
- if (unlikely(iov_iter_is_pipe(i)))
- return pipe_get_pages(i, pages, maxsize, maxpages, start);
- if (unlikely(iov_iter_is_xarray(i)))
- return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
- if (unlikely(iov_iter_is_discard(i)))
- return -EFAULT;
-
- iterate_all_kinds(i, maxsize, v, ({
- unsigned long addr = (unsigned long)v.iov_base;
- size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
- int n;
- int res;
+ if (likely(iter_is_iovec(i))) {
+ unsigned long addr;
- if (len > maxpages * PAGE_SIZE)
- len = maxpages * PAGE_SIZE;
- addr &= ~(PAGE_SIZE - 1);
+ addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
n = DIV_ROUND_UP(len, PAGE_SIZE);
res = get_user_pages_fast(addr, n,
iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
@@ -1576,17 +1611,21 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
if (unlikely(res < 0))
return res;
return (res == n ? len : res * PAGE_SIZE) - *start;
- 0;}),({
- /* can't be more than PAGE_SIZE */
- *start = v.bv_offset;
- get_page(*pages = v.bv_page);
- return v.bv_len;
- }),({
- return -EFAULT;
- }),
- 0
- )
- return 0;
+ }
+ if (iov_iter_is_bvec(i)) {
+ struct page *page;
+
+ page = first_bvec_segment(i, &len, start, maxsize, maxpages);
+ n = DIV_ROUND_UP(len, PAGE_SIZE);
+ while (n--)
+ get_page(*pages++ = page++);
+ return len - *start;
+ }
+ if (iov_iter_is_pipe(i))
+ return pipe_get_pages(i, pages, maxsize, maxpages, start);
+ if (iov_iter_is_xarray(i))
+ return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
+ return -EFAULT;
}
EXPORT_SYMBOL(iov_iter_get_pages);
@@ -1603,9 +1642,6 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
unsigned int iter_head, npages;
ssize_t n;
- if (!maxsize)
- return 0;
-
if (!sanity(i))
return -EFAULT;
@@ -1678,24 +1714,18 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
size_t *start)
{
struct page **p;
+ size_t len;
+ int n, res;
if (maxsize > i->count)
maxsize = i->count;
+ if (!maxsize)
+ return 0;
- if (unlikely(iov_iter_is_pipe(i)))
- return pipe_get_pages_alloc(i, pages, maxsize, start);
- if (unlikely(iov_iter_is_xarray(i)))
- return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
- if (unlikely(iov_iter_is_discard(i)))
- return -EFAULT;
-
- iterate_all_kinds(i, maxsize, v, ({
- unsigned long addr = (unsigned long)v.iov_base;
- size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
- int n;
- int res;
+ if (likely(iter_is_iovec(i))) {
+ unsigned long addr;
- addr &= ~(PAGE_SIZE - 1);
+ addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
n = DIV_ROUND_UP(len, PAGE_SIZE);
p = get_pages_array(n);
if (!p)
@@ -1708,19 +1738,24 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
}
*pages = p;
return (res == n ? len : res * PAGE_SIZE) - *start;
- 0;}),({
- /* can't be more than PAGE_SIZE */
- *start = v.bv_offset;
- *pages = p = get_pages_array(1);
+ }
+ if (iov_iter_is_bvec(i)) {
+ struct page *page;
+
+ page = first_bvec_segment(i, &len, start, maxsize, ~0U);
+ n = DIV_ROUND_UP(len, PAGE_SIZE);
+ *pages = p = get_pages_array(n);
if (!p)
return -ENOMEM;
- get_page(*p = v.bv_page);
- return v.bv_len;
- }),({
- return -EFAULT;
- }), 0
- )
- return 0;
+ while (n--)
+ get_page(*p++ = page++);
+ return len - *start;
+ }
+ if (iov_iter_is_pipe(i))
+ return pipe_get_pages_alloc(i, pages, maxsize, start);
+ if (iov_iter_is_xarray(i))
+ return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
+ return -EFAULT;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 18/37] iov_iter_npages(): don't bother with iterate_all_kinds()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (15 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 17/37] get rid of iterate_all_kinds() in iov_iter_get_pages()/iov_iter_get_pages_alloc() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 19/37] [xarray] iov_iter_npages(): just use DIV_ROUND_UP() Al Viro
` (18 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Note that in the bvec case pages can be compound ones - we can't just assume
that each segment is covered by one (sub)page.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 88 +++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 54 insertions(+), 34 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e8d5e4ee92d..04c81481d309 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1877,19 +1877,56 @@ size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
}
EXPORT_SYMBOL(hash_and_copy_to_iter);
-int iov_iter_npages(const struct iov_iter *i, int maxpages)
+static int iov_npages(const struct iov_iter *i, int maxpages)
{
- size_t size = i->count;
+ size_t skip = i->iov_offset, size = i->count;
+ const struct iovec *p;
int npages = 0;
- if (!size)
- return 0;
- if (unlikely(iov_iter_is_discard(i)))
- return 0;
+ for (p = i->iov; size; skip = 0, p++) {
+ unsigned offs = offset_in_page(p->iov_base + skip);
+ size_t len = min(p->iov_len - skip, size);
- if (unlikely(iov_iter_is_pipe(i))) {
- struct pipe_inode_info *pipe = i->pipe;
+ if (len) {
+ size -= len;
+ npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
+ if (unlikely(npages > maxpages))
+ return maxpages;
+ }
+ }
+ return npages;
+}
+
+static int bvec_npages(const struct iov_iter *i, int maxpages)
+{
+ size_t skip = i->iov_offset, size = i->count;
+ const struct bio_vec *p;
+ int npages = 0;
+
+ for (p = i->bvec; size; skip = 0, p++) {
+ unsigned offs = (p->bv_offset + skip) % PAGE_SIZE;
+ size_t len = min(p->bv_len - skip, size);
+
+ size -= len;
+ npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
+ if (unlikely(npages > maxpages))
+ return maxpages;
+ }
+ return npages;
+}
+
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+ if (unlikely(!i->count))
+ return 0;
+ /* iovec and kvec have identical layouts */
+ if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
+ return iov_npages(i, maxpages);
+ if (iov_iter_is_bvec(i))
+ return bvec_npages(i, maxpages);
+ if (iov_iter_is_pipe(i)) {
unsigned int iter_head;
+ int npages;
size_t off;
if (!sanity(i))
@@ -1897,11 +1934,13 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
data_start(i, &iter_head, &off);
/* some of this one + all after this one */
- npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
- if (npages >= maxpages)
- return maxpages;
- } else if (unlikely(iov_iter_is_xarray(i))) {
+ npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
+ return min(npages, maxpages);
+ }
+ if (iov_iter_is_xarray(i)) {
+ size_t size = i->count;
unsigned offset;
+ int npages;
offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
@@ -1913,28 +1952,9 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
if (size)
npages++;
}
- if (npages >= maxpages)
- return maxpages;
- } else iterate_all_kinds(i, size, v, ({
- unsigned long p = (unsigned long)v.iov_base;
- npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
- - p / PAGE_SIZE;
- if (npages >= maxpages)
- return maxpages;
- 0;}),({
- npages++;
- if (npages >= maxpages)
- return maxpages;
- }),({
- unsigned long p = (unsigned long)v.iov_base;
- npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
- - p / PAGE_SIZE;
- if (npages >= maxpages)
- return maxpages;
- }),
- 0
- )
- return npages;
+ return min(npages, maxpages);
+ }
+ return 0;
}
EXPORT_SYMBOL(iov_iter_npages);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 19/37] [xarray] iov_iter_npages(): just use DIV_ROUND_UP()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (16 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 18/37] iov_iter_npages(): don't bother with iterate_all_kinds() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 20/37] iov_iter: replace iov_iter_copy_from_user_atomic() with iterator-advancing variant Al Viro
` (17 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Compiler is capable of recognizing division by power of 2 and turning
it into shifts.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 04c81481d309..6a968d2ff081 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1938,20 +1938,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
return min(npages, maxpages);
}
if (iov_iter_is_xarray(i)) {
- size_t size = i->count;
- unsigned offset;
- int npages;
-
- offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
-
- npages = 1;
- if (size > PAGE_SIZE - offset) {
- size -= PAGE_SIZE - offset;
- npages += size >> PAGE_SHIFT;
- size &= ~PAGE_MASK;
- if (size)
- npages++;
- }
+ unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
+ int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
return min(npages, maxpages);
}
return 0;
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 20/37] iov_iter: replace iov_iter_copy_from_user_atomic() with iterator-advancing variant
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (17 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 19/37] [xarray] iov_iter_npages(): just use DIV_ROUND_UP() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 21/37] csum_and_copy_to_iter(): massage into form closer to csum_and_copy_from_iter() Al Viro
` (16 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Replacement is called copy_page_from_iter_atomic(); unlike the old primitive the
callers do *not* need to do iov_iter_advance() after it. In case when they end
up consuming less than they'd been given they need to do iov_iter_revert() on
everything they had not consumed. That, however, needs to be done only on slow
paths.
All in-tree callers converted. And that kills the last user of iterate_all_kinds()
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
Documentation/filesystems/porting.rst | 9 +++++++++
fs/btrfs/file.c | 23 +++++++++++------------
fs/fuse/file.c | 3 +--
fs/iomap/buffered-io.c | 14 +++++++-------
fs/ntfs/file.c | 4 +---
include/linux/uio.h | 4 ++--
lib/iov_iter.c | 30 ++++--------------------------
mm/filemap.c | 16 ++++++++--------
8 files changed, 43 insertions(+), 60 deletions(-)
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 0302035781be..43b492d08dec 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -890,3 +890,12 @@ been called or returned with non -EIOCBQUEUED code.
mnt_want_write_file() can now only be paired with mnt_drop_write_file(),
whereas previously it could be paired with mnt_drop_write() as well.
+
+---
+
+**mandatory**
+
+iov_iter_copy_from_user_atomic() is gone; use copy_page_from_iter_atomic().
+The difference is copy_page_from_iter_atomic() advances the iterator and
+you don't need iov_iter_advance() after it. However, if you decide to use
+only a part of obtained data, you should do iov_iter_revert().
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 864c08d08a35..78cb8f9eaa6b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -398,7 +398,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
/*
* Copy data from userspace to the current page
*/
- copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+ copied = copy_page_from_iter_atomic(page, offset, count, i);
/* Flush processor's dcache for this page */
flush_dcache_page(page);
@@ -412,20 +412,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
* The rest of the btrfs_file_write code will fall
* back to page at a time copies after we return 0.
*/
- if (!PageUptodate(page) && copied < count)
- copied = 0;
+ if (unlikely(copied < count)) {
+ if (!PageUptodate(page)) {
+ iov_iter_revert(i, copied);
+ copied = 0;
+ }
+ if (!copied)
+ break;
+ }
- iov_iter_advance(i, copied);
write_bytes -= copied;
total_copied += copied;
-
- /* Return to btrfs_file_write_iter to fault page */
- if (unlikely(copied == 0))
- break;
-
- if (copied < PAGE_SIZE - offset) {
- offset += copied;
- } else {
+ offset += copied;
+ if (offset == PAGE_SIZE) {
pg++;
offset = 0;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 44bd301fa4fb..4722fa31a185 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1171,10 +1171,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
- tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
+ tmp = copy_page_from_iter_atomic(page, offset, bytes, ii);
flush_dcache_page(page);
- iov_iter_advance(ii, tmp);
if (!tmp) {
unlock_page(page);
put_page(page);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 354b41d20e5d..c5ff13e0e7cf 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -785,13 +785,15 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
if (mapping_writably_mapped(inode->i_mapping))
flush_dcache_page(page);
- copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+ copied = copy_page_from_iter_atomic(page, offset, bytes, i);
status = iomap_write_end(inode, pos, bytes, copied, page, iomap,
srcmap);
- cond_resched();
+ if (unlikely(copied != status))
+ iov_iter_revert(i, copied - status);
+ cond_resched();
if (unlikely(status == 0)) {
/*
* A short copy made iomap_write_end() reject the
@@ -803,11 +805,9 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
bytes = copied;
goto again;
}
- copied = status;
- iov_iter_advance(i, copied);
- pos += copied;
- written += copied;
- length -= copied;
+ pos += status;
+ written += status;
+ length -= status;
balance_dirty_pages_ratelimited(inode->i_mapping);
} while (iov_iter_count(i) && length);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 0666d4578137..ab4f3362466d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1690,9 +1690,7 @@ static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
len = PAGE_SIZE - ofs;
if (len > bytes)
len = bytes;
- copied = iov_iter_copy_from_user_atomic(*pages, i, ofs,
- len);
- iov_iter_advance(i, copied);
+ copied = copy_page_from_iter_atomic(*pages, ofs, len, i);
total += copied;
bytes -= copied;
if (!bytes)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 18b4e0a8e3bf..fd88d9911dad 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -115,8 +115,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
};
}
-size_t iov_iter_copy_from_user_atomic(struct page *page,
- struct iov_iter *i, unsigned long offset, size_t bytes);
+size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+ size_t bytes, struct iov_iter *i);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 6a968d2ff081..dbd6b92f6200 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -114,28 +114,6 @@
n = wanted - n; \
}
-#define iterate_all_kinds(i, n, v, I, B, K, X) { \
- if (likely(n)) { \
- size_t skip = i->iov_offset; \
- if (likely(iter_is_iovec(i))) { \
- const struct iovec *iov; \
- struct iovec v; \
- iterate_iovec(i, n, v, iov, skip, (I)) \
- } else if (iov_iter_is_bvec(i)) { \
- struct bio_vec v; \
- struct bvec_iter __bi; \
- iterate_bvec(i, n, v, __bi, skip, (B)) \
- } else if (iov_iter_is_kvec(i)) { \
- const struct kvec *kvec; \
- struct kvec v; \
- iterate_kvec(i, n, v, kvec, skip, (K)) \
- } else if (iov_iter_is_xarray(i)) { \
- struct bio_vec v; \
- iterate_xarray(i, n, v, skip, (X)); \
- } \
- } \
-}
-
#define iterate_and_advance(i, n, v, I, B, K, X) { \
if (unlikely(i->count < n)) \
n = i->count; \
@@ -1020,8 +998,8 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(iov_iter_zero);
-size_t iov_iter_copy_from_user_atomic(struct page *page,
- struct iov_iter *i, unsigned long offset, size_t bytes)
+size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
+ struct iov_iter *i)
{
char *kaddr = kmap_atomic(page), *p = kaddr + offset;
if (unlikely(!page_copy_sane(page, offset, bytes))) {
@@ -1033,7 +1011,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
WARN_ON(1);
return 0;
}
- iterate_all_kinds(i, bytes, v,
+ iterate_and_advance(i, bytes, v,
copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
v.bv_offset, v.bv_len),
@@ -1044,7 +1022,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
kunmap_atomic(kaddr);
return bytes;
}
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+EXPORT_SYMBOL(copy_page_from_iter_atomic);
static inline void pipe_truncate(struct iov_iter *i)
{
diff --git a/mm/filemap.c b/mm/filemap.c
index 0be24942bf8e..cf9de790f493 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3661,14 +3661,16 @@ ssize_t generic_perform_write(struct file *file,
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
- copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
+ copied = copy_page_from_iter_atomic(page, offset, bytes, i);
flush_dcache_page(page);
status = a_ops->write_end(file, mapping, pos, bytes, copied,
page, fsdata);
- if (unlikely(status < 0))
- break;
-
+ if (unlikely(status != copied)) {
+ iov_iter_revert(i, copied - max(status, 0L));
+ if (unlikely(status < 0))
+ break;
+ }
cond_resched();
if (unlikely(status == 0)) {
@@ -3682,10 +3684,8 @@ ssize_t generic_perform_write(struct file *file,
bytes = copied;
goto again;
}
- copied = status;
- iov_iter_advance(i, copied);
- pos += copied;
- written += copied;
+ pos += status;
+ written += status;
balance_dirty_pages_ratelimited(mapping);
} while (iov_iter_count(i));
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 21/37] csum_and_copy_to_iter(): massage into form closer to csum_and_copy_from_iter()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (18 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 20/37] iov_iter: replace iov_iter_copy_from_user_atomic() with iterator-advancing variant Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 22/37] iterate_and_advance(): get rid of magic in case when n is 0 Al Viro
` (15 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Namely, have off counted starting from 0 rather than from csstate->off.
To compensate we need to shift the initial value (csstate->sum) (rotate
by 8 bits, as usual for csum) and do the same after we are finished adding
the pieces up.
What we get out of that is a bit more redundancy in our variables - from
is always equal to addr + off, which will be useful several commits down
the road.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
include/net/checksum.h | 14 ++++++++------
lib/iov_iter.c | 8 ++++----
2 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 0d05b9e8690b..5b96d5bd6e54 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -80,16 +80,18 @@ static inline __sum16 csum16_sub(__sum16 csum, __be16 addend)
return csum16_add(csum, ~addend);
}
-static inline __wsum
-csum_block_add(__wsum csum, __wsum csum2, int offset)
+static inline __wsum csum_shift(__wsum sum, int offset)
{
- u32 sum = (__force u32)csum2;
-
/* rotate sum to align it with a 16b boundary */
if (offset & 1)
- sum = ror32(sum, 8);
+ return (__force __wsum)ror32((__force u32)sum, 8);
+ return sum;
+}
- return csum_add(csum, (__force __wsum)sum);
+static inline __wsum
+csum_block_add(__wsum csum, __wsum csum2, int offset)
+{
+ return csum_add(csum, csum_shift(csum2, offset));
}
static inline __wsum
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index dbd6b92f6200..906e9d49c487 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1794,8 +1794,8 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
if (unlikely(iov_iter_is_pipe(i)))
return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
- sum = csstate->csum;
- off = csstate->off;
+ sum = csum_shift(csstate->csum, csstate->off);
+ off = 0;
if (unlikely(iov_iter_is_discard(i))) {
WARN_ON(1); /* for now */
return 0;
@@ -1830,8 +1830,8 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
off += v.bv_len;
})
)
- csstate->csum = sum;
- csstate->off = off;
+ csstate->csum = csum_shift(sum, csstate->off);
+ csstate->off += bytes;
return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 22/37] iterate_and_advance(): get rid of magic in case when n is 0
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (19 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 21/37] csum_and_copy_to_iter(): massage into form closer to csum_and_copy_from_iter() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 23/37] iov_iter: massage iterate_iovec and iterate_kvec to logics similar to iterate_bvec Al Viro
` (14 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
iov_iter_advance() needs to do some non-trivial work when it's given
0 as argument (skip all empty iovecs, mostly). We used to implement
it via iterate_and_advance(); we no longer do so and for all other
users of iterate_and_advance() zero length is a no-op.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 906e9d49c487..ebb907c6393c 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -117,7 +117,7 @@
#define iterate_and_advance(i, n, v, I, B, K, X) { \
if (unlikely(i->count < n)) \
n = i->count; \
- if (i->count) { \
+ if (likely(n)) { \
size_t skip = i->iov_offset; \
if (likely(iter_is_iovec(i))) { \
const struct iovec *iov; \
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 23/37] iov_iter: massage iterate_iovec and iterate_kvec to logics similar to iterate_bvec
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (20 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 22/37] iterate_and_advance(): get rid of magic in case when n is 0 Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 24/37] iov_iter: unify iterate_iovec and iterate_kvec Al Viro
` (13 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Premature optimization is the root of all evil... Trying
to unroll the first pass through the loop makes it harder
to follow and not just for readers - compiler ends up
generating worse code than it would on a "non-optimized"
loop.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 91 +++++++++++++++++++++++-----------------------------------
1 file changed, 36 insertions(+), 55 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ebb907c6393c..578f40788943 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -16,55 +16,44 @@
#define PIPE_PARANOIA /* for now */
-#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
- size_t left; \
- size_t wanted = n; \
- __p = i->iov; \
- __v.iov_len = min(n, __p->iov_len - skip); \
- if (likely(__v.iov_len)) { \
- __v.iov_base = __p->iov_base + skip; \
- left = (STEP); \
- __v.iov_len -= left; \
- skip += __v.iov_len; \
- n -= __v.iov_len; \
- } else { \
- left = 0; \
- } \
- while (unlikely(!left && n)) { \
- __p++; \
- __v.iov_len = min(n, __p->iov_len); \
- if (unlikely(!__v.iov_len)) \
- continue; \
- __v.iov_base = __p->iov_base; \
- left = (STEP); \
- __v.iov_len -= left; \
- skip = __v.iov_len; \
- n -= __v.iov_len; \
- } \
- n = wanted - n; \
+#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
+ size_t left; \
+ size_t wanted = n; \
+ __p = i->iov; \
+ do { \
+ __v.iov_len = min(n, __p->iov_len - skip); \
+ if (likely(__v.iov_len)) { \
+ __v.iov_base = __p->iov_base + skip; \
+ left = (STEP); \
+ __v.iov_len -= left; \
+ skip += __v.iov_len; \
+ n -= __v.iov_len; \
+ if (skip < __p->iov_len) \
+ break; \
+ } \
+ __p++; \
+ skip = 0; \
+ } while (n); \
+ n = wanted - n; \
}
-#define iterate_kvec(i, n, __v, __p, skip, STEP) { \
- size_t wanted = n; \
- __p = i->kvec; \
- __v.iov_len = min(n, __p->iov_len - skip); \
- if (likely(__v.iov_len)) { \
- __v.iov_base = __p->iov_base + skip; \
- (void)(STEP); \
- skip += __v.iov_len; \
- n -= __v.iov_len; \
- } \
- while (unlikely(n)) { \
- __p++; \
- __v.iov_len = min(n, __p->iov_len); \
- if (unlikely(!__v.iov_len)) \
- continue; \
- __v.iov_base = __p->iov_base; \
- (void)(STEP); \
- skip = __v.iov_len; \
- n -= __v.iov_len; \
- } \
- n = wanted; \
+#define iterate_kvec(i, n, __v, __p, skip, STEP) { \
+ size_t wanted = n; \
+ __p = i->kvec; \
+ do { \
+ __v.iov_len = min(n, __p->iov_len - skip); \
+ if (likely(__v.iov_len)) { \
+ __v.iov_base = __p->iov_base + skip; \
+ (void)(STEP); \
+ skip += __v.iov_len; \
+ n -= __v.iov_len; \
+ if (skip < __p->iov_len) \
+ break; \
+ } \
+ __p++; \
+ skip = 0; \
+ } while (n); \
+ n = wanted - n; \
}
#define iterate_bvec(i, n, __v, __bi, skip, STEP) { \
@@ -123,10 +112,6 @@
const struct iovec *iov; \
struct iovec v; \
iterate_iovec(i, n, v, iov, skip, (I)) \
- if (skip == iov->iov_len) { \
- iov++; \
- skip = 0; \
- } \
i->nr_segs -= iov - i->iov; \
i->iov = iov; \
} else if (iov_iter_is_bvec(i)) { \
@@ -141,10 +126,6 @@
const struct kvec *kvec; \
struct kvec v; \
iterate_kvec(i, n, v, kvec, skip, (K)) \
- if (skip == kvec->iov_len) { \
- kvec++; \
- skip = 0; \
- } \
i->nr_segs -= kvec - i->kvec; \
i->kvec = kvec; \
} else if (iov_iter_is_xarray(i)) { \
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 24/37] iov_iter: unify iterate_iovec and iterate_kvec
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (21 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 23/37] iov_iter: massage iterate_iovec and iterate_kvec to logics similar to iterate_bvec Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 25/37] iterate_bvec(): expand bvec.h macro forest, massage a bit Al Viro
` (12 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
The differences between iterate_iovec and iterate_kvec are minor:
* kvec callback is treated as if it returned 0
* initialization of __p is with i->iov and i->kvec resp.
which is trivially dealt with.
No code generation changes - compiler is quite capable of turning
left = ((void)(STEP), 0);
__v.iov_len -= left;
(with no accesses to left downstream) and
(void)(STEP);
into the same code.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 28 +++++-----------------------
1 file changed, 5 insertions(+), 23 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 578f40788943..19c103e9ef7d 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -16,10 +16,10 @@
#define PIPE_PARANOIA /* for now */
+/* covers iovec and kvec alike */
#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
size_t left; \
size_t wanted = n; \
- __p = i->iov; \
do { \
__v.iov_len = min(n, __p->iov_len - skip); \
if (likely(__v.iov_len)) { \
@@ -37,25 +37,6 @@
n = wanted - n; \
}
-#define iterate_kvec(i, n, __v, __p, skip, STEP) { \
- size_t wanted = n; \
- __p = i->kvec; \
- do { \
- __v.iov_len = min(n, __p->iov_len - skip); \
- if (likely(__v.iov_len)) { \
- __v.iov_base = __p->iov_base + skip; \
- (void)(STEP); \
- skip += __v.iov_len; \
- n -= __v.iov_len; \
- if (skip < __p->iov_len) \
- break; \
- } \
- __p++; \
- skip = 0; \
- } while (n); \
- n = wanted - n; \
-}
-
#define iterate_bvec(i, n, __v, __bi, skip, STEP) { \
struct bvec_iter __start; \
__start.bi_size = n; \
@@ -109,7 +90,7 @@
if (likely(n)) { \
size_t skip = i->iov_offset; \
if (likely(iter_is_iovec(i))) { \
- const struct iovec *iov; \
+ const struct iovec *iov = i->iov; \
struct iovec v; \
iterate_iovec(i, n, v, iov, skip, (I)) \
i->nr_segs -= iov - i->iov; \
@@ -123,9 +104,10 @@
i->nr_segs -= i->bvec - bvec; \
skip = __bi.bi_bvec_done; \
} else if (iov_iter_is_kvec(i)) { \
- const struct kvec *kvec; \
+ const struct kvec *kvec = i->kvec; \
struct kvec v; \
- iterate_kvec(i, n, v, kvec, skip, (K)) \
+ iterate_iovec(i, n, v, kvec, skip, \
+ ((void)(K),0)) \
i->nr_segs -= kvec - i->kvec; \
i->kvec = kvec; \
} else if (iov_iter_is_xarray(i)) { \
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 25/37] iterate_bvec(): expand bvec.h macro forest, massage a bit
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (22 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 24/37] iov_iter: unify iterate_iovec and iterate_kvec Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 26/37] iov_iter: teach iterate_{bvec,xarray}() about possible short copies Al Viro
` (11 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
... incidentally, using pointer instead of index in an array
(the only change here) trims half-kilobyte of .text...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 19c103e9ef7d..af9525c21c77 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -37,14 +37,23 @@
n = wanted - n; \
}
-#define iterate_bvec(i, n, __v, __bi, skip, STEP) { \
- struct bvec_iter __start; \
- __start.bi_size = n; \
- __start.bi_bvec_done = skip; \
- __start.bi_idx = 0; \
- for_each_bvec(__v, i->bvec, __bi, __start) { \
- (void)(STEP); \
- } \
+#define iterate_bvec(i, n, __v, p, skip, STEP) { \
+ size_t wanted = n; \
+ while (n) { \
+ unsigned offset = p->bv_offset + skip; \
+ __v.bv_offset = offset % PAGE_SIZE; \
+ __v.bv_page = p->bv_page + offset / PAGE_SIZE; \
+ __v.bv_len = min(min(n, p->bv_len - skip), \
+ (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \
+ (void)(STEP); \
+ skip += __v.bv_len; \
+ if (skip == p->bv_len) { \
+ skip = 0; \
+ p++; \
+ } \
+ n -= __v.bv_len; \
+ } \
+ n = wanted - n; \
}
#define iterate_xarray(i, n, __v, skip, STEP) { \
@@ -98,11 +107,9 @@
} else if (iov_iter_is_bvec(i)) { \
const struct bio_vec *bvec = i->bvec; \
struct bio_vec v; \
- struct bvec_iter __bi; \
- iterate_bvec(i, n, v, __bi, skip, (B)) \
- i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
- i->nr_segs -= i->bvec - bvec; \
- skip = __bi.bi_bvec_done; \
+ iterate_bvec(i, n, v, bvec, skip, (B)) \
+ i->nr_segs -= bvec - i->bvec; \
+ i->bvec = bvec; \
} else if (iov_iter_is_kvec(i)) { \
const struct kvec *kvec = i->kvec; \
struct kvec v; \
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 26/37] iov_iter: teach iterate_{bvec,xarray}() about possible short copies
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (23 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 25/37] iterate_bvec(): expand bvec.h macro forest, massage a bit Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 27/37] iov_iter: get rid of separate bvec and xarray callbacks Al Viro
` (10 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
... and now we finally can sort out the mess in _copy_mc_to_iter().
Provide a variant of iterate_and_advance() that does *NOT* ignore
the return values of bvec, xarray and kvec callbacks, use that in
_copy_mc_to_iter(). That gets rid of magic in those callbacks -
we used to need it so we'd get at least the right return value in
case of failure halfway through.
As a bonus, now iterator is advanced by the amount actually copied
for all flavours. That's what the callers expect and it used to do that
correctly in iovec and xarray cases. However, in kvec and bvec cases
the iterator had not been advanced on such failures, breaking the users.
Fixed now...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 65 ++++++++++++++++++++++------------------------------------
1 file changed, 24 insertions(+), 41 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index af9525c21c77..9dc36deddb68 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -41,22 +41,27 @@
size_t wanted = n; \
while (n) { \
unsigned offset = p->bv_offset + skip; \
+ unsigned left; \
__v.bv_offset = offset % PAGE_SIZE; \
__v.bv_page = p->bv_page + offset / PAGE_SIZE; \
__v.bv_len = min(min(n, p->bv_len - skip), \
(size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \
- (void)(STEP); \
+ left = (STEP); \
+ __v.bv_len -= left; \
skip += __v.bv_len; \
if (skip == p->bv_len) { \
skip = 0; \
p++; \
} \
n -= __v.bv_len; \
+ if (left) \
+ break; \
} \
n = wanted - n; \
}
#define iterate_xarray(i, n, __v, skip, STEP) { \
+ __label__ __out; \
struct page *head = NULL; \
size_t wanted = n, seg, offset; \
loff_t start = i->xarray_start + skip; \
@@ -67,6 +72,7 @@
\
rcu_read_lock(); \
xas_for_each(&xas, head, ULONG_MAX) { \
+ unsigned left; \
if (xas_retry(&xas, head)) \
continue; \
if (WARN_ON(xa_is_value(head))) \
@@ -80,20 +86,20 @@
seg = PAGE_SIZE - offset; \
__v.bv_offset = offset; \
__v.bv_len = min(n, seg); \
- (void)(STEP); \
+ left = (STEP); \
+ __v.bv_len -= left; \
n -= __v.bv_len; \
skip += __v.bv_len; \
- if (n == 0) \
- break; \
+ if (left || n == 0) \
+ goto __out; \
} \
- if (n == 0) \
- break; \
} \
+__out: \
rcu_read_unlock(); \
n = wanted - n; \
}
-#define iterate_and_advance(i, n, v, I, B, K, X) { \
+#define __iterate_and_advance(i, n, v, I, B, K, X) { \
if (unlikely(i->count < n)) \
n = i->count; \
if (likely(n)) { \
@@ -113,8 +119,7 @@
} else if (iov_iter_is_kvec(i)) { \
const struct kvec *kvec = i->kvec; \
struct kvec v; \
- iterate_iovec(i, n, v, kvec, skip, \
- ((void)(K),0)) \
+ iterate_iovec(i, n, v, kvec, skip, (K)) \
i->nr_segs -= kvec - i->kvec; \
i->kvec = kvec; \
} else if (iov_iter_is_xarray(i)) { \
@@ -125,6 +130,9 @@
i->iov_offset = skip; \
} \
}
+#define iterate_and_advance(i, n, v, I, B, K, X) \
+ __iterate_and_advance(i, n, v, I, ((void)(B),0), \
+ ((void)(K),0), ((void)(X),0))
static int copyout(void __user *to, const void *from, size_t n)
{
@@ -709,45 +717,20 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
const char *from = addr;
- unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
if (unlikely(iov_iter_is_pipe(i)))
return copy_mc_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
- iterate_and_advance(i, bytes, v,
+ __iterate_and_advance(i, bytes, v,
copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
v.iov_len),
- ({
- rem = copy_mc_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len);
- if (rem) {
- curr_addr = (unsigned long) from;
- bytes = curr_addr - s_addr - rem;
- return bytes;
- }
- }),
- ({
- rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
- - v.iov_len, v.iov_len);
- if (rem) {
- curr_addr = (unsigned long) from;
- bytes = curr_addr - s_addr - rem;
- return bytes;
- }
- }),
- ({
- rem = copy_mc_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len);
- if (rem) {
- curr_addr = (unsigned long) from;
- bytes = curr_addr - s_addr - rem;
- rcu_read_unlock();
- i->iov_offset += bytes;
- i->count -= bytes;
- return bytes;
- }
- })
+ copy_mc_to_page(v.bv_page, v.bv_offset,
+ (from += v.bv_len) - v.bv_len, v.bv_len),
+ copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
+ - v.iov_len, v.iov_len),
+ copy_mc_to_page(v.bv_page, v.bv_offset,
+ (from += v.bv_len) - v.bv_len, v.bv_len)
)
return bytes;
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 27/37] iov_iter: get rid of separate bvec and xarray callbacks
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (24 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 26/37] iov_iter: teach iterate_{bvec,xarray}() about possible short copies Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 28/37] iov_iter: make the amount already copied available to iterator callbacks Al Viro
` (9 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
After the previous commit we have
* xarray and bvec callbacks idential in all cases
* both equivalent to kvec callback wrapped into
kmap_local_page()/kunmap_local() pair.
So we can pass only two (iovec and kvec) callbacks to
iterate_and_advance() and let iterate_{bvec,xarray} wrap
it into kmap_local_page()/kunmap_local().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 112 ++++++++++++++++-----------------------------------------
1 file changed, 30 insertions(+), 82 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 9dc36deddb68..5a871d001e12 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -42,18 +42,20 @@
while (n) { \
unsigned offset = p->bv_offset + skip; \
unsigned left; \
- __v.bv_offset = offset % PAGE_SIZE; \
- __v.bv_page = p->bv_page + offset / PAGE_SIZE; \
- __v.bv_len = min(min(n, p->bv_len - skip), \
+ void *kaddr = kmap_local_page(p->bv_page + \
+ offset / PAGE_SIZE); \
+ __v.iov_base = kaddr + offset % PAGE_SIZE; \
+ __v.iov_len = min(min(n, p->bv_len - skip), \
(size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \
left = (STEP); \
- __v.bv_len -= left; \
- skip += __v.bv_len; \
+ kunmap_local(kaddr); \
+ __v.iov_len -= left; \
+ skip += __v.iov_len; \
if (skip == p->bv_len) { \
skip = 0; \
p++; \
} \
- n -= __v.bv_len; \
+ n -= __v.iov_len; \
if (left) \
break; \
} \
@@ -81,15 +83,16 @@
break; \
for (j = (head->index < index) ? index - head->index : 0; \
j < thp_nr_pages(head); j++) { \
- __v.bv_page = head + j; \
- offset = (i->xarray_start + skip) & ~PAGE_MASK; \
+ void *kaddr = kmap_local_page(head + j); \
+ offset = (i->xarray_start + skip) % PAGE_SIZE; \
+ __v.iov_base = kaddr + offset; \
seg = PAGE_SIZE - offset; \
- __v.bv_offset = offset; \
- __v.bv_len = min(n, seg); \
+ __v.iov_len = min(n, seg); \
left = (STEP); \
- __v.bv_len -= left; \
- n -= __v.bv_len; \
- skip += __v.bv_len; \
+ kunmap_local(kaddr); \
+ __v.iov_len -= left; \
+ n -= __v.iov_len; \
+ skip += __v.iov_len; \
if (left || n == 0) \
goto __out; \
} \
@@ -99,7 +102,7 @@ __out: \
n = wanted - n; \
}
-#define __iterate_and_advance(i, n, v, I, B, K, X) { \
+#define __iterate_and_advance(i, n, v, I, K) { \
if (unlikely(i->count < n)) \
n = i->count; \
if (likely(n)) { \
@@ -112,8 +115,8 @@ __out: \
i->iov = iov; \
} else if (iov_iter_is_bvec(i)) { \
const struct bio_vec *bvec = i->bvec; \
- struct bio_vec v; \
- iterate_bvec(i, n, v, bvec, skip, (B)) \
+ struct kvec v; \
+ iterate_bvec(i, n, v, bvec, skip, (K)) \
i->nr_segs -= bvec - i->bvec; \
i->bvec = bvec; \
} else if (iov_iter_is_kvec(i)) { \
@@ -123,16 +126,15 @@ __out: \
i->nr_segs -= kvec - i->kvec; \
i->kvec = kvec; \
} else if (iov_iter_is_xarray(i)) { \
- struct bio_vec v; \
- iterate_xarray(i, n, v, skip, (X)) \
+ struct kvec v; \
+ iterate_xarray(i, n, v, skip, (K)) \
} \
i->count -= n; \
i->iov_offset = skip; \
} \
}
-#define iterate_and_advance(i, n, v, I, B, K, X) \
- __iterate_and_advance(i, n, v, I, ((void)(B),0), \
- ((void)(K),0), ((void)(X),0))
+#define iterate_and_advance(i, n, v, I, K) \
+ __iterate_and_advance(i, n, v, I, ((void)(K),0))
static int copyout(void __user *to, const void *from, size_t n)
{
@@ -623,11 +625,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
might_fault();
iterate_and_advance(i, bytes, v,
copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
- memcpy_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len),
- memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
- memcpy_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len)
+ memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
)
return bytes;
@@ -725,12 +723,8 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
__iterate_and_advance(i, bytes, v,
copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
v.iov_len),
- copy_mc_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len),
copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
- - v.iov_len, v.iov_len),
- copy_mc_to_page(v.bv_page, v.bv_offset,
- (from += v.bv_len) - v.bv_len, v.bv_len)
+ - v.iov_len, v.iov_len)
)
return bytes;
@@ -749,11 +743,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
might_fault();
iterate_and_advance(i, bytes, v,
copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len)
+ memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
)
return bytes;
@@ -770,11 +760,7 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
iterate_and_advance(i, bytes, v,
__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
v.iov_base, v.iov_len),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
- memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len)
+ memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
)
return bytes;
@@ -806,12 +792,8 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
iterate_and_advance(i, bytes, v,
__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
v.iov_base, v.iov_len),
- memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len),
memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
- v.iov_len),
- memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len)
+ v.iov_len)
)
return bytes;
@@ -942,9 +924,7 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
return pipe_zero(bytes, i);
iterate_and_advance(i, bytes, v,
clear_user(v.iov_base, v.iov_len),
- memzero_page(v.bv_page, v.bv_offset, v.bv_len),
- memset(v.iov_base, 0, v.iov_len),
- memzero_page(v.bv_page, v.bv_offset, v.bv_len)
+ memset(v.iov_base, 0, v.iov_len)
)
return bytes;
@@ -966,11 +946,7 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
}
iterate_and_advance(i, bytes, v,
copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
- memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len),
- memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
- memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
- v.bv_offset, v.bv_len)
+ memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
)
kunmap_atomic(kaddr);
return bytes;
@@ -1711,24 +1687,10 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
}
next ? 0 : v.iov_len;
}), ({
- char *p = kmap_atomic(v.bv_page);
- sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
- p + v.bv_offset, v.bv_len,
- sum, off);
- kunmap_atomic(p);
- off += v.bv_len;
- }),({
sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
v.iov_base, v.iov_len,
sum, off);
off += v.iov_len;
- }), ({
- char *p = kmap_atomic(v.bv_page);
- sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
- p + v.bv_offset, v.bv_len,
- sum, off);
- kunmap_atomic(p);
- off += v.bv_len;
})
)
*csum = sum;
@@ -1763,24 +1725,10 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
}
next ? 0 : v.iov_len;
}), ({
- char *p = kmap_atomic(v.bv_page);
- sum = csum_and_memcpy(p + v.bv_offset,
- (from += v.bv_len) - v.bv_len,
- v.bv_len, sum, off);
- kunmap_atomic(p);
- off += v.bv_len;
- }),({
sum = csum_and_memcpy(v.iov_base,
(from += v.iov_len) - v.iov_len,
v.iov_len, sum, off);
off += v.iov_len;
- }), ({
- char *p = kmap_atomic(v.bv_page);
- sum = csum_and_memcpy(p + v.bv_offset,
- (from += v.bv_len) - v.bv_len,
- v.bv_len, sum, off);
- kunmap_atomic(p);
- off += v.bv_len;
})
)
csstate->csum = csum_shift(sum, csstate->off);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 28/37] iov_iter: make the amount already copied available to iterator callbacks
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (25 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 27/37] iov_iter: get rid of separate bvec and xarray callbacks Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 29/37] iov_iter: make iterator callbacks use base and len instead of iovec Al Viro
` (8 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Making iterator macros keep track of the amount of data copied is pretty
easy and it has several benefits:
1) we no longer need the mess like (from += v.iov_len) - v.iov_len
in the callbacks - initial value + total amount copied so far would do
just fine.
2) less obviously, we no longer need to remember the initial amount
of data we wanted to copy; the loops in iterator macros are along the lines
of
wanted = bytes;
while (bytes) {
copy some
bytes -= copied
if short copy
break
}
bytes = wanted - bytes;
Replacement is
offs = 0;
while (bytes) {
copy some
offs += copied
bytes -= copied
if short copy
break
}
bytes = offs;
That wouldn't be a win per se, but unlike the initial value of bytes, the amount
copied so far *is* useful in callbacks.
3) in some cases (csum_and_copy_..._iter()) we already had offs manually
maintained by the callbacks. With that change we can drop that.
Less boilerplate and more readable code...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 120 ++++++++++++++++++++++++---------------------------------
1 file changed, 50 insertions(+), 70 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5a871d001e12..14e34f9df490 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -17,15 +17,14 @@
#define PIPE_PARANOIA /* for now */
/* covers iovec and kvec alike */
-#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
- size_t left; \
- size_t wanted = n; \
+#define iterate_iovec(i, n, __v, __off, __p, skip, STEP) { \
+ size_t __off = 0; \
do { \
__v.iov_len = min(n, __p->iov_len - skip); \
if (likely(__v.iov_len)) { \
__v.iov_base = __p->iov_base + skip; \
- left = (STEP); \
- __v.iov_len -= left; \
+ __v.iov_len -= (STEP); \
+ __off += __v.iov_len; \
skip += __v.iov_len; \
n -= __v.iov_len; \
if (skip < __p->iov_len) \
@@ -34,11 +33,11 @@
__p++; \
skip = 0; \
} while (n); \
- n = wanted - n; \
+ n = __off; \
}
-#define iterate_bvec(i, n, __v, p, skip, STEP) { \
- size_t wanted = n; \
+#define iterate_bvec(i, n, __v, __off, p, skip, STEP) { \
+ size_t __off = 0; \
while (n) { \
unsigned offset = p->bv_offset + skip; \
unsigned left; \
@@ -50,6 +49,7 @@
left = (STEP); \
kunmap_local(kaddr); \
__v.iov_len -= left; \
+ __off += __v.iov_len; \
skip += __v.iov_len; \
if (skip == p->bv_len) { \
skip = 0; \
@@ -59,13 +59,14 @@
if (left) \
break; \
} \
- n = wanted - n; \
+ n = __off; \
}
-#define iterate_xarray(i, n, __v, skip, STEP) { \
+#define iterate_xarray(i, n, __v, __off, skip, STEP) { \
__label__ __out; \
+ size_t __off = 0; \
struct page *head = NULL; \
- size_t wanted = n, seg, offset; \
+ size_t seg, offset; \
loff_t start = i->xarray_start + skip; \
pgoff_t index = start >> PAGE_SHIFT; \
int j; \
@@ -84,25 +85,26 @@
for (j = (head->index < index) ? index - head->index : 0; \
j < thp_nr_pages(head); j++) { \
void *kaddr = kmap_local_page(head + j); \
- offset = (i->xarray_start + skip) % PAGE_SIZE; \
+ offset = (start + __off) % PAGE_SIZE; \
__v.iov_base = kaddr + offset; \
seg = PAGE_SIZE - offset; \
__v.iov_len = min(n, seg); \
left = (STEP); \
kunmap_local(kaddr); \
__v.iov_len -= left; \
+ __off += __v.iov_len; \
n -= __v.iov_len; \
- skip += __v.iov_len; \
if (left || n == 0) \
goto __out; \
} \
} \
__out: \
rcu_read_unlock(); \
- n = wanted - n; \
+ skip += __off; \
+ n = __off; \
}
-#define __iterate_and_advance(i, n, v, I, K) { \
+#define __iterate_and_advance(i, n, v, off, I, K) { \
if (unlikely(i->count < n)) \
n = i->count; \
if (likely(n)) { \
@@ -110,31 +112,31 @@ __out: \
if (likely(iter_is_iovec(i))) { \
const struct iovec *iov = i->iov; \
struct iovec v; \
- iterate_iovec(i, n, v, iov, skip, (I)) \
+ iterate_iovec(i, n, v, off, iov, skip, (I)) \
i->nr_segs -= iov - i->iov; \
i->iov = iov; \
} else if (iov_iter_is_bvec(i)) { \
const struct bio_vec *bvec = i->bvec; \
struct kvec v; \
- iterate_bvec(i, n, v, bvec, skip, (K)) \
+ iterate_bvec(i, n, v, off, bvec, skip, (K)) \
i->nr_segs -= bvec - i->bvec; \
i->bvec = bvec; \
} else if (iov_iter_is_kvec(i)) { \
const struct kvec *kvec = i->kvec; \
struct kvec v; \
- iterate_iovec(i, n, v, kvec, skip, (K)) \
+ iterate_iovec(i, n, v, off, kvec, skip, (K)) \
i->nr_segs -= kvec - i->kvec; \
i->kvec = kvec; \
} else if (iov_iter_is_xarray(i)) { \
struct kvec v; \
- iterate_xarray(i, n, v, skip, (K)) \
+ iterate_xarray(i, n, v, off, skip, (K)) \
} \
i->count -= n; \
i->iov_offset = skip; \
} \
}
-#define iterate_and_advance(i, n, v, I, K) \
- __iterate_and_advance(i, n, v, I, ((void)(K),0))
+#define iterate_and_advance(i, n, v, off, I, K) \
+ __iterate_and_advance(i, n, v, off, I, ((void)(K),0))
static int copyout(void __user *to, const void *from, size_t n)
{
@@ -618,14 +620,13 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
- const char *from = addr;
if (unlikely(iov_iter_is_pipe(i)))
return copy_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
- iterate_and_advance(i, bytes, v,
- copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
- memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
+ iterate_and_advance(i, bytes, v, off,
+ copyout(v.iov_base, addr + off, v.iov_len),
+ memcpy(v.iov_base, addr + off, v.iov_len)
)
return bytes;
@@ -714,17 +715,13 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
*/
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
- const char *from = addr;
-
if (unlikely(iov_iter_is_pipe(i)))
return copy_mc_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
- __iterate_and_advance(i, bytes, v,
- copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
- v.iov_len),
- copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
- - v.iov_len, v.iov_len)
+ __iterate_and_advance(i, bytes, v, off,
+ copyout_mc(v.iov_base, addr + off, v.iov_len),
+ copy_mc_to_kernel(v.iov_base, addr + off, v.iov_len)
)
return bytes;
@@ -734,16 +731,15 @@ EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
- char *to = addr;
if (unlikely(iov_iter_is_pipe(i))) {
WARN_ON(1);
return 0;
}
if (iter_is_iovec(i))
might_fault();
- iterate_and_advance(i, bytes, v,
- copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+ iterate_and_advance(i, bytes, v, off,
+ copyin(addr + off, v.iov_base, v.iov_len),
+ memcpy(addr + off, v.iov_base, v.iov_len)
)
return bytes;
@@ -752,15 +748,14 @@ EXPORT_SYMBOL(_copy_from_iter);
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
- char *to = addr;
if (unlikely(iov_iter_is_pipe(i))) {
WARN_ON(1);
return 0;
}
- iterate_and_advance(i, bytes, v,
- __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
+ iterate_and_advance(i, bytes, v, off,
+ __copy_from_user_inatomic_nocache(addr + off,
v.iov_base, v.iov_len),
- memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+ memcpy(addr + off, v.iov_base, v.iov_len)
)
return bytes;
@@ -784,16 +779,13 @@ EXPORT_SYMBOL(_copy_from_iter_nocache);
*/
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
- char *to = addr;
if (unlikely(iov_iter_is_pipe(i))) {
WARN_ON(1);
return 0;
}
- iterate_and_advance(i, bytes, v,
- __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
- v.iov_base, v.iov_len),
- memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
- v.iov_len)
+ iterate_and_advance(i, bytes, v, off,
+ __copy_from_user_flushcache(addr + off, v.iov_base, v.iov_len),
+ memcpy_flushcache(addr + off, v.iov_base, v.iov_len)
)
return bytes;
@@ -922,7 +914,7 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
if (unlikely(iov_iter_is_pipe(i)))
return pipe_zero(bytes, i);
- iterate_and_advance(i, bytes, v,
+ iterate_and_advance(i, bytes, v, count,
clear_user(v.iov_base, v.iov_len),
memset(v.iov_base, 0, v.iov_len)
)
@@ -944,9 +936,9 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
WARN_ON(1);
return 0;
}
- iterate_and_advance(i, bytes, v,
- copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
- memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
+ iterate_and_advance(i, bytes, v, off,
+ copyin(p + off, v.iov_base, v.iov_len),
+ memcpy(p + off, v.iov_base, v.iov_len)
)
kunmap_atomic(kaddr);
return bytes;
@@ -1669,28 +1661,22 @@ EXPORT_SYMBOL(iov_iter_get_pages_alloc);
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
struct iov_iter *i)
{
- char *to = addr;
__wsum sum, next;
- size_t off = 0;
sum = *csum;
if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
WARN_ON(1);
return 0;
}
- iterate_and_advance(i, bytes, v, ({
+ iterate_and_advance(i, bytes, v, off, ({
next = csum_and_copy_from_user(v.iov_base,
- (to += v.iov_len) - v.iov_len,
+ addr + off,
v.iov_len);
- if (next) {
+ if (next)
sum = csum_block_add(sum, next, off);
- off += v.iov_len;
- }
next ? 0 : v.iov_len;
}), ({
- sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
- v.iov_base, v.iov_len,
+ sum = csum_and_memcpy(addr + off, v.iov_base, v.iov_len,
sum, off);
- off += v.iov_len;
})
)
*csum = sum;
@@ -1702,33 +1688,27 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
struct iov_iter *i)
{
struct csum_state *csstate = _csstate;
- const char *from = addr;
__wsum sum, next;
- size_t off;
if (unlikely(iov_iter_is_pipe(i)))
return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
sum = csum_shift(csstate->csum, csstate->off);
- off = 0;
if (unlikely(iov_iter_is_discard(i))) {
WARN_ON(1); /* for now */
return 0;
}
- iterate_and_advance(i, bytes, v, ({
- next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
+ iterate_and_advance(i, bytes, v, off, ({
+ next = csum_and_copy_to_user(addr + off,
v.iov_base,
v.iov_len);
- if (next) {
+ if (next)
sum = csum_block_add(sum, next, off);
- off += v.iov_len;
- }
next ? 0 : v.iov_len;
}), ({
sum = csum_and_memcpy(v.iov_base,
- (from += v.iov_len) - v.iov_len,
+ addr + off,
v.iov_len, sum, off);
- off += v.iov_len;
})
)
csstate->csum = csum_shift(sum, csstate->off);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 29/37] iov_iter: make iterator callbacks use base and len instead of iovec
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (26 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 28/37] iov_iter: make the amount already copied available to iterator callbacks Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 30/37] pull handling of ->iov_offset into iterate_{iovec,bvec,xarray} Al Viro
` (7 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
Iterator macros used to provide the arguments for step callbacks in
a structure matching the flavour - iovec for ITER_IOVEC, kvec for
ITER_KVEC and bio_vec for ITER_BVEC. That already broke down for
ITER_XARRAY (bio_vec there); now that we are using kvec callback
for bvec and xarray cases, we are always passing a pointer + length
(void __user * + size_t for ITER_IOVEC callback, void * + size_t
for everything else).
Note that the original reason for bio_vec (page + offset + len) in
case of ITER_BVEC used to be that we did *not* want to kmap a
page when all we wanted was e.g. to find the alignment of its
subrange. Now all such users are gone and the ones that are left
want the page mapped anyway for actually copying the data.
So in all cases we have pointer + length, and there's no good
reason for keeping those in struct iovec or struct kvec - we
can just pass them to callback separately.
Again, less boilerplate in callbacks...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 182 ++++++++++++++++++++++++++++-----------------------------
1 file changed, 91 insertions(+), 91 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 14e34f9df490..8a7a8e5f4155 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -17,86 +17,86 @@
#define PIPE_PARANOIA /* for now */
/* covers iovec and kvec alike */
-#define iterate_iovec(i, n, __v, __off, __p, skip, STEP) { \
- size_t __off = 0; \
+#define iterate_iovec(i, n, base, len, off, __p, skip, STEP) { \
+ size_t off = 0; \
do { \
- __v.iov_len = min(n, __p->iov_len - skip); \
- if (likely(__v.iov_len)) { \
- __v.iov_base = __p->iov_base + skip; \
- __v.iov_len -= (STEP); \
- __off += __v.iov_len; \
- skip += __v.iov_len; \
- n -= __v.iov_len; \
+ len = min(n, __p->iov_len - skip); \
+ if (likely(len)) { \
+ base = __p->iov_base + skip; \
+ len -= (STEP); \
+ off += len; \
+ skip += len; \
+ n -= len; \
if (skip < __p->iov_len) \
break; \
} \
__p++; \
skip = 0; \
} while (n); \
- n = __off; \
+ n = off; \
}
-#define iterate_bvec(i, n, __v, __off, p, skip, STEP) { \
- size_t __off = 0; \
+#define iterate_bvec(i, n, base, len, off, p, skip, STEP) { \
+ size_t off = 0; \
while (n) { \
unsigned offset = p->bv_offset + skip; \
unsigned left; \
void *kaddr = kmap_local_page(p->bv_page + \
offset / PAGE_SIZE); \
- __v.iov_base = kaddr + offset % PAGE_SIZE; \
- __v.iov_len = min(min(n, p->bv_len - skip), \
+ base = kaddr + offset % PAGE_SIZE; \
+ len = min(min(n, p->bv_len - skip), \
(size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \
left = (STEP); \
kunmap_local(kaddr); \
- __v.iov_len -= left; \
- __off += __v.iov_len; \
- skip += __v.iov_len; \
+ len -= left; \
+ off += len; \
+ skip += len; \
if (skip == p->bv_len) { \
skip = 0; \
p++; \
} \
- n -= __v.iov_len; \
+ n -= len; \
if (left) \
break; \
} \
- n = __off; \
+ n = off; \
}
-#define iterate_xarray(i, n, __v, __off, skip, STEP) { \
+#define iterate_xarray(i, n, base, len, __off, skip, STEP) { \
__label__ __out; \
size_t __off = 0; \
struct page *head = NULL; \
- size_t seg, offset; \
+ size_t offset; \
loff_t start = i->xarray_start + skip; \
pgoff_t index = start >> PAGE_SHIFT; \
int j; \
\
XA_STATE(xas, i->xarray, index); \
\
- rcu_read_lock(); \
- xas_for_each(&xas, head, ULONG_MAX) { \
- unsigned left; \
- if (xas_retry(&xas, head)) \
- continue; \
- if (WARN_ON(xa_is_value(head))) \
- break; \
- if (WARN_ON(PageHuge(head))) \
- break; \
+ rcu_read_lock(); \
+ xas_for_each(&xas, head, ULONG_MAX) { \
+ unsigned left; \
+ if (xas_retry(&xas, head)) \
+ continue; \
+ if (WARN_ON(xa_is_value(head))) \
+ break; \
+ if (WARN_ON(PageHuge(head))) \
+ break; \
for (j = (head->index < index) ? index - head->index : 0; \
- j < thp_nr_pages(head); j++) { \
+ j < thp_nr_pages(head); j++) { \
void *kaddr = kmap_local_page(head + j); \
- offset = (start + __off) % PAGE_SIZE; \
- __v.iov_base = kaddr + offset; \
- seg = PAGE_SIZE - offset; \
- __v.iov_len = min(n, seg); \
- left = (STEP); \
- kunmap_local(kaddr); \
- __v.iov_len -= left; \
- __off += __v.iov_len; \
- n -= __v.iov_len; \
- if (left || n == 0) \
- goto __out; \
- } \
+ offset = (start + __off) % PAGE_SIZE; \
+ base = kaddr + offset; \
+ len = PAGE_SIZE - offset; \
+ len = min(n, len); \
+ left = (STEP); \
+ kunmap_local(kaddr); \
+ len -= left; \
+ __off += len; \
+ n -= len; \
+ if (left || n == 0) \
+ goto __out; \
+ } \
} \
__out: \
rcu_read_unlock(); \
@@ -104,39 +104,47 @@ __out: \
n = __off; \
}
-#define __iterate_and_advance(i, n, v, off, I, K) { \
+#define __iterate_and_advance(i, n, base, len, off, I, K) { \
if (unlikely(i->count < n)) \
n = i->count; \
if (likely(n)) { \
size_t skip = i->iov_offset; \
if (likely(iter_is_iovec(i))) { \
const struct iovec *iov = i->iov; \
- struct iovec v; \
- iterate_iovec(i, n, v, off, iov, skip, (I)) \
+ void __user *base; \
+ size_t len; \
+ iterate_iovec(i, n, base, len, off, \
+ iov, skip, (I)) \
i->nr_segs -= iov - i->iov; \
i->iov = iov; \
} else if (iov_iter_is_bvec(i)) { \
const struct bio_vec *bvec = i->bvec; \
- struct kvec v; \
- iterate_bvec(i, n, v, off, bvec, skip, (K)) \
+ void *base; \
+ size_t len; \
+ iterate_bvec(i, n, base, len, off, \
+ bvec, skip, (K)) \
i->nr_segs -= bvec - i->bvec; \
i->bvec = bvec; \
} else if (iov_iter_is_kvec(i)) { \
const struct kvec *kvec = i->kvec; \
- struct kvec v; \
- iterate_iovec(i, n, v, off, kvec, skip, (K)) \
+ void *base; \
+ size_t len; \
+ iterate_iovec(i, n, base, len, off, \
+ kvec, skip, (K)) \
i->nr_segs -= kvec - i->kvec; \
i->kvec = kvec; \
} else if (iov_iter_is_xarray(i)) { \
- struct kvec v; \
- iterate_xarray(i, n, v, off, skip, (K)) \
+ void *base; \
+ size_t len; \
+ iterate_xarray(i, n, base, len, off, \
+ skip, (K)) \
} \
i->count -= n; \
i->iov_offset = skip; \
} \
}
-#define iterate_and_advance(i, n, v, off, I, K) \
- __iterate_and_advance(i, n, v, off, I, ((void)(K),0))
+#define iterate_and_advance(i, n, base, len, off, I, K) \
+ __iterate_and_advance(i, n, base, len, off, I, ((void)(K),0))
static int copyout(void __user *to, const void *from, size_t n)
{
@@ -624,9 +632,9 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
return copy_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
- iterate_and_advance(i, bytes, v, off,
- copyout(v.iov_base, addr + off, v.iov_len),
- memcpy(v.iov_base, addr + off, v.iov_len)
+ iterate_and_advance(i, bytes, base, len, off,
+ copyout(base, addr + off, len),
+ memcpy(base, addr + off, len)
)
return bytes;
@@ -719,9 +727,9 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
return copy_mc_pipe_to_iter(addr, bytes, i);
if (iter_is_iovec(i))
might_fault();
- __iterate_and_advance(i, bytes, v, off,
- copyout_mc(v.iov_base, addr + off, v.iov_len),
- copy_mc_to_kernel(v.iov_base, addr + off, v.iov_len)
+ __iterate_and_advance(i, bytes, base, len, off,
+ copyout_mc(base, addr + off, len),
+ copy_mc_to_kernel(base, addr + off, len)
)
return bytes;
@@ -737,9 +745,9 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
}
if (iter_is_iovec(i))
might_fault();
- iterate_and_advance(i, bytes, v, off,
- copyin(addr + off, v.iov_base, v.iov_len),
- memcpy(addr + off, v.iov_base, v.iov_len)
+ iterate_and_advance(i, bytes, base, len, off,
+ copyin(addr + off, base, len),
+ memcpy(addr + off, base, len)
)
return bytes;
@@ -752,10 +760,9 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
WARN_ON(1);
return 0;
}
- iterate_and_advance(i, bytes, v, off,
- __copy_from_user_inatomic_nocache(addr + off,
- v.iov_base, v.iov_len),
- memcpy(addr + off, v.iov_base, v.iov_len)
+ iterate_and_advance(i, bytes, base, len, off,
+ __copy_from_user_inatomic_nocache(addr + off, base, len),
+ memcpy(addr + off, base, len)
)
return bytes;
@@ -783,9 +790,9 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
WARN_ON(1);
return 0;
}
- iterate_and_advance(i, bytes, v, off,
- __copy_from_user_flushcache(addr + off, v.iov_base, v.iov_len),
- memcpy_flushcache(addr + off, v.iov_base, v.iov_len)
+ iterate_and_advance(i, bytes, base, len, off,
+ __copy_from_user_flushcache(addr + off, base, len),
+ memcpy_flushcache(addr + off, base, len)
)
return bytes;
@@ -914,9 +921,9 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
if (unlikely(iov_iter_is_pipe(i)))
return pipe_zero(bytes, i);
- iterate_and_advance(i, bytes, v, count,
- clear_user(v.iov_base, v.iov_len),
- memset(v.iov_base, 0, v.iov_len)
+ iterate_and_advance(i, bytes, base, len, count,
+ clear_user(base, len),
+ memset(base, 0, len)
)
return bytes;
@@ -936,9 +943,9 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
WARN_ON(1);
return 0;
}
- iterate_and_advance(i, bytes, v, off,
- copyin(p + off, v.iov_base, v.iov_len),
- memcpy(p + off, v.iov_base, v.iov_len)
+ iterate_and_advance(i, bytes, base, len, off,
+ copyin(p + off, base, len),
+ memcpy(p + off, base, len)
)
kunmap_atomic(kaddr);
return bytes;
@@ -1667,16 +1674,13 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
WARN_ON(1);
return 0;
}
- iterate_and_advance(i, bytes, v, off, ({
- next = csum_and_copy_from_user(v.iov_base,
- addr + off,
- v.iov_len);
+ iterate_and_advance(i, bytes, base, len, off, ({
+ next = csum_and_copy_from_user(base, addr + off, len);
if (next)
sum = csum_block_add(sum, next, off);
- next ? 0 : v.iov_len;
+ next ? 0 : len;
}), ({
- sum = csum_and_memcpy(addr + off, v.iov_base, v.iov_len,
- sum, off);
+ sum = csum_and_memcpy(addr + off, base, len, sum, off);
})
)
*csum = sum;
@@ -1698,17 +1702,13 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
WARN_ON(1); /* for now */
return 0;
}
- iterate_and_advance(i, bytes, v, off, ({
- next = csum_and_copy_to_user(addr + off,
- v.iov_base,
- v.iov_len);
+ iterate_and_advance(i, bytes, base, len, off, ({
+ next = csum_and_copy_to_user(addr + off, base, len);
if (next)
sum = csum_block_add(sum, next, off);
- next ? 0 : v.iov_len;
+ next ? 0 : len;
}), ({
- sum = csum_and_memcpy(v.iov_base,
- addr + off,
- v.iov_len, sum, off);
+ sum = csum_and_memcpy(base, addr + off, len, sum, off);
})
)
csstate->csum = csum_shift(sum, csstate->off);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 30/37] pull handling of ->iov_offset into iterate_{iovec,bvec,xarray}
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (27 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 29/37] iov_iter: make iterator callbacks use base and len instead of iovec Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 31/37] iterate_xarray(): only on the first iteration we might get offset != 0 Al Viro
` (6 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
fewer arguments (by one, but still...) for iterate_...() macros
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 8a7a8e5f4155..c1580e574d76 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -17,8 +17,9 @@
#define PIPE_PARANOIA /* for now */
/* covers iovec and kvec alike */
-#define iterate_iovec(i, n, base, len, off, __p, skip, STEP) { \
+#define iterate_iovec(i, n, base, len, off, __p, STEP) { \
size_t off = 0; \
+ size_t skip = i->iov_offset; \
do { \
len = min(n, __p->iov_len - skip); \
if (likely(len)) { \
@@ -33,18 +34,20 @@
__p++; \
skip = 0; \
} while (n); \
+ i->iov_offset = skip; \
n = off; \
}
-#define iterate_bvec(i, n, base, len, off, p, skip, STEP) { \
+#define iterate_bvec(i, n, base, len, off, p, STEP) { \
size_t off = 0; \
+ unsigned skip = i->iov_offset; \
while (n) { \
unsigned offset = p->bv_offset + skip; \
unsigned left; \
void *kaddr = kmap_local_page(p->bv_page + \
offset / PAGE_SIZE); \
base = kaddr + offset % PAGE_SIZE; \
- len = min(min(n, p->bv_len - skip), \
+ len = min(min(n, (size_t)(p->bv_len - skip)), \
(size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \
left = (STEP); \
kunmap_local(kaddr); \
@@ -59,15 +62,16 @@
if (left) \
break; \
} \
+ i->iov_offset = skip; \
n = off; \
}
-#define iterate_xarray(i, n, base, len, __off, skip, STEP) { \
+#define iterate_xarray(i, n, base, len, __off, STEP) { \
__label__ __out; \
size_t __off = 0; \
struct page *head = NULL; \
size_t offset; \
- loff_t start = i->xarray_start + skip; \
+ loff_t start = i->xarray_start + i->iov_offset; \
pgoff_t index = start >> PAGE_SHIFT; \
int j; \
\
@@ -100,7 +104,7 @@
} \
__out: \
rcu_read_unlock(); \
- skip += __off; \
+ i->iov_offset += __off; \
n = __off; \
}
@@ -108,13 +112,12 @@ __out: \
if (unlikely(i->count < n)) \
n = i->count; \
if (likely(n)) { \
- size_t skip = i->iov_offset; \
if (likely(iter_is_iovec(i))) { \
const struct iovec *iov = i->iov; \
void __user *base; \
size_t len; \
iterate_iovec(i, n, base, len, off, \
- iov, skip, (I)) \
+ iov, (I)) \
i->nr_segs -= iov - i->iov; \
i->iov = iov; \
} else if (iov_iter_is_bvec(i)) { \
@@ -122,7 +125,7 @@ __out: \
void *base; \
size_t len; \
iterate_bvec(i, n, base, len, off, \
- bvec, skip, (K)) \
+ bvec, (K)) \
i->nr_segs -= bvec - i->bvec; \
i->bvec = bvec; \
} else if (iov_iter_is_kvec(i)) { \
@@ -130,17 +133,16 @@ __out: \
void *base; \
size_t len; \
iterate_iovec(i, n, base, len, off, \
- kvec, skip, (K)) \
+ kvec, (K)) \
i->nr_segs -= kvec - i->kvec; \
i->kvec = kvec; \
} else if (iov_iter_is_xarray(i)) { \
void *base; \
size_t len; \
iterate_xarray(i, n, base, len, off, \
- skip, (K)) \
+ (K)) \
} \
i->count -= n; \
- i->iov_offset = skip; \
} \
}
#define iterate_and_advance(i, n, base, len, off, I, K) \
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 31/37] iterate_xarray(): only on the first iteration we might get offset != 0
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (28 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 30/37] pull handling of ->iov_offset into iterate_{iovec,bvec,xarray} Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 32/37] copy_page_to_iter(): don't bother with kmap_atomic() for bvec/kvec cases Al Viro
` (5 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
recalculating offset on each iteration is pointless - on all subsequent
passes through the loop it will be zero anyway.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index c1580e574d76..9ecbf59c3378 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -70,9 +70,9 @@
__label__ __out; \
size_t __off = 0; \
struct page *head = NULL; \
- size_t offset; \
loff_t start = i->xarray_start + i->iov_offset; \
- pgoff_t index = start >> PAGE_SHIFT; \
+ unsigned offset = start % PAGE_SIZE; \
+ pgoff_t index = start / PAGE_SIZE; \
int j; \
\
XA_STATE(xas, i->xarray, index); \
@@ -89,7 +89,6 @@
for (j = (head->index < index) ? index - head->index : 0; \
j < thp_nr_pages(head); j++) { \
void *kaddr = kmap_local_page(head + j); \
- offset = (start + __off) % PAGE_SIZE; \
base = kaddr + offset; \
len = PAGE_SIZE - offset; \
len = min(n, len); \
@@ -100,6 +99,7 @@
n -= len; \
if (left || n == 0) \
goto __out; \
+ offset = 0; \
} \
} \
__out: \
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 32/37] copy_page_to_iter(): don't bother with kmap_atomic() for bvec/kvec cases
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (29 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 31/37] iterate_xarray(): only on the first iteration we might get offset != 0 Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 33/37] copy_page_from_iter(): don't need kmap_atomic() for kvec/bvec cases Al Viro
` (4 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
kmap_local_page() is enough there. Moreover, we can use _copy_to_iter()
for actual copying in those cases - no useful extra checks on the
address we are copying from in that call.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 9ecbf59c3378..4fcd0cc44e47 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -832,9 +832,9 @@ static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes
if (likely(iter_is_iovec(i)))
return copy_page_to_iter_iovec(page, offset, bytes, i);
if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
- void *kaddr = kmap_atomic(page);
- size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
- kunmap_atomic(kaddr);
+ void *kaddr = kmap_local_page(page);
+ size_t wanted = _copy_to_iter(kaddr + offset, bytes, i);
+ kunmap_local(kaddr);
return wanted;
}
if (iov_iter_is_pipe(i))
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 33/37] copy_page_from_iter(): don't need kmap_atomic() for kvec/bvec cases
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (30 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 32/37] copy_page_to_iter(): don't bother with kmap_atomic() for bvec/kvec cases Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 34/37] iov_iter: clean csum_and_copy_...() primitives up a bit Al Viro
` (3 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
kmap_local_page() is enough.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 4fcd0cc44e47..0a9e246178c1 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -882,9 +882,9 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
if (likely(iter_is_iovec(i)))
return copy_page_from_iter_iovec(page, offset, bytes, i);
if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
- void *kaddr = kmap_atomic(page);
+ void *kaddr = kmap_local_page(page);
size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
return wanted;
}
WARN_ON(1);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 34/37] iov_iter: clean csum_and_copy_...() primitives up a bit
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (31 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 33/37] copy_page_from_iter(): don't need kmap_atomic() for kvec/bvec cases Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 35/37] pipe_zero(): we don't need no stinkin' kmap_atomic() Al Viro
` (2 subsequent siblings)
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
1) kmap_atomic() is not needed here, kmap_local_page() is enough.
2) No need to make sum = csum_block_add(sum, next, off); conditional
upon next != 0 - adding 0 is a no-op as far as csum_block_add()
is concerned.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0a9e246178c1..56d2606a47e4 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -611,9 +611,9 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
return 0;
do {
size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
- char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
+ char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
- kunmap_atomic(p);
+ kunmap_local(p);
i->head = i_head;
i->iov_offset = r + chunk;
n -= chunk;
@@ -1678,8 +1678,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
}
iterate_and_advance(i, bytes, base, len, off, ({
next = csum_and_copy_from_user(base, addr + off, len);
- if (next)
- sum = csum_block_add(sum, next, off);
+ sum = csum_block_add(sum, next, off);
next ? 0 : len;
}), ({
sum = csum_and_memcpy(addr + off, base, len, sum, off);
@@ -1706,8 +1705,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
}
iterate_and_advance(i, bytes, base, len, off, ({
next = csum_and_copy_to_user(addr + off, base, len);
- if (next)
- sum = csum_block_add(sum, next, off);
+ sum = csum_block_add(sum, next, off);
next ? 0 : len;
}), ({
sum = csum_and_memcpy(base, addr + off, len, sum, off);
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 35/37] pipe_zero(): we don't need no stinkin' kmap_atomic()...
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (32 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 34/37] iov_iter: clean csum_and_copy_...() primitives up a bit Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 36/37] clean up copy_mc_pipe_to_iter() Al Viro
2021-06-06 19:10 ` [RFC PATCH 37/37] csum_and_copy_to_pipe_iter(): leave handling of csum_state to caller Al Viro
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
FWIW, memcpy_to_page() itself almost certainly ought to
use kmap_local_page()...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 56d2606a47e4..7471fb181643 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -908,7 +908,9 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i)
do {
size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
- memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
+ char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
+ memset(p + off, 0, chunk);
+ kunmap_local(p);
i->head = i_head;
i->iov_offset = off + chunk;
n -= chunk;
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 36/37] clean up copy_mc_pipe_to_iter()
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (33 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 35/37] pipe_zero(): we don't need no stinkin' kmap_atomic() Al Viro
@ 2021-06-06 19:10 ` Al Viro
2021-06-06 19:10 ` [RFC PATCH 37/37] csum_and_copy_to_pipe_iter(): leave handling of csum_state to caller Al Viro
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
... and we don't need kmap_atomic() there - kmap_local_page() is fine.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 33 +++++++++------------------------
1 file changed, 9 insertions(+), 24 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 7471fb181643..0ee359b62afc 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -653,19 +653,6 @@ static int copyout_mc(void __user *to, const void *from, size_t n)
return n;
}
-static unsigned long copy_mc_to_page(struct page *page, size_t offset,
- const char *from, size_t len)
-{
- unsigned long ret;
- char *to;
-
- to = kmap_atomic(page);
- ret = copy_mc_to_kernel(to + offset, from, len);
- kunmap_atomic(to);
-
- return ret;
-}
-
static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct iov_iter *i)
{
@@ -677,25 +664,23 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
if (!sanity(i))
return 0;
- bytes = n = push_pipe(i, bytes, &i_head, &off);
- if (unlikely(!n))
- return 0;
- do {
+ n = push_pipe(i, bytes, &i_head, &off);
+ while (n) {
size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
+ char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
unsigned long rem;
-
- rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
- off, addr, chunk);
+ rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
+ chunk -= rem;
+ kunmap_local(p);
i->head = i_head;
- i->iov_offset = off + chunk - rem;
- xfer += chunk - rem;
+ i->iov_offset = off + chunk;
+ xfer += chunk;
if (rem)
break;
n -= chunk;
- addr += chunk;
off = 0;
i_head++;
- } while (n);
+ }
i->count -= xfer;
return xfer;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread
* [RFC PATCH 37/37] csum_and_copy_to_pipe_iter(): leave handling of csum_state to caller
2021-06-06 19:10 ` [RFC PATCH 01/37] ntfs_copy_from_user_iter(): don't bother with copying iov_iter Al Viro
` (34 preceding siblings ...)
2021-06-06 19:10 ` [RFC PATCH 36/37] clean up copy_mc_pipe_to_iter() Al Viro
@ 2021-06-06 19:10 ` Al Viro
35 siblings, 0 replies; 57+ messages in thread
From: Al Viro @ 2021-06-06 19:10 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-fsdevel, linux-kernel, David Sterba, Miklos Szeredi,
Anton Altaparmakov, David Howells, Matthew Wilcox,
Pavel Begunkov
... since all the logic is already there for use by the iovec/kvec/etc.
cases.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
lib/iov_iter.c | 41 ++++++++++++++++++-----------------------
1 file changed, 18 insertions(+), 23 deletions(-)
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0ee359b62afc..11b39bd1d1ab 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -593,39 +593,34 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
}
static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
- struct csum_state *csstate,
- struct iov_iter *i)
+ struct iov_iter *i, __wsum *sump)
{
struct pipe_inode_info *pipe = i->pipe;
unsigned int p_mask = pipe->ring_size - 1;
- __wsum sum = csstate->csum;
- size_t off = csstate->off;
+ __wsum sum = *sump;
+ size_t off = 0;
unsigned int i_head;
- size_t n, r;
+ size_t r;
if (!sanity(i))
return 0;
- bytes = n = push_pipe(i, bytes, &i_head, &r);
- if (unlikely(!n))
- return 0;
- do {
- size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
+ bytes = push_pipe(i, bytes, &i_head, &r);
+ while (bytes) {
+ size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r);
char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
- sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
+ sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off);
kunmap_local(p);
i->head = i_head;
i->iov_offset = r + chunk;
- n -= chunk;
+ bytes -= chunk;
off += chunk;
- addr += chunk;
r = 0;
i_head++;
- } while (n);
- i->count -= bytes;
- csstate->csum = sum;
- csstate->off = off;
- return bytes;
+ }
+ *sump = sum;
+ i->count -= off;
+ return off;
}
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
@@ -1682,15 +1677,15 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
struct csum_state *csstate = _csstate;
__wsum sum, next;
- if (unlikely(iov_iter_is_pipe(i)))
- return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
-
- sum = csum_shift(csstate->csum, csstate->off);
if (unlikely(iov_iter_is_discard(i))) {
WARN_ON(1); /* for now */
return 0;
}
- iterate_and_advance(i, bytes, base, len, off, ({
+
+ sum = csum_shift(csstate->csum, csstate->off);
+ if (unlikely(iov_iter_is_pipe(i)))
+ bytes = csum_and_copy_to_pipe_iter(addr, bytes, i, &sum);
+ else iterate_and_advance(i, bytes, base, len, off, ({
next = csum_and_copy_to_user(addr + off, base, len);
sum = csum_block_add(sum, next, off);
next ? 0 : len;
--
2.11.0
^ permalink raw reply related [flat|nested] 57+ messages in thread