All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>,
	Matthew Wilcox <willy@infradead.org>,
	linux-fsdevel@vger.kernel.org
Subject: Re: [RFC] what to do with IOCB_DSYNC?
Date: Sun, 22 May 2022 07:02:01 -0600	[thread overview]
Message-ID: <96aa35fc-3ff7-a3a1-05b5-9fae5c9c1067@kernel.dk> (raw)
In-Reply-To: <YooxLS8Lrt6ErwIM@zeniv-ca.linux.org.uk>

On 5/22/22 6:48 AM, Al Viro wrote:
> On Sun, May 22, 2022 at 06:39:39AM -0600, Jens Axboe wrote:
>> On 5/22/22 5:45 AM, Christoph Hellwig wrote:
>>> On Sun, May 22, 2022 at 12:15:59PM +0100, Matthew Wilcox wrote:
>>>>> 	Direct kernel pointer, surely?  And from a quick look,
>>>>> iov_iter_is_kaddr() checks for the wrong value...
>>>>
>>>> Indeed.  I didn't test it; it was a quick patch to see if the idea was
>>>> worth pursuing.  Neither you nor Christoph thought so at the time, so
>>>> I dropped it.  if there are performance improvements to be had from
>>>> doing something like that, it's a more compelling idea than just "Hey,
>>>> this removes a few lines of code and a bit of stack space from every
>>>> caller".
>>>
>>> Oh, right I actually misremembered what the series did.  But something
>>> similar except for user pointers might help with the performance issues
>>> that Jens sees, and if it does it might be worth it to avoid having
>>> both the legacy read/write path and the iter path in various drivers.
>>
>> Right, ITER_UADDR or something would useful. I'll try and test that,
>> should be easy to wire up.
> 
> Careful - it's not just iov_iter_advance() and __iterate_and_advance() (that
> one should use the same "callback" argument as iovec case).  /dev/random is
> not the only thing we use read(2) and write(2) on...
> 
> I can cook a patch doing that, just let me get some caffeine into the
> bloodstream first...

Sure, if you want to cook it up. Here's what I did so far, no caffeine
and not even compiled yet :-). It also doesn't do the page copy,
separate advancing, etc. Was going to check all the parts in iov_iter.c
that checks the type and has separate handlers.

The read vs write const and not is not great...


diff --git a/fs/read_write.c b/fs/read_write.c
index e643aec2b0ef..862ec6c549c6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -389,14 +389,13 @@ EXPORT_SYMBOL(rw_verify_area);
 
 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
-	struct iovec iov = { .iov_base = buf, .iov_len = len };
 	struct kiocb kiocb;
 	struct iov_iter iter;
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = (ppos ? *ppos : 0);
-	iov_iter_init(&iter, READ, &iov, 1, len);
+	uaddr_iter_init(&iter, READ, buf, len);
 
 	ret = call_read_iter(filp, &kiocb, &iter);
 	BUG_ON(ret == -EIOCBQUEUED);
@@ -492,14 +491,13 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 
 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 {
-	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 	struct kiocb kiocb;
 	struct iov_iter iter;
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = (ppos ? *ppos : 0);
-	iov_iter_init(&iter, WRITE, &iov, 1, len);
+	uaddr_iter_init(&iter, WRITE, (void __user *) buf, len);
 
 	ret = call_write_iter(filp, &kiocb, &iter);
 	BUG_ON(ret == -EIOCBQUEUED);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 739285fe5a2f..58e20e83745e 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -21,6 +21,7 @@ struct kvec {
 enum iter_type {
 	/* iter types */
 	ITER_IOVEC,
+	ITER_UADDR,
 	ITER_KVEC,
 	ITER_BVEC,
 	ITER_PIPE,
@@ -42,6 +43,7 @@ struct iov_iter {
 	size_t count;
 	union {
 		const struct iovec *iov;
+		void __user *uaddr;
 		const struct kvec *kvec;
 		const struct bio_vec *bvec;
 		struct xarray *xarray;
@@ -75,6 +77,11 @@ static inline bool iter_is_iovec(const struct iov_iter *i)
 	return iov_iter_type(i) == ITER_IOVEC;
 }
 
+static inline bool iter_is_uaddr(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_UADDR;
+}
+
 static inline bool iov_iter_is_kvec(const struct iov_iter *i)
 {
 	return iov_iter_type(i) == ITER_KVEC;
@@ -241,6 +248,18 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);
 
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
 
+static inline void uaddr_iter_init(struct iov_iter *iter, int rw,
+				   void __user *uaddr, size_t len)
+{
+	iter->iter_type = ITER_UADDR;
+	iter->nofault = false;
+	iter->data_source = rw;
+	iter->iov_offset = 0;
+	iter->count = len;
+	iter->uaddr = uaddr;
+	iter->nr_segs = 0;
+}
+
 static inline size_t iov_iter_count(const struct iov_iter *i)
 {
 	return i->count;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 6dd5330f7a99..4239fffd14cb 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -38,6 +38,22 @@
 	n = off;						\
 }
 
+#define iterate_uaddr(i, n, base, len, off, STEP) {		\
+	void __user *base;					\
+	size_t len;						\
+	size_t off = 0;						\
+	size_t skip = i->iov_offset;				\
+	len = min(n, i->count - skip);				\
+	if (likely(len)) {					\
+		base = i->uaddr + skip;				\
+		len -= (STEP);					\
+		off += len;					\
+		skip += len;					\
+	}							\
+	i->iov_offset = skip;					\
+	n = off;						\
+}
+
 #define iterate_bvec(i, n, base, len, off, p, STEP) {		\
 	size_t off = 0;						\
 	unsigned skip = i->iov_offset;				\
@@ -118,6 +134,8 @@ __out:								\
 						iov, (I))	\
 			i->nr_segs -= iov - i->iov;		\
 			i->iov = iov;				\
+		} else if (iter_is_uaddr(i)) {			\
+			iterate_uaddr(i, n, base, len, off, (I)) \
 		} else if (iov_iter_is_bvec(i)) {		\
 			const struct bio_vec *bvec = i->bvec;	\
 			void *base;				\
@@ -662,7 +680,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_pipe_to_iter(addr, bytes, i);
-	if (iter_is_iovec(i))
+	if (iter_is_iovec(i) || iter_is_uaddr(i))
 		might_fault();
 	iterate_and_advance(i, bytes, base, len, off,
 		copyout(base, addr + off, len),
@@ -744,7 +762,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_mc_pipe_to_iter(addr, bytes, i);
-	if (iter_is_iovec(i))
+	if (iter_is_iovec(i) || iter_is_uaddr(i))
 		might_fault();
 	__iterate_and_advance(i, bytes, base, len, off,
 		copyout_mc(base, addr + off, len),
@@ -1061,6 +1079,10 @@ static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
 	i->iov = iov;
 }
 
+static void iter_uaddr_advance(struct iov_iter *i, size_t size)
+{
+}
+
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
 	if (unlikely(i->count < size))
@@ -1068,6 +1090,8 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
 		/* iovec and kvec have identical layouts */
 		iov_iter_iovec_advance(i, size);
+	} else if (iter_is_uaddr(i)) {
+		iter_uaddr_advance(i, size);
 	} else if (iov_iter_is_bvec(i)) {
 		iov_iter_bvec_advance(i, size);
 	} else if (iov_iter_is_pipe(i)) {

-- 
Jens Axboe


  reply	other threads:[~2022-05-22 13:02 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-21  0:46 [RFC] what to do with IOCB_DSYNC? Al Viro
2021-06-21 13:59 ` Christoph Hellwig
2021-06-21 14:03   ` Matthew Wilcox
2021-06-21 14:09     ` Christoph Hellwig
2021-06-21 14:16       ` Matthew Wilcox
2021-06-21 14:22         ` Christoph Hellwig
2021-06-21 14:32           ` Al Viro
2021-06-21 14:35             ` Christoph Hellwig
2021-06-21 15:22               ` Jens Axboe
2022-05-21 17:48               ` Al Viro
2022-05-21 19:03                 ` Jens Axboe
2022-05-21 22:14                   ` Jens Axboe
2022-05-22  7:45                     ` Christoph Hellwig
2022-05-22 10:23                       ` Matthew Wilcox
2022-05-22 10:36                         ` Al Viro
2022-05-22 11:15                           ` Matthew Wilcox
2022-05-22 11:45                             ` Christoph Hellwig
2022-05-22 12:39                               ` Jens Axboe
2022-05-22 12:48                                 ` Al Viro
2022-05-22 13:02                                   ` Jens Axboe [this message]
2022-05-22 13:07                                     ` Al Viro
2022-05-22 13:09                                       ` Jens Axboe
2022-05-22 18:06                                         ` Jens Axboe
2022-05-22 18:25                                           ` Al Viro
2022-05-22 18:29                                             ` Jens Axboe
2022-05-22 18:39                                               ` Al Viro
2022-05-22 18:48                                                 ` Jens Axboe
2022-05-22 19:04                                                   ` Al Viro
2022-05-22 20:03                                                     ` Jens Axboe
2022-05-23  0:42                                                       ` Al Viro
2022-05-23  1:22                                                         ` Jens Axboe
2022-05-23  1:28                                                           ` Jens Axboe
2022-05-23  1:50                                                             ` Jens Axboe
2022-05-23  2:43                                                               ` Jens Axboe
2022-05-23 14:22                                                                 ` Al Viro
2022-05-23 14:34                                                                   ` Jens Axboe
2022-05-23 14:47                                                                     ` Al Viro
2022-05-23 15:12                                                                       ` Jens Axboe
2022-05-23 15:44                                                                         ` Jens Axboe
2022-05-23 15:49                                                                           ` Matthew Wilcox
2022-05-23 15:55                                                                             ` Jens Axboe
2022-05-23 16:03                                                                               ` Jens Axboe
2022-05-26 14:46                                                                                 ` Jason A. Donenfeld
2022-05-27 10:09                                                                                   ` Ingo Molnar
2022-05-27 10:15                                                                                     ` Jason A. Donenfeld
2022-05-27 14:45                                                                                       ` Samuel Neves
2022-05-27 10:25                                                                                     ` Ingo Molnar
2022-05-27 10:36                                                                                       ` Borislav Petkov
2022-05-28 20:54                                                                                         ` Sedat Dilek
2022-05-28 20:38                                                                                       ` Sedat Dilek
2022-05-28 20:39                                                                                         ` Sedat Dilek
2022-05-23 16:15                                                                         ` Al Viro
2022-05-25 14:34                                                         ` Matthew Wilcox
2022-05-26 23:19                                                     ` Al Viro
2022-05-27 14:51                                                       ` Jens Axboe
2022-05-22 12:21                             ` Al Viro
2022-05-22  7:43                 ` Christoph Hellwig
2022-05-22 12:41                   ` Al Viro
2022-05-22 12:51                     ` Christoph Hellwig
2021-06-21 14:22       ` Al Viro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=96aa35fc-3ff7-a3a1-05b5-9fae5c9c1067@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.