linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>,
	Matthew Wilcox <willy@infradead.org>,
	linux-fsdevel@vger.kernel.org
Subject: Re: [RFC] what to do with IOCB_DSYNC?
Date: Sun, 22 May 2022 07:02:01 -0600	[thread overview]
Message-ID: <96aa35fc-3ff7-a3a1-05b5-9fae5c9c1067@kernel.dk> (raw)
In-Reply-To: <YooxLS8Lrt6ErwIM@zeniv-ca.linux.org.uk>

On 5/22/22 6:48 AM, Al Viro wrote:
> On Sun, May 22, 2022 at 06:39:39AM -0600, Jens Axboe wrote:
>> On 5/22/22 5:45 AM, Christoph Hellwig wrote:
>>> On Sun, May 22, 2022 at 12:15:59PM +0100, Matthew Wilcox wrote:
>>>>> 	Direct kernel pointer, surely?  And from a quick look,
>>>>> iov_iter_is_kaddr() checks for the wrong value...
>>>>
>>>> Indeed.  I didn't test it; it was a quick patch to see if the idea was
>>>> worth pursuing.  Neither you nor Christoph thought so at the time, so
>>>> I dropped it.  if there are performance improvements to be had from
>>>> doing something like that, it's a more compelling idea than just "Hey,
>>>> this removes a few lines of code and a bit of stack space from every
>>>> caller".
>>>
>>> Oh, right I actually misremembered what the series did.  But something
>>> similar except for user pointers might help with the performance issues
>>> that Jens sees, and if it does it might be worth it to avoid having
>>> both the legacy read/write path and the iter path in various drivers.
>>
>> Right, ITER_UADDR or something would useful. I'll try and test that,
>> should be easy to wire up.
> 
> Careful - it's not just iov_iter_advance() and __iterate_and_advance() (that
> one should use the same "callback" argument as iovec case).  /dev/random is
> not the only thing we use read(2) and write(2) on...
> 
> I can cook a patch doing that, just let me get some caffeine into the
> bloodstream first...

Sure, if you want to cook it up. Here's what I did so far, no caffeine
and not even compiled yet :-). It also doesn't do the page copy,
separate advancing, etc. Was going to check all the parts in iov_iter.c
that checks the type and has separate handlers.

The read vs write const and not is not great...


diff --git a/fs/read_write.c b/fs/read_write.c
index e643aec2b0ef..862ec6c549c6 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -389,14 +389,13 @@ EXPORT_SYMBOL(rw_verify_area);
 
 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
-	struct iovec iov = { .iov_base = buf, .iov_len = len };
 	struct kiocb kiocb;
 	struct iov_iter iter;
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = (ppos ? *ppos : 0);
-	iov_iter_init(&iter, READ, &iov, 1, len);
+	uaddr_iter_init(&iter, READ, buf, len);
 
 	ret = call_read_iter(filp, &kiocb, &iter);
 	BUG_ON(ret == -EIOCBQUEUED);
@@ -492,14 +491,13 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 
 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
 {
-	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
 	struct kiocb kiocb;
 	struct iov_iter iter;
 	ssize_t ret;
 
 	init_sync_kiocb(&kiocb, filp);
 	kiocb.ki_pos = (ppos ? *ppos : 0);
-	iov_iter_init(&iter, WRITE, &iov, 1, len);
+	uaddr_iter_init(&iter, WRITE, (void __user *) buf, len);
 
 	ret = call_write_iter(filp, &kiocb, &iter);
 	BUG_ON(ret == -EIOCBQUEUED);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 739285fe5a2f..58e20e83745e 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -21,6 +21,7 @@ struct kvec {
 enum iter_type {
 	/* iter types */
 	ITER_IOVEC,
+	ITER_UADDR,
 	ITER_KVEC,
 	ITER_BVEC,
 	ITER_PIPE,
@@ -42,6 +43,7 @@ struct iov_iter {
 	size_t count;
 	union {
 		const struct iovec *iov;
+		void __user *uaddr;
 		const struct kvec *kvec;
 		const struct bio_vec *bvec;
 		struct xarray *xarray;
@@ -75,6 +77,11 @@ static inline bool iter_is_iovec(const struct iov_iter *i)
 	return iov_iter_type(i) == ITER_IOVEC;
 }
 
+static inline bool iter_is_uaddr(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_UADDR;
+}
+
 static inline bool iov_iter_is_kvec(const struct iov_iter *i)
 {
 	return iov_iter_type(i) == ITER_KVEC;
@@ -241,6 +248,18 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);
 
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
 
+static inline void uaddr_iter_init(struct iov_iter *iter, int rw,
+				   void __user *uaddr, size_t len)
+{
+	iter->iter_type = ITER_UADDR;
+	iter->nofault = false;
+	iter->data_source = rw;
+	iter->iov_offset = 0;
+	iter->count = len;
+	iter->uaddr = uaddr;
+	iter->nr_segs = 0;
+}
+
 static inline size_t iov_iter_count(const struct iov_iter *i)
 {
 	return i->count;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 6dd5330f7a99..4239fffd14cb 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -38,6 +38,22 @@
 	n = off;						\
 }
 
+#define iterate_uaddr(i, n, base, len, off, STEP) {		\
+	void __user *base;					\
+	size_t len;						\
+	size_t off = 0;						\
+	size_t skip = i->iov_offset;				\
+	len = min(n, i->count - skip);				\
+	if (likely(len)) {					\
+		base = i->uaddr + skip;				\
+		len -= (STEP);					\
+		off += len;					\
+		skip += len;					\
+	}							\
+	i->iov_offset = skip;					\
+	n = off;						\
+}
+
 #define iterate_bvec(i, n, base, len, off, p, STEP) {		\
 	size_t off = 0;						\
 	unsigned skip = i->iov_offset;				\
@@ -118,6 +134,8 @@ __out:								\
 						iov, (I))	\
 			i->nr_segs -= iov - i->iov;		\
 			i->iov = iov;				\
+		} else if (iter_is_uaddr(i)) {			\
+			iterate_uaddr(i, n, base, len, off, (I)) \
 		} else if (iov_iter_is_bvec(i)) {		\
 			const struct bio_vec *bvec = i->bvec;	\
 			void *base;				\
@@ -662,7 +680,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_pipe_to_iter(addr, bytes, i);
-	if (iter_is_iovec(i))
+	if (iter_is_iovec(i) || iter_is_uaddr(i))
 		might_fault();
 	iterate_and_advance(i, bytes, base, len, off,
 		copyout(base, addr + off, len),
@@ -744,7 +762,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_mc_pipe_to_iter(addr, bytes, i);
-	if (iter_is_iovec(i))
+	if (iter_is_iovec(i) || iter_is_uaddr(i))
 		might_fault();
 	__iterate_and_advance(i, bytes, base, len, off,
 		copyout_mc(base, addr + off, len),
@@ -1061,6 +1079,10 @@ static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
 	i->iov = iov;
 }
 
+static void iter_uaddr_advance(struct iov_iter *i, size_t size)
+{
+}
+
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
 	if (unlikely(i->count < size))
@@ -1068,6 +1090,8 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
 		/* iovec and kvec have identical layouts */
 		iov_iter_iovec_advance(i, size);
+	} else if (iter_is_uaddr(i)) {
+		iter_uaddr_advance(i, size);
 	} else if (iov_iter_is_bvec(i)) {
 		iov_iter_bvec_advance(i, size);
 	} else if (iov_iter_is_pipe(i)) {

-- 
Jens Axboe


  reply	other threads:[~2022-05-22 13:02 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-21  0:46 [RFC] what to do with IOCB_DSYNC? Al Viro
2021-06-21 13:59 ` Christoph Hellwig
2021-06-21 14:03   ` Matthew Wilcox
2021-06-21 14:09     ` Christoph Hellwig
2021-06-21 14:16       ` Matthew Wilcox
2021-06-21 14:22         ` Christoph Hellwig
2021-06-21 14:32           ` Al Viro
2021-06-21 14:35             ` Christoph Hellwig
2021-06-21 15:22               ` Jens Axboe
2022-05-21 17:48               ` Al Viro
2022-05-21 19:03                 ` Jens Axboe
2022-05-21 22:14                   ` Jens Axboe
2022-05-22  7:45                     ` Christoph Hellwig
2022-05-22 10:23                       ` Matthew Wilcox
2022-05-22 10:36                         ` Al Viro
2022-05-22 11:15                           ` Matthew Wilcox
2022-05-22 11:45                             ` Christoph Hellwig
2022-05-22 12:39                               ` Jens Axboe
2022-05-22 12:48                                 ` Al Viro
2022-05-22 13:02                                   ` Jens Axboe [this message]
2022-05-22 13:07                                     ` Al Viro
2022-05-22 13:09                                       ` Jens Axboe
2022-05-22 18:06                                         ` Jens Axboe
2022-05-22 18:25                                           ` Al Viro
2022-05-22 18:29                                             ` Jens Axboe
2022-05-22 18:39                                               ` Al Viro
2022-05-22 18:48                                                 ` Jens Axboe
2022-05-22 19:04                                                   ` Al Viro
2022-05-22 20:03                                                     ` Jens Axboe
2022-05-23  0:42                                                       ` Al Viro
2022-05-23  1:22                                                         ` Jens Axboe
2022-05-23  1:28                                                           ` Jens Axboe
2022-05-23  1:50                                                             ` Jens Axboe
2022-05-23  2:43                                                               ` Jens Axboe
2022-05-23 14:22                                                                 ` Al Viro
2022-05-23 14:34                                                                   ` Jens Axboe
2022-05-23 14:47                                                                     ` Al Viro
2022-05-23 15:12                                                                       ` Jens Axboe
2022-05-23 15:44                                                                         ` Jens Axboe
2022-05-23 15:49                                                                           ` Matthew Wilcox
2022-05-23 15:55                                                                             ` Jens Axboe
2022-05-23 16:03                                                                               ` Jens Axboe
2022-05-26 14:46                                                                                 ` Jason A. Donenfeld
2022-05-27 10:09                                                                                   ` Ingo Molnar
2022-05-27 10:15                                                                                     ` Jason A. Donenfeld
2022-05-27 14:45                                                                                       ` Samuel Neves
2022-05-27 10:25                                                                                     ` Ingo Molnar
2022-05-27 10:36                                                                                       ` Borislav Petkov
2022-05-28 20:54                                                                                         ` Sedat Dilek
2022-05-28 20:38                                                                                       ` Sedat Dilek
2022-05-28 20:39                                                                                         ` Sedat Dilek
2022-05-23 16:15                                                                         ` Al Viro
2022-05-25 14:34                                                         ` Matthew Wilcox
2022-05-26 23:19                                                     ` Al Viro
2022-05-27 14:51                                                       ` Jens Axboe
2022-05-22 12:21                             ` Al Viro
2022-05-22  7:43                 ` Christoph Hellwig
2022-05-22 12:41                   ` Al Viro
2022-05-22 12:51                     ` Christoph Hellwig
2021-06-21 14:22       ` Al Viro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=96aa35fc-3ff7-a3a1-05b5-9fae5c9c1067@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).