All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Metzmacher <metze@samba.org>
To: Pavel Begunkov <asml.silence@gmail.com>,
	io-uring <io-uring@vger.kernel.org>, Jens Axboe <axboe@kernel.dk>
Cc: Jakub Kicinski <kuba@kernel.org>, netdev <netdev@vger.kernel.org>,
	Dylan Yudaken <dylany@fb.com>
Subject: IORING_SEND_NOTIF_REPORT_USAGE (was Re: IORING_CQE_F_COPIED)
Date: Thu, 20 Oct 2022 12:04:53 +0200	[thread overview]
Message-ID: <acad81e4-c2ef-59cc-5f0c-33b99082d270@samba.org> (raw)
In-Reply-To: <fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com>

Hi Pavel,

> Yep, sth like that should do, but let's guard against
> spurious net_zcopy_put() just in case.
> 
> used = false;
> copied = false;
> 
> callback(skb, success, ubuf) {
>      if (skb)
>          used = true;
>      if (!success)
>          copied = true;
> }
> complete() {
>      if (!used || copied)
>          set_flag(IORING_CQE_F_COPIED);
> }
> 
>> And __io_notif_complete_tw still needs:
>>
>>          if (!nd->zero_copied)
>>                  notif->cqe.flags |= IORING_CQE_F_COPIED;
> 
> Which can be shoved in a custom callback
> 

Ok, got the idea.

> I'm more concerned about future changes around it, but there won't
> be extra ifs.
> 
> #define COMMON_FLAGS (RECVSEND_FIRST_POLL|...)
> #define ALL_FLAGS (COMMON_FLAGS|RECVSEND_PROBE)
> 
> if (flags & ~COMMON_FLAGS) {
>      if (flags & ~ALL_FLAGS)
>          return err;
>      if (flags & RECVSEND_PROBE)
>          set_callback(notif);
> }

So far I came up with a IORING_SEND_NOTIF_REPORT_USAGE opt-in flag
and the reporting is done in cqe.res with IORING_NOTIF_USAGE_ZC_USED (0x00000001)
and/or IORING_NOTIF_USAGE_ZC_COPIED (0x8000000). So the caller is also
able to notice that some parts were able to use zero copy, while other
fragments were copied.

I haven't tested it yet, but I want to post it early...

What do you think?

metze

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ab7458033ee3..751fc4eff8d1 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -296,10 +296,28 @@ enum io_uring_op {
   *
   * IORING_RECVSEND_FIXED_BUF	Use registered buffers, the index is stored in
   *				the buf_index field.
+ *
+ * IORING_SEND_NOTIF_REPORT_USAGE
+ *				If SEND[MSG]_ZC should report
+ *				the zerocopy usage in cqe.res
+ *				for the IORING_CQE_F_NOTIF cqe.
+ *				IORING_NOTIF_USAGE_ZC_USED if zero copy was used
+ *				(at least partially).
+ *				IORING_NOTIF_USAGE_ZC_COPIED if data was copied
+ *				(at least partially).
   */
  #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
  #define IORING_RECV_MULTISHOT		(1U << 1)
  #define IORING_RECVSEND_FIXED_BUF	(1U << 2)
+#define IORING_SEND_NOTIF_REPORT_USAGE	(1U << 3)
+
+/*
+ * cqe.res for IORING_CQE_F_NOTIF if
+ * IORING_SEND_NOTIF_REPORT_USAGE was requested
+ */
+#define IORING_NOTIF_USAGE_ZC_USED	(1U << 0)
+#define IORING_NOTIF_USAGE_ZC_COPIED	(1U << 31)
+

  /*
   * accept flags stored in sqe->ioprio
diff --git a/io_uring/net.c b/io_uring/net.c
index 735eec545115..a79d7d349e19 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -946,9 +946,11 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

  	zc->flags = READ_ONCE(sqe->ioprio);
  	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
-			  IORING_RECVSEND_FIXED_BUF))
+			  IORING_RECVSEND_FIXED_BUF |
+			  IORING_SEND_NOTIF_REPORT_USAGE))
  		return -EINVAL;
-	notif = zc->notif = io_alloc_notif(ctx);
+	notif = zc->notif = io_alloc_notif(ctx,
+					   zc->flags & IORING_SEND_NOTIF_REPORT_USAGE);
  	if (!notif)
  		return -ENOMEM;
  	notif->cqe.user_data = req->cqe.user_data;
diff --git a/io_uring/notif.c b/io_uring/notif.c
index e37c6569d82e..3844e3c8ad7e 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -3,13 +3,14 @@
  #include <linux/file.h>
  #include <linux/slab.h>
  #include <linux/net.h>
+#include <linux/errqueue.h>
  #include <linux/io_uring.h>

  #include "io_uring.h"
  #include "notif.h"
  #include "rsrc.h"

-static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
+static inline void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
  {
  	struct io_notif_data *nd = io_notif_to_data(notif);
  	struct io_ring_ctx *ctx = notif->ctx;
@@ -21,20 +22,46 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
  	io_req_task_complete(notif, locked);
  }

-static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
-					  struct ubuf_info *uarg,
-					  bool success)
+static inline void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
+						 struct ubuf_info *uarg,
+						 bool success)
  {
  	struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
  	struct io_kiocb *notif = cmd_to_io_kiocb(nd);

  	if (refcount_dec_and_test(&uarg->refcnt)) {
-		notif->io_task_work.func = __io_notif_complete_tw;
  		io_req_task_work_add(notif);
  	}
  }

-struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
+static void __io_notif_complete_tw_report_usage(struct io_kiocb *notif, bool *locked)
+{
+	struct io_notif_data *nd = io_notif_to_data(notif);
+
+	if (likely(nd->zc_used))
+		notif->cqe.res |= IORING_NOTIF_USAGE_ZC_USED;
+
+	if (unlikely(nd->zc_copied))
+		notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
+
+	__io_notif_complete_tw(notif, locked);
+}
+
+static void io_uring_tx_zerocopy_callback_report_usage(struct sk_buff *skb,
+							struct ubuf_info *uarg,
+							bool success)
+{
+	struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
+
+	if (success && !nd->zc_used && skb)
+		nd->zc_used = true;
+	else if (unlikely(!success && !nd->zc_copied))
+		nd->zc_copied = true;
+
+	io_uring_tx_zerocopy_callback(skb, uarg, success);
+}
+
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, bool report_usage)
  	__must_hold(&ctx->uring_lock)
  {
  	struct io_kiocb *notif;
@@ -54,7 +81,14 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
  	nd = io_notif_to_data(notif);
  	nd->account_pages = 0;
  	nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
-	nd->uarg.callback = io_uring_tx_zerocopy_callback;
+	if (report_usage) {
+		nd->zc_used = nd->zc_copied = false;
+		nd->uarg.callback = io_uring_tx_zerocopy_callback_report_usage;
+		notif->io_task_work.func = __io_notif_complete_tw_report_usage;
+	} else {
+		nd->uarg.callback = io_uring_tx_zerocopy_callback;
+		notif->io_task_work.func = __io_notif_complete_tw;
+	}
  	refcount_set(&nd->uarg.refcnt, 1);
  	return notif;
  }
@@ -66,7 +100,6 @@ void io_notif_flush(struct io_kiocb *notif)

  	/* drop slot's master ref */
  	if (refcount_dec_and_test(&nd->uarg.refcnt)) {
-		notif->io_task_work.func = __io_notif_complete_tw;
  		io_req_task_work_add(notif);
  	}
  }
diff --git a/io_uring/notif.h b/io_uring/notif.h
index 5b4d710c8ca5..5ac7a2745e52 100644
--- a/io_uring/notif.h
+++ b/io_uring/notif.h
@@ -13,10 +13,12 @@ struct io_notif_data {
  	struct file		*file;
  	struct ubuf_info	uarg;
  	unsigned long		account_pages;
+	bool			zc_used;
+	bool			zc_copied;
  };

  void io_notif_flush(struct io_kiocb *notif);
-struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx);
+struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx, bool report_usage);

  static inline struct io_notif_data *io_notif_to_data(struct io_kiocb *notif)
  {


  reply	other threads:[~2022-10-20 10:05 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-14 11:06 IORING_CQE_F_COPIED Stefan Metzmacher
2022-10-17 16:46 ` IORING_CQE_F_COPIED Pavel Begunkov
2022-10-18  8:43   ` IORING_CQE_F_COPIED Stefan Metzmacher
2022-10-19 15:06     ` IORING_CQE_F_COPIED Pavel Begunkov
2022-10-19 16:12       ` IORING_CQE_F_COPIED Stefan Metzmacher
2022-10-20  2:24         ` IORING_CQE_F_COPIED Pavel Begunkov
2022-10-20 10:04           ` Stefan Metzmacher [this message]
2022-10-20 13:46             ` IORING_SEND_NOTIF_REPORT_USAGE (was Re: IORING_CQE_F_COPIED) Pavel Begunkov
2022-10-20 14:51               ` Stefan Metzmacher
2022-10-20 15:31                 ` Pavel Begunkov
2022-10-21  9:36                   ` Stefan Metzmacher
2022-10-21 11:09                     ` Pavel Begunkov
2022-10-21 14:03                       ` Stefan Metzmacher
2022-10-27  8:47                         ` Stefan Metzmacher
2022-10-27 10:51                         ` Pavel Begunkov
2022-10-20 10:10           ` IORING_SEND_NOTIF_USER_DATA " Stefan Metzmacher
2022-10-20 15:37             ` Pavel Begunkov
2022-10-21  8:32               ` Stefan Metzmacher
2022-10-21  9:27                 ` Pavel Begunkov
2022-10-21  9:45                   ` Stefan Metzmacher
2022-10-21 11:20                     ` Pavel Begunkov
2022-10-21 12:10                       ` Stefan Metzmacher
2022-10-21 10:15                   ` Stefan Metzmacher
2022-10-21 11:26                     ` Pavel Begunkov
2022-10-21 12:38                       ` Stefan Metzmacher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=acad81e4-c2ef-59cc-5f0c-33b99082d270@samba.org \
    --to=metze@samba.org \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=dylany@fb.com \
    --cc=io-uring@vger.kernel.org \
    --cc=kuba@kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.