Linux-Fsdevel Archive on lore.kernel.org
 help / color / Atom feed
* [PATCH] epoll: add nsec timeout support
@ 2020-10-26 16:08 Willem de Bruijn
  2020-10-26 16:13 ` Soheil Hassas Yeganeh
  0 siblings, 1 reply; 2+ messages in thread
From: Willem de Bruijn @ 2020-10-26 16:08 UTC (permalink / raw)
  To: linux-fsdevel; +Cc: linux-kernel, viro, soheil.kdev, arnd, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

The underlying hrtimer is programmed with nanosecond resolution.

Use cases such as datacenter networking operate on timescales well
below milliseconds. Setting shorter timeouts bounds tail latency.

Add epoll_create1 flag EPOLL_NSTIMEO. When passed, this changes the
interpretation of argument timeout in epoll_wait from msec to nsec.

The new eventpoll state fits in existing 4B of padding when busy poll
is compiled in (the default), and reads the same cacheline.

Signed-off-by: Willem de Bruijn <willemb@google.com>

---

The selftest is hosted on GitHub for now; I can follow up with a kselftest.
https://github.com/wdebruij/kerneltools/blob/master/tests/epoll_nstimeo.c
---
 fs/eventpoll.c                 | 26 +++++++++++++++++++-------
 include/uapi/linux/eventpoll.h |  1 +
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4df61129566d..1216b909d155 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -225,6 +225,9 @@ struct eventpoll {
 	unsigned int napi_id;
 #endif
 
+	/* Accept timeout in ns resolution (EPOLL_NSTIMEO) */
+	unsigned int nstimeout:1;
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	/* tracks wakeup nests for lockdep validation */
 	u8 nests;
@@ -1787,17 +1790,20 @@ static int ep_send_events(struct eventpoll *ep,
 	return esed.res;
 }
 
-static inline struct timespec64 ep_set_mstimeout(long ms)
+static inline struct timespec64 ep_set_nstimeout(long ns)
 {
-	struct timespec64 now, ts = {
-		.tv_sec = ms / MSEC_PER_SEC,
-		.tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
-	};
+	struct timespec64 now, ts;
 
+	ts = ns_to_timespec64(ns);
 	ktime_get_ts64(&now);
 	return timespec64_add_safe(now, ts);
 }
 
+static inline struct timespec64 ep_set_mstimeout(long ms)
+{
+	return ep_set_nstimeout(NSEC_PER_MSEC * ms);
+}
+
 /**
  * ep_poll - Retrieves ready events, and delivers them to the caller supplied
  *           event buffer.
@@ -1826,7 +1832,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 	lockdep_assert_irqs_enabled();
 
 	if (timeout > 0) {
-		struct timespec64 end_time = ep_set_mstimeout(timeout);
+		struct timespec64 end_time;
+
+		end_time = ep->nstimeout ? ep_set_nstimeout(timeout) :
+					   ep_set_mstimeout(timeout);
 
 		slack = select_estimate_accuracy(&end_time);
 		to = &expires;
@@ -2046,7 +2055,7 @@ static int do_epoll_create(int flags)
 	/* Check the EPOLL_* constant for consistency.  */
 	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
 
-	if (flags & ~EPOLL_CLOEXEC)
+	if (flags & ~(EPOLL_CLOEXEC | EPOLL_NSTIMEO))
 		return -EINVAL;
 	/*
 	 * Create the internal data structure ("struct eventpoll").
@@ -2054,6 +2063,9 @@ static int do_epoll_create(int flags)
 	error = ep_alloc(&ep);
 	if (error < 0)
 		return error;
+
+	ep->nstimeout = !!(flags & EPOLL_NSTIMEO);
+
 	/*
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 8a3432d0f0dc..f6ef9c9f8ac2 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -21,6 +21,7 @@
 
 /* Flags for epoll_create1.  */
 #define EPOLL_CLOEXEC O_CLOEXEC
+#define EPOLL_NSTIMEO 0x1
 
 /* Valid opcodes to issue to sys_epoll_ctl() */
 #define EPOLL_CTL_ADD 1
-- 
2.29.0.rc1.297.gfa9743e501-goog


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] epoll: add nsec timeout support
  2020-10-26 16:08 [PATCH] epoll: add nsec timeout support Willem de Bruijn
@ 2020-10-26 16:13 ` Soheil Hassas Yeganeh
  0 siblings, 0 replies; 2+ messages in thread
From: Soheil Hassas Yeganeh @ 2020-10-26 16:13 UTC (permalink / raw)
  To: Willem de Bruijn
  Cc: linux-fsdevel, linux-kernel, Al Viro, Arnd Bergmann, Willem de Bruijn

On Mon, Oct 26, 2020 at 12:08 PM Willem de Bruijn
<willemdebruijn.kernel@gmail.com> wrote:
>
> From: Willem de Bruijn <willemb@google.com>
>
> The underlying hrtimer is programmed with nanosecond resolution.
>
> Use cases such as datacenter networking operate on timescales well
> below milliseconds. Setting shorter timeouts bounds tail latency.
>
> Add epoll_create1 flag EPOLL_NSTIMEO. When passed, this changes the
> interpretation of argument timeout in epoll_wait from msec to nsec.
>
> The new eventpoll state fits in existing 4B of padding when busy poll
> is compiled in (the default), and reads the same cacheline.
>
> Signed-off-by: Willem de Bruijn <willemb@google.com>

Acked-by: Soheil Hassas Yeganeh <soheil@google.com>

Thanks for adding the feature!

> ---
>
> The selftest is hosted on GitHub for now; I can follow up with a kselftest.
> https://github.com/wdebruij/kerneltools/blob/master/tests/epoll_nstimeo.c
> ---
>  fs/eventpoll.c                 | 26 +++++++++++++++++++-------
>  include/uapi/linux/eventpoll.h |  1 +
>  2 files changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index 4df61129566d..1216b909d155 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -225,6 +225,9 @@ struct eventpoll {
>         unsigned int napi_id;
>  #endif
>
> +       /* Accept timeout in ns resolution (EPOLL_NSTIMEO) */
> +       unsigned int nstimeout:1;
> +
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>         /* tracks wakeup nests for lockdep validation */
>         u8 nests;
> @@ -1787,17 +1790,20 @@ static int ep_send_events(struct eventpoll *ep,
>         return esed.res;
>  }
>
> -static inline struct timespec64 ep_set_mstimeout(long ms)
> +static inline struct timespec64 ep_set_nstimeout(long ns)
>  {
> -       struct timespec64 now, ts = {
> -               .tv_sec = ms / MSEC_PER_SEC,
> -               .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
> -       };
> +       struct timespec64 now, ts;
>
> +       ts = ns_to_timespec64(ns);
>         ktime_get_ts64(&now);
>         return timespec64_add_safe(now, ts);
>  }
>
> +static inline struct timespec64 ep_set_mstimeout(long ms)
> +{
> +       return ep_set_nstimeout(NSEC_PER_MSEC * ms);
> +}
> +
>  /**
>   * ep_poll - Retrieves ready events, and delivers them to the caller supplied
>   *           event buffer.
> @@ -1826,7 +1832,10 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>         lockdep_assert_irqs_enabled();
>
>         if (timeout > 0) {
> -               struct timespec64 end_time = ep_set_mstimeout(timeout);
> +               struct timespec64 end_time;
> +
> +               end_time = ep->nstimeout ? ep_set_nstimeout(timeout) :
> +                                          ep_set_mstimeout(timeout);
>
>                 slack = select_estimate_accuracy(&end_time);
>                 to = &expires;
> @@ -2046,7 +2055,7 @@ static int do_epoll_create(int flags)
>         /* Check the EPOLL_* constant for consistency.  */
>         BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
>
> -       if (flags & ~EPOLL_CLOEXEC)
> +       if (flags & ~(EPOLL_CLOEXEC | EPOLL_NSTIMEO))
>                 return -EINVAL;
>         /*
>          * Create the internal data structure ("struct eventpoll").
> @@ -2054,6 +2063,9 @@ static int do_epoll_create(int flags)
>         error = ep_alloc(&ep);
>         if (error < 0)
>                 return error;
> +
> +       ep->nstimeout = !!(flags & EPOLL_NSTIMEO);
> +
>         /*
>          * Creates all the items needed to setup an eventpoll file. That is,
>          * a file structure and a free file descriptor.
> diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
> index 8a3432d0f0dc..f6ef9c9f8ac2 100644
> --- a/include/uapi/linux/eventpoll.h
> +++ b/include/uapi/linux/eventpoll.h
> @@ -21,6 +21,7 @@
>
>  /* Flags for epoll_create1.  */
>  #define EPOLL_CLOEXEC O_CLOEXEC
> +#define EPOLL_NSTIMEO 0x1
>
>  /* Valid opcodes to issue to sys_epoll_ctl() */
>  #define EPOLL_CTL_ADD 1
> --
> 2.29.0.rc1.297.gfa9743e501-goog
>

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, back to index

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-26 16:08 [PATCH] epoll: add nsec timeout support Willem de Bruijn
2020-10-26 16:13 ` Soheil Hassas Yeganeh

Linux-Fsdevel Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-fsdevel/0 linux-fsdevel/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-fsdevel linux-fsdevel/ https://lore.kernel.org/linux-fsdevel \
		linux-fsdevel@vger.kernel.org
	public-inbox-index linux-fsdevel

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-fsdevel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git