All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
@ 2010-08-08 22:45 Shawn Bohrer
  2010-08-26 22:31 ` Andrew Morton
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Shawn Bohrer @ 2010-08-08 22:45 UTC (permalink / raw)
  To: Alexander Viro; +Cc: linux-fsdevel, linux-kernel, Shawn Bohrer

This make epoll use hrtimers for the timeout value which prevents
epoll_wait() from timing out up to a millisecond early.

This mirrors the behavior of select() and poll().

Signed-off-by: Shawn Bohrer <shawn.bohrer@gmail.com>
---
 fs/eventpoll.c       |   35 +++++++++++++++++++----------------
 fs/select.c          |    2 +-
 include/linux/poll.h |    2 ++
 3 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3817149..728f56c 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -77,9 +77,6 @@
 /* Maximum number of nesting allowed inside epoll sets */
 #define EP_MAX_NESTS 4
 
-/* Maximum msec timeout value storeable in a long int */
-#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
-
 #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
 
 #define EP_UNACTIVE_PTR ((void *) -1L)
@@ -1116,18 +1113,22 @@ static int ep_send_events(struct eventpoll *ep,
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 		   int maxevents, long timeout)
 {
-	int res, eavail;
+	int res, eavail, timed_out = 0;
 	unsigned long flags;
-	long jtimeout;
+	long slack;
 	wait_queue_t wait;
-
-	/*
-	 * Calculate the timeout by checking for the "infinite" value (-1)
-	 * and the overflow condition. The passed timeout is in milliseconds,
-	 * that why (t * HZ) / 1000.
-	 */
-	jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
-		MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
+	struct timespec end_time;
+	ktime_t expires, *to = NULL;
+
+	if (timeout > 0) {
+		ktime_get_ts(&end_time);
+		timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
+		slack = estimate_accuracy(&end_time);
+		to = &expires;
+		*to = timespec_to_ktime(end_time);
+	} else if (timeout == 0) {
+		timed_out = 1;
+	}
 
 retry:
 	spin_lock_irqsave(&ep->lock, flags);
@@ -1149,7 +1150,7 @@ retry:
 			 * to TASK_INTERRUPTIBLE before doing the checks.
 			 */
 			set_current_state(TASK_INTERRUPTIBLE);
-			if (!list_empty(&ep->rdllist) || !jtimeout)
+			if (!list_empty(&ep->rdllist) || timed_out)
 				break;
 			if (signal_pending(current)) {
 				res = -EINTR;
@@ -1157,7 +1158,9 @@ retry:
 			}
 
 			spin_unlock_irqrestore(&ep->lock, flags);
-			jtimeout = schedule_timeout(jtimeout);
+			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+				timed_out = 1;
+
 			spin_lock_irqsave(&ep->lock, flags);
 		}
 		__remove_wait_queue(&ep->wq, &wait);
@@ -1175,7 +1178,7 @@ retry:
 	 * more luck.
 	 */
 	if (!res && eavail &&
-	    !(res = ep_send_events(ep, events, maxevents)) && jtimeout)
+	    !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
 		goto retry;
 
 	return res;
diff --git a/fs/select.c b/fs/select.c
index 500a669..003cb77 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv)
 	return slack;
 }
 
-static long estimate_accuracy(struct timespec *tv)
+long estimate_accuracy(struct timespec *tv)
 {
 	unsigned long ret;
 	struct timespec now;
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 600cc1f..52be81f 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -73,6 +73,8 @@ extern void poll_initwait(struct poll_wqueues *pwq);
 extern void poll_freewait(struct poll_wqueues *pwq);
 extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
 				 ktime_t *expires, unsigned long slack);
+extern long estimate_accuracy(struct timespec *tv);
+
 
 static inline int poll_schedule(struct poll_wqueues *pwq, int state)
 {
-- 
1.7.2.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
  2010-08-08 22:45 [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature Shawn Bohrer
@ 2010-08-26 22:31 ` Andrew Morton
  2010-08-26 22:45 ` Davide Libenzi
  2010-11-24  8:33   ` Mike Frysinger
  2 siblings, 0 replies; 13+ messages in thread
From: Andrew Morton @ 2010-08-26 22:31 UTC (permalink / raw)
  To: Shawn Bohrer; +Cc: Alexander Viro, linux-fsdevel, linux-kernel, Davide Libenzi


I'd say this is an epoll patch, not an hrtimer patch.  So I renamed it
to "epoll: make epoll_wait() use the hrtimer range feature".

Davide looks after epoll, so let's cc him.

On Sun,  8 Aug 2010 17:45:32 -0500
Shawn Bohrer <shawn.bohrer@gmail.com> wrote:

> This make epoll use hrtimers for the timeout value which prevents
> epoll_wait() from timing out up to a millisecond early.
> 
> This mirrors the behavior of select() and poll().
> 
> ...
>
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c

Davide stuff ;)

> --- a/fs/select.c
> +++ b/fs/select.c
> @@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv)
>  	return slack;
>  }
>  
> -static long estimate_accuracy(struct timespec *tv)
> +long estimate_accuracy(struct timespec *tv)

"estimate_accuracy" is a rotten name for a global symbol.  I queued a
preparatory patch which renames this to "select_estimate_accuracy".


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
  2010-08-08 22:45 [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature Shawn Bohrer
  2010-08-26 22:31 ` Andrew Morton
@ 2010-08-26 22:45 ` Davide Libenzi
  2010-08-26 23:02   ` Thomas Gleixner
  2010-11-24  8:33   ` Mike Frysinger
  2 siblings, 1 reply; 13+ messages in thread
From: Davide Libenzi @ 2010-08-26 22:45 UTC (permalink / raw)
  To: Shawn Bohrer; +Cc: Alexander Viro, linux-fsdevel, Linux Kernel Mailing List

On Sun, 8 Aug 2010, Shawn Bohrer wrote:

> This make epoll use hrtimers for the timeout value which prevents
> epoll_wait() from timing out up to a millisecond early.
> 
> This mirrors the behavior of select() and poll().

I saw this now, since I got notifications from Andrew's patch machine.
Is this really needed?
Is that really worth the extra code in the fast path?
Aren't we breaking existing behavior?
I have some doubts.


- Davide



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
  2010-08-26 22:45 ` Davide Libenzi
@ 2010-08-26 23:02   ` Thomas Gleixner
  2010-08-26 23:23     ` Davide Libenzi
  0 siblings, 1 reply; 13+ messages in thread
From: Thomas Gleixner @ 2010-08-26 23:02 UTC (permalink / raw)
  To: Davide Libenzi
  Cc: Shawn Bohrer, Alexander Viro, linux-fsdevel,
	Linux Kernel Mailing List, Arjan van de Ven

On Thu, 26 Aug 2010, Davide Libenzi wrote:
> On Sun, 8 Aug 2010, Shawn Bohrer wrote:
> 
> > This make epoll use hrtimers for the timeout value which prevents
> > epoll_wait() from timing out up to a millisecond early.
> > 
> > This mirrors the behavior of select() and poll().
> 
> I saw this now, since I got notifications from Andrew's patch machine.
> Is this really needed?

Yes, as it is the last user space interface which is jiffies based.

> Is that really worth the extra code in the fast path?

It's not that much overhead and we had no complaints when we converted
everything else

> Aren't we breaking existing behavior?

We had no problems when we moved all the other interfaces over

> I have some doubts.

/me not - though I need to look at the patch itself

Thanks,

	tglx



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
  2010-08-26 23:02   ` Thomas Gleixner
@ 2010-08-26 23:23     ` Davide Libenzi
  0 siblings, 0 replies; 13+ messages in thread
From: Davide Libenzi @ 2010-08-26 23:23 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Shawn Bohrer, Alexander Viro, linux-fsdevel,
	Linux Kernel Mailing List, Arjan van de Ven

On Fri, 27 Aug 2010, Thomas Gleixner wrote:

> On Thu, 26 Aug 2010, Davide Libenzi wrote:
> > On Sun, 8 Aug 2010, Shawn Bohrer wrote:
> > 
> > > This make epoll use hrtimers for the timeout value which prevents
> > > epoll_wait() from timing out up to a millisecond early.
> > > 
> > > This mirrors the behavior of select() and poll().
> > 
> > I saw this now, since I got notifications from Andrew's patch machine.
> > Is this really needed?
> 
> Yes, as it is the last user space interface which is jiffies based.

What? You killed jiffies?! :)
Alright then, the patch itself, besides the different bahavior on timeout 
greater/equal to EP_MAX_MSTIMEO (which is not documented, hence does not 
exist), looks OK to me.



- Davide



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
  2010-08-08 22:45 [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature Shawn Bohrer
@ 2010-11-24  8:33   ` Mike Frysinger
  2010-08-26 22:45 ` Davide Libenzi
  2010-11-24  8:33   ` Mike Frysinger
  2 siblings, 0 replies; 13+ messages in thread
From: Mike Frysinger @ 2010-11-24  8:33 UTC (permalink / raw)
  To: Shawn Bohrer; +Cc: Alexander Viro, linux-fsdevel, linux-kernel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 3168 bytes --]

On Sun, Aug 8, 2010 at 18:45, Shawn Bohrer wrote:
> @@ -1116,18 +1113,22 @@ static int ep_send_events(struct eventpoll *ep,
>  static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>                   int maxevents, long timeout)
>  {
> -       int res, eavail;
> +       int res, eavail, timed_out = 0;
>        unsigned long flags;
> -       long jtimeout;
> +       long slack;
>        wait_queue_t wait;
> -
> -       /*
> -        * Calculate the timeout by checking for the "infinite" value (-1)
> -        * and the overflow condition. The passed timeout is in milliseconds,
> -        * that why (t * HZ) / 1000.
> -        */
> -       jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
> -               MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
> +       struct timespec end_time;
> +       ktime_t expires, *to = NULL;
> +
> +       if (timeout > 0) {
> +               ktime_get_ts(&end_time);
> +               timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
> +               slack = estimate_accuracy(&end_time);
> +               to = &expires;
> +               *to = timespec_to_ktime(end_time);
> +       } else if (timeout == 0) {
> +               timed_out = 1;
> +       }
>
>  retry:
>        spin_lock_irqsave(&ep->lock, flags);
> @@ -1149,7 +1150,7 @@ retry:
>                         * to TASK_INTERRUPTIBLE before doing the checks.
>                         */
>                        set_current_state(TASK_INTERRUPTIBLE);
> -                       if (!list_empty(&ep->rdllist) || !jtimeout)
> +                       if (!list_empty(&ep->rdllist) || timed_out)
>                                break;
>                        if (signal_pending(current)) {
>                                res = -EINTR;
> @@ -1157,7 +1158,9 @@ retry:
>                        }
>
>                        spin_unlock_irqrestore(&ep->lock, flags);
> -                       jtimeout = schedule_timeout(jtimeout);
> +                       if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
> +                               timed_out = 1;
> +
>                        spin_lock_irqsave(&ep->lock, flags);
>                }
>                __remove_wait_queue(&ep->wq, &wait);

this code introduces a warning:
fs/eventpoll.c: In function ‘ep_poll’:
fs/eventpoll.c:1119: warning: ‘slack’ may be used uninitialized in this function

looks to me like you arent properly handling negative timeouts.
certainly epoll_wait() passes the timeout value straight from
userspace to ep_poll() without any error checking, so if userspace
passes a negative timeout value, it looks like "slack" will be used
uninitialized.
-mike
ÿôèº{.nÇ+‰·Ÿ®‰­†+%ŠËÿ±éݶ\x17¥Šwÿº{.nÇ+‰·¥Š{±þG«éÿŠ{ayº\x1dʇڙë,j\a­¢f£¢·hšïêÿ‘êçz_è®\x03(­éšŽŠÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?™¨è­Ú&£ø§~á¶iO•æ¬z·švØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?–I¥

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
@ 2010-11-24  8:33   ` Mike Frysinger
  0 siblings, 0 replies; 13+ messages in thread
From: Mike Frysinger @ 2010-11-24  8:33 UTC (permalink / raw)
  To: Shawn Bohrer; +Cc: Alexander Viro, linux-fsdevel, linux-kernel

On Sun, Aug 8, 2010 at 18:45, Shawn Bohrer wrote:
> @@ -1116,18 +1113,22 @@ static int ep_send_events(struct eventpoll *ep,
>  static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>                   int maxevents, long timeout)
>  {
> -       int res, eavail;
> +       int res, eavail, timed_out = 0;
>        unsigned long flags;
> -       long jtimeout;
> +       long slack;
>        wait_queue_t wait;
> -
> -       /*
> -        * Calculate the timeout by checking for the "infinite" value (-1)
> -        * and the overflow condition. The passed timeout is in milliseconds,
> -        * that why (t * HZ) / 1000.
> -        */
> -       jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
> -               MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
> +       struct timespec end_time;
> +       ktime_t expires, *to = NULL;
> +
> +       if (timeout > 0) {
> +               ktime_get_ts(&end_time);
> +               timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
> +               slack = estimate_accuracy(&end_time);
> +               to = &expires;
> +               *to = timespec_to_ktime(end_time);
> +       } else if (timeout == 0) {
> +               timed_out = 1;
> +       }
>
>  retry:
>        spin_lock_irqsave(&ep->lock, flags);
> @@ -1149,7 +1150,7 @@ retry:
>                         * to TASK_INTERRUPTIBLE before doing the checks.
>                         */
>                        set_current_state(TASK_INTERRUPTIBLE);
> -                       if (!list_empty(&ep->rdllist) || !jtimeout)
> +                       if (!list_empty(&ep->rdllist) || timed_out)
>                                break;
>                        if (signal_pending(current)) {
>                                res = -EINTR;
> @@ -1157,7 +1158,9 @@ retry:
>                        }
>
>                        spin_unlock_irqrestore(&ep->lock, flags);
> -                       jtimeout = schedule_timeout(jtimeout);
> +                       if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
> +                               timed_out = 1;
> +
>                        spin_lock_irqsave(&ep->lock, flags);
>                }
>                __remove_wait_queue(&ep->wq, &wait);

this code introduces a warning:
fs/eventpoll.c: In function ‘ep_poll’:
fs/eventpoll.c:1119: warning: ‘slack’ may be used uninitialized in this function

looks to me like you arent properly handling negative timeouts.
certainly epoll_wait() passes the timeout value straight from
userspace to ep_poll() without any error checking, so if userspace
passes a negative timeout value, it looks like "slack" will be used
uninitialized.
-mike

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
  2010-11-24  8:33   ` Mike Frysinger
@ 2010-11-24 14:52     ` Shawn Bohrer
  -1 siblings, 0 replies; 13+ messages in thread
From: Shawn Bohrer @ 2010-11-24 14:52 UTC (permalink / raw)
  To: Mike Frysinger; +Cc: Alexander Viro, linux-fsdevel, linux-kernel

On Wed, Nov 24, 2010 at 03:33:02AM -0500, Mike Frysinger wrote:
> On Sun, Aug 8, 2010 at 18:45, Shawn Bohrer wrote:
> > @@ -1116,18 +1113,22 @@ static int ep_send_events(struct eventpoll *ep,
> >  static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
> >                   int maxevents, long timeout)
> >  {
> > -       int res, eavail;
> > +       int res, eavail, timed_out = 0;
> >        unsigned long flags;
> > -       long jtimeout;
> > +       long slack;
> >        wait_queue_t wait;
> > -
> > -       /*
> > -        * Calculate the timeout by checking for the "infinite" value (-1)
> > -        * and the overflow condition. The passed timeout is in milliseconds,
> > -        * that why (t * HZ) / 1000.
> > -        */
> > -       jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
> > -               MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
> > +       struct timespec end_time;
> > +       ktime_t expires, *to = NULL;
> > +
> > +       if (timeout > 0) {
> > +               ktime_get_ts(&end_time);
> > +               timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
> > +               slack = estimate_accuracy(&end_time);
> > +               to = &expires;
> > +               *to = timespec_to_ktime(end_time);
> > +       } else if (timeout == 0) {
> > +               timed_out = 1;
> > +       }
> >
> >  retry:
> >        spin_lock_irqsave(&ep->lock, flags);
> > @@ -1149,7 +1150,7 @@ retry:
> >                         * to TASK_INTERRUPTIBLE before doing the checks.
> >                         */
> >                        set_current_state(TASK_INTERRUPTIBLE);
> > -                       if (!list_empty(&ep->rdllist) || !jtimeout)
> > +                       if (!list_empty(&ep->rdllist) || timed_out)
> >                                break;
> >                        if (signal_pending(current)) {
> >                                res = -EINTR;
> > @@ -1157,7 +1158,9 @@ retry:
> >                        }
> >
> >                        spin_unlock_irqrestore(&ep->lock, flags);
> > -                       jtimeout = schedule_timeout(jtimeout);
> > +                       if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
> > +                               timed_out = 1;
> > +
> >                        spin_lock_irqsave(&ep->lock, flags);
> >                }
> >                __remove_wait_queue(&ep->wq, &wait);
> 
> this code introduces a warning:
> fs/eventpoll.c: In function ‘ep_poll’:
> fs/eventpoll.c:1119: warning: ‘slack’ may be used uninitialized in this function
> 
> looks to me like you arent properly handling negative timeouts.
> certainly epoll_wait() passes the timeout value straight from
> userspace to ep_poll() without any error checking, so if userspace
> passes a negative timeout value, it looks like "slack" will be used
> uninitialized.

If a negative timeout is passed in then 'to' remains NULL.  When 'to
is NULL schedule_hrtimeout_range() has an infinite timeout and the
'slack' parameter is never used.  So technically everything should be
fine here.

Of course it would be safest and best to simply initialize slack to 0.
I can send a patch this evening with the fix.

--
Shawn

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
@ 2010-11-24 14:52     ` Shawn Bohrer
  0 siblings, 0 replies; 13+ messages in thread
From: Shawn Bohrer @ 2010-11-24 14:52 UTC (permalink / raw)
  To: Mike Frysinger; +Cc: Alexander Viro, linux-fsdevel, linux-kernel

On Wed, Nov 24, 2010 at 03:33:02AM -0500, Mike Frysinger wrote:
> On Sun, Aug 8, 2010 at 18:45, Shawn Bohrer wrote:
> > @@ -1116,18 +1113,22 @@ static int ep_send_events(struct eventpoll *ep,
> >  static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
> >                   int maxevents, long timeout)
> >  {
> > -       int res, eavail;
> > +       int res, eavail, timed_out = 0;
> >        unsigned long flags;
> > -       long jtimeout;
> > +       long slack;
> >        wait_queue_t wait;
> > -
> > -       /*
> > -        * Calculate the timeout by checking for the "infinite" value (-1)
> > -        * and the overflow condition. The passed timeout is in milliseconds,
> > -        * that why (t * HZ) / 1000.
> > -        */
> > -       jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
> > -               MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
> > +       struct timespec end_time;
> > +       ktime_t expires, *to = NULL;
> > +
> > +       if (timeout > 0) {
> > +               ktime_get_ts(&end_time);
> > +               timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
> > +               slack = estimate_accuracy(&end_time);
> > +               to = &expires;
> > +               *to = timespec_to_ktime(end_time);
> > +       } else if (timeout == 0) {
> > +               timed_out = 1;
> > +       }
> >
> >  retry:
> >        spin_lock_irqsave(&ep->lock, flags);
> > @@ -1149,7 +1150,7 @@ retry:
> >                         * to TASK_INTERRUPTIBLE before doing the checks.
> >                         */
> >                        set_current_state(TASK_INTERRUPTIBLE);
> > -                       if (!list_empty(&ep->rdllist) || !jtimeout)
> > +                       if (!list_empty(&ep->rdllist) || timed_out)
> >                                break;
> >                        if (signal_pending(current)) {
> >                                res = -EINTR;
> > @@ -1157,7 +1158,9 @@ retry:
> >                        }
> >
> >                        spin_unlock_irqrestore(&ep->lock, flags);
> > -                       jtimeout = schedule_timeout(jtimeout);
> > +                       if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
> > +                               timed_out = 1;
> > +
> >                        spin_lock_irqsave(&ep->lock, flags);
> >                }
> >                __remove_wait_queue(&ep->wq, &wait);
> 
> this code introduces a warning:
> fs/eventpoll.c: In function ‘ep_poll’:
> fs/eventpoll.c:1119: warning: ‘slack’ may be used uninitialized in this function
> 
> looks to me like you arent properly handling negative timeouts.
> certainly epoll_wait() passes the timeout value straight from
> userspace to ep_poll() without any error checking, so if userspace
> passes a negative timeout value, it looks like "slack" will be used
> uninitialized.

If a negative timeout is passed in then 'to' remains NULL.  When 'to
is NULL schedule_hrtimeout_range() has an infinite timeout and the
'slack' parameter is never used.  So technically everything should be
fine here.

Of course it would be safest and best to simply initialize slack to 0.
I can send a patch this evening with the fix.

--
Shawn
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
  2010-11-24 14:52     ` Shawn Bohrer
@ 2010-11-24 20:57       ` Mike Frysinger
  -1 siblings, 0 replies; 13+ messages in thread
From: Mike Frysinger @ 2010-11-24 20:57 UTC (permalink / raw)
  To: Shawn Bohrer; +Cc: Alexander Viro, linux-fsdevel, linux-kernel

On Wed, Nov 24, 2010 at 09:52, Shawn Bohrer wrote:
> On Wed, Nov 24, 2010 at 03:33:02AM -0500, Mike Frysinger wrote:
>> On Sun, Aug 8, 2010 at 18:45, Shawn Bohrer wrote:
>> > @@ -1116,18 +1113,22 @@ static int ep_send_events(struct eventpoll *ep,
>> >  static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>> >                   int maxevents, long timeout)
>> >  {
>> > -       int res, eavail;
>> > +       int res, eavail, timed_out = 0;
>> >        unsigned long flags;
>> > -       long jtimeout;
>> > +       long slack;
>> >        wait_queue_t wait;
>> > -
>> > -       /*
>> > -        * Calculate the timeout by checking for the "infinite" value (-1)
>> > -        * and the overflow condition. The passed timeout is in milliseconds,
>> > -        * that why (t * HZ) / 1000.
>> > -        */
>> > -       jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
>> > -               MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
>> > +       struct timespec end_time;
>> > +       ktime_t expires, *to = NULL;
>> > +
>> > +       if (timeout > 0) {
>> > +               ktime_get_ts(&end_time);
>> > +               timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
>> > +               slack = estimate_accuracy(&end_time);
>> > +               to = &expires;
>> > +               *to = timespec_to_ktime(end_time);
>> > +       } else if (timeout == 0) {
>> > +               timed_out = 1;
>> > +       }
>> >
>> >  retry:
>> >        spin_lock_irqsave(&ep->lock, flags);
>> > @@ -1149,7 +1150,7 @@ retry:
>> >                         * to TASK_INTERRUPTIBLE before doing the checks.
>> >                         */
>> >                        set_current_state(TASK_INTERRUPTIBLE);
>> > -                       if (!list_empty(&ep->rdllist) || !jtimeout)
>> > +                       if (!list_empty(&ep->rdllist) || timed_out)
>> >                                break;
>> >                        if (signal_pending(current)) {
>> >                                res = -EINTR;
>> > @@ -1157,7 +1158,9 @@ retry:
>> >                        }
>> >
>> >                        spin_unlock_irqrestore(&ep->lock, flags);
>> > -                       jtimeout = schedule_timeout(jtimeout);
>> > +                       if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
>> > +                               timed_out = 1;
>> > +
>> >                        spin_lock_irqsave(&ep->lock, flags);
>> >                }
>> >                __remove_wait_queue(&ep->wq, &wait);
>>
>> this code introduces a warning:
>> fs/eventpoll.c: In function ‘ep_poll’:
>> fs/eventpoll.c:1119: warning: ‘slack’ may be used uninitialized in this function
>>
>> looks to me like you arent properly handling negative timeouts.
>> certainly epoll_wait() passes the timeout value straight from
>> userspace to ep_poll() without any error checking, so if userspace
>> passes a negative timeout value, it looks like "slack" will be used
>> uninitialized.
>
> If a negative timeout is passed in then 'to' remains NULL.  When 'to
> is NULL schedule_hrtimeout_range() has an infinite timeout and the
> 'slack' parameter is never used.  So technically everything should be
> fine here.

ok, but that depends on an external function never changing behavior
and makes changing the API pretty hard since all callers must be
closely analyzed

> Of course it would be safest and best to simply initialize slack to 0.
> I can send a patch this evening with the fix.

thanks !
-mike

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature
@ 2010-11-24 20:57       ` Mike Frysinger
  0 siblings, 0 replies; 13+ messages in thread
From: Mike Frysinger @ 2010-11-24 20:57 UTC (permalink / raw)
  To: Shawn Bohrer; +Cc: Alexander Viro, linux-fsdevel, linux-kernel

On Wed, Nov 24, 2010 at 09:52, Shawn Bohrer wrote:
> On Wed, Nov 24, 2010 at 03:33:02AM -0500, Mike Frysinger wrote:
>> On Sun, Aug 8, 2010 at 18:45, Shawn Bohrer wrote:
>> > @@ -1116,18 +1113,22 @@ static int ep_send_events(struct eventpoll *ep,
>> >  static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>> >                   int maxevents, long timeout)
>> >  {
>> > -       int res, eavail;
>> > +       int res, eavail, timed_out = 0;
>> >        unsigned long flags;
>> > -       long jtimeout;
>> > +       long slack;
>> >        wait_queue_t wait;
>> > -
>> > -       /*
>> > -        * Calculate the timeout by checking for the "infinite" value (-1)
>> > -        * and the overflow condition. The passed timeout is in milliseconds,
>> > -        * that why (t * HZ) / 1000.
>> > -        */
>> > -       jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
>> > -               MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
>> > +       struct timespec end_time;
>> > +       ktime_t expires, *to = NULL;
>> > +
>> > +       if (timeout > 0) {
>> > +               ktime_get_ts(&end_time);
>> > +               timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
>> > +               slack = estimate_accuracy(&end_time);
>> > +               to = &expires;
>> > +               *to = timespec_to_ktime(end_time);
>> > +       } else if (timeout == 0) {
>> > +               timed_out = 1;
>> > +       }
>> >
>> >  retry:
>> >        spin_lock_irqsave(&ep->lock, flags);
>> > @@ -1149,7 +1150,7 @@ retry:
>> >                         * to TASK_INTERRUPTIBLE before doing the checks.
>> >                         */
>> >                        set_current_state(TASK_INTERRUPTIBLE);
>> > -                       if (!list_empty(&ep->rdllist) || !jtimeout)
>> > +                       if (!list_empty(&ep->rdllist) || timed_out)
>> >                                break;
>> >                        if (signal_pending(current)) {
>> >                                res = -EINTR;
>> > @@ -1157,7 +1158,9 @@ retry:
>> >                        }
>> >
>> >                        spin_unlock_irqrestore(&ep->lock, flags);
>> > -                       jtimeout = schedule_timeout(jtimeout);
>> > +                       if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
>> > +                               timed_out = 1;
>> > +
>> >                        spin_lock_irqsave(&ep->lock, flags);
>> >                }
>> >                __remove_wait_queue(&ep->wq, &wait);
>>
>> this code introduces a warning:
>> fs/eventpoll.c: In function ‘ep_poll’:
>> fs/eventpoll.c:1119: warning: ‘slack’ may be used uninitialized in this function
>>
>> looks to me like you arent properly handling negative timeouts.
>> certainly epoll_wait() passes the timeout value straight from
>> userspace to ep_poll() without any error checking, so if userspace
>> passes a negative timeout value, it looks like "slack" will be used
>> uninitialized.
>
> If a negative timeout is passed in then 'to' remains NULL.  When 'to
> is NULL schedule_hrtimeout_range() has an infinite timeout and the
> 'slack' parameter is never used.  So technically everything should be
> fine here.

ok, but that depends on an external function never changing behavior
and makes changing the API pretty hard since all callers must be
closely analyzed

> Of course it would be safest and best to simply initialize slack to 0.
> I can send a patch this evening with the fix.

thanks !
-mike
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] epoll: initialize slack for negative timeout values
  2010-11-24 20:57       ` Mike Frysinger
  (?)
@ 2010-11-25  3:31       ` Shawn Bohrer
  2010-11-27 18:58         ` Davide Libenzi
  -1 siblings, 1 reply; 13+ messages in thread
From: Shawn Bohrer @ 2010-11-25  3:31 UTC (permalink / raw)
  To: Davide Libenzi
  Cc: Mike Frysinger, Alexander Viro, linux-fsdevel, linux-kernel,
	Shawn Bohrer

When a negative timeout value is passed to epoll the 'slack' variable is
currently uninitialized:

fs/eventpoll.c: In function ‘ep_poll’:
fs/eventpoll.c:1119: warning: ‘slack’ may be used uninitialized in this function

In this case a NULL pointer is passed to schedule_hrtimeout_range()
specifying an infinite timeout.  The current implementation of
schedule_hrtimeout_range() does not use slack in this case, but we
should still initialize slack to 0 in case future implementations use it.

Signed-off-by: Shawn Bohrer <shawn.bohrer@gmail.com>
---
 fs/eventpoll.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8cf0724..c24a032 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1116,7 +1116,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 {
 	int res, eavail, timed_out = 0;
 	unsigned long flags;
-	long slack;
+	long slack = 0;
 	wait_queue_t wait;
 	struct timespec end_time;
 	ktime_t expires, *to = NULL;
-- 
1.7.3.2


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH] epoll: initialize slack for negative timeout values
  2010-11-25  3:31       ` [PATCH] epoll: initialize slack for negative timeout values Shawn Bohrer
@ 2010-11-27 18:58         ` Davide Libenzi
  0 siblings, 0 replies; 13+ messages in thread
From: Davide Libenzi @ 2010-11-27 18:58 UTC (permalink / raw)
  To: Shawn Bohrer
  Cc: Andrew Morton, Mike Frysinger, Alexander Viro, linux-fsdevel,
	Linux Kernel Mailing List

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1198 bytes --]

On Wed, 24 Nov 2010, Shawn Bohrer wrote:

> When a negative timeout value is passed to epoll the 'slack' variable is
> currently uninitialized:
> 
> fs/eventpoll.c: In function ‘ep_poll’:
> fs/eventpoll.c:1119: warning: ‘slack’ may be used uninitialized in this function
> 
> In this case a NULL pointer is passed to schedule_hrtimeout_range()
> specifying an infinite timeout.  The current implementation of
> schedule_hrtimeout_range() does not use slack in this case, but we
> should still initialize slack to 0 in case future implementations use it.

Thanks.


> Signed-off-by: Shawn Bohrer <shawn.bohrer@gmail.com>

Acked-by: Davide Libenzi <davidel@xmailserver.org>



> ---
>  fs/eventpoll.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index 8cf0724..c24a032 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -1116,7 +1116,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  {
>  	int res, eavail, timed_out = 0;
>  	unsigned long flags;
> -	long slack;
> +	long slack = 0;
>  	wait_queue_t wait;
>  	struct timespec end_time;
>  	ktime_t expires, *to = NULL;



- Davide


^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2010-11-27 19:00 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-08-08 22:45 [PATCH] hrtimer: make epoll_wait() use the hrtimer range feature Shawn Bohrer
2010-08-26 22:31 ` Andrew Morton
2010-08-26 22:45 ` Davide Libenzi
2010-08-26 23:02   ` Thomas Gleixner
2010-08-26 23:23     ` Davide Libenzi
2010-11-24  8:33 ` Mike Frysinger
2010-11-24  8:33   ` Mike Frysinger
2010-11-24 14:52   ` Shawn Bohrer
2010-11-24 14:52     ` Shawn Bohrer
2010-11-24 20:57     ` Mike Frysinger
2010-11-24 20:57       ` Mike Frysinger
2010-11-25  3:31       ` [PATCH] epoll: initialize slack for negative timeout values Shawn Bohrer
2010-11-27 18:58         ` Davide Libenzi

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.