LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Roman Penyaev <rpenyaev@suse.de>
To: unlisted-recipients:; (no To-header on input)
Cc: Azat Khuzhin <azat@libevent.org>,
	Roman Penyaev <rpenyaev@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Al Viro <viro@zeniv.linux.org.uk>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH v3 04/13] epoll: some sanity flags checks for epoll syscalls for polling from userspace
Date: Thu, 16 May 2019 10:58:01 +0200
Message-ID: <20190516085810.31077-5-rpenyaev@suse.de> (raw)
In-Reply-To: <20190516085810.31077-1-rpenyaev@suse.de>

There are various of limitations if epfd is polled by user:

 1. Expect always EPOLLET flag (Edge Triggered behavior)

 2. No support for EPOLLWAKEUP
       events are consumed from userspace, thus no way to call __pm_relax()

 3. No support for EPOLLEXCLUSIVE
       If device does not pass pollflags to wake_up() there is no way to
       call poll() from the context under spinlock, thus special work is
       scheduled to offload polling.  In this specific case we can't
       support exclusive wakeups, because we do not know actual result
       of scheduled work.

4. epoll_wait() for epfd, created with EPOLL_USERPOLL flag, accepts events
   as NULL and maxevents as 0.  No other values are accepted.

Signed-off-by: Roman Penyaev <rpenyaev@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index e3f82ff0b26b..81da4571f1e0 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -423,6 +423,11 @@ static inline unsigned int ep_to_items_bm_length(unsigned int nr)
 	return PAGE_ALIGN(ALIGN(nr, 8) >> 3);
 }
 
+static inline bool ep_polled_by_user(struct eventpoll *ep)
+{
+	return !!ep->user_header;
+}
+
 /**
  * ep_events_available - Checks if ready events might be available.
  *
@@ -518,13 +523,17 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
 #ifdef CONFIG_PM_SLEEP
-static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev)
+static inline void ep_take_care_of_epollwakeup(struct eventpoll *ep,
+					       struct epoll_event *epev)
 {
-	if ((epev->events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
-		epev->events &= ~EPOLLWAKEUP;
+	if (epev->events & EPOLLWAKEUP) {
+		if (!capable(CAP_BLOCK_SUSPEND) || ep_polled_by_user(ep))
+			epev->events &= ~EPOLLWAKEUP;
+	}
 }
 #else
-static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev)
+static inline void ep_take_care_of_epollwakeup(struct eventpoll *ep,
+					       struct epoll_event *epev)
 {
 	epev->events &= ~EPOLLWAKEUP;
 }
@@ -2275,10 +2284,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	if (!file_can_poll(tf.file))
 		goto error_tgt_fput;
 
-	/* Check if EPOLLWAKEUP is allowed */
-	if (ep_op_has_event(op))
-		ep_take_care_of_epollwakeup(&epds);
-
 	/*
 	 * We have to check that the file structure underneath the file descriptor
 	 * the user passed to us _is_ an eventpoll file. And also we do not permit
@@ -2288,10 +2293,18 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	if (f.file == tf.file || !is_file_epoll(f.file))
 		goto error_tgt_fput;
 
+	/*
+	 * At this point it is safe to assume that the "private_data" contains
+	 * our own data structure.
+	 */
+	ep = f.file->private_data;
+
 	/*
 	 * epoll adds to the wakeup queue at EPOLL_CTL_ADD time only,
 	 * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
-	 * Also, we do not currently supported nested exclusive wakeups.
+	 * Also, we do not currently supported nested exclusive wakeups
+	 * and EPOLLEXCLUSIVE is not supported for epoll which is polled
+	 * from userspace.
 	 */
 	if (ep_op_has_event(op) && (epds.events & EPOLLEXCLUSIVE)) {
 		if (op == EPOLL_CTL_MOD)
@@ -2299,13 +2312,18 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 		if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
 				(epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
 			goto error_tgt_fput;
+		if (ep_polled_by_user(ep))
+			goto error_tgt_fput;
 	}
 
-	/*
-	 * At this point it is safe to assume that the "private_data" contains
-	 * our own data structure.
-	 */
-	ep = f.file->private_data;
+	if (ep_op_has_event(op)) {
+		if (ep_polled_by_user(ep) && !(epds.events & EPOLLET))
+			/* Polled by user has only edge triggered behaviour */
+			goto error_tgt_fput;
+
+		/* Check if EPOLLWAKEUP is allowed */
+		ep_take_care_of_epollwakeup(ep, &epds);
+	}
 
 	/*
 	 * When we insert an epoll file descriptor, inside another epoll file
@@ -2407,14 +2425,6 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
 	struct fd f;
 	struct eventpoll *ep;
 
-	/* The maximum number of event must be greater than zero */
-	if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
-		return -EINVAL;
-
-	/* Verify that the area passed by the user is writeable */
-	if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
-		return -EFAULT;
-
 	/* Get the "struct file *" for the eventpoll file */
 	f = fdget(epfd);
 	if (!f.file)
@@ -2433,6 +2443,20 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
 	 * our own data structure.
 	 */
 	ep = f.file->private_data;
+	if (!ep_polled_by_user(ep)) {
+		/* The maximum number of event must be greater than zero */
+		if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
+			goto error_fput;
+
+		/* Verify that the area passed by the user is writeable */
+		error = -EFAULT;
+		if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
+			goto error_fput;
+	} else {
+		/* Use ring instead */
+		if (maxevents != 0 || events != NULL)
+			goto error_fput;
+	}
 
 	/* Time to fish for events ... */
 	error = ep_poll(ep, events, maxevents, timeout);
-- 
2.21.0


  parent reply index

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-16  8:57 [PATCH v3 00/13] epoll: support pollable epoll " Roman Penyaev
2019-05-16  8:57 ` [PATCH v3 01/13] epoll: move private helpers from a header to the source Roman Penyaev
2019-05-16  8:57 ` [PATCH v3 02/13] epoll: introduce user structures for polling from userspace Roman Penyaev
2019-05-16  8:58 ` [PATCH v3 03/13] epoll: allocate user header and user events ring " Roman Penyaev
2019-05-16  8:58 ` Roman Penyaev [this message]
2019-05-16  8:58 ` [PATCH v3 05/13] epoll: offload polling to a work in case of epfd polled " Roman Penyaev
2019-05-21  7:51   ` Eric Wong
2019-05-22 12:54     ` Roman Penyaev
2019-05-16  8:58 ` [PATCH v3 06/13] epoll: introduce helpers for adding/removing events to uring Roman Penyaev
2019-05-31  9:55   ` Peter Zijlstra
2019-05-31 11:24     ` Roman Penyaev
2019-05-31 13:11       ` Peter Zijlstra
2019-05-31  9:56   ` Peter Zijlstra
2019-05-31 11:15     ` Roman Penyaev
2019-05-31 12:53       ` Peter Zijlstra
2019-05-31 14:28         ` Roman Penyaev
2019-05-31 16:53           ` Peter Zijlstra
2019-05-31 12:56       ` Peter Zijlstra
2019-05-31 14:21         ` Roman Penyaev
2019-05-31 16:51           ` Peter Zijlstra
2019-05-31 18:58             ` Roman Penyaev
2019-06-03  9:09               ` Peter Zijlstra
2019-06-03 10:02                 ` Roman Penyaev
2019-05-16  8:58 ` [PATCH v3 07/13] epoll: call ep_add_event_to_uring() from ep_poll_callback() Roman Penyaev
2019-05-31  9:56   ` Peter Zijlstra
2019-05-31 11:22     ` Roman Penyaev
2019-05-31 13:05       ` Peter Zijlstra
2019-05-31 15:05         ` Roman Penyaev
2019-05-16  8:58 ` [PATCH v3 08/13] epoll: support polling from userspace for ep_insert() Roman Penyaev
2019-05-16  8:58 ` [PATCH v3 09/13] epoll: support polling from userspace for ep_remove() Roman Penyaev
2019-05-16  8:58 ` [PATCH v3 10/13] epoll: support polling from userspace for ep_modify() Roman Penyaev
2019-05-16  8:58 ` [PATCH v3 11/13] epoll: support polling from userspace for ep_poll() Roman Penyaev
2019-05-16  8:58 ` [PATCH v3 12/13] epoll: support mapping for epfd when polled from userspace Roman Penyaev
2019-05-16  8:58 ` [PATCH v3 13/13] epoll: implement epoll_create2() syscall Roman Penyaev
2019-05-16 10:03   ` Arnd Bergmann
2019-05-16 10:20     ` Roman Penyaev
2019-05-16 10:57       ` Arnd Bergmann
2019-05-22  2:33       ` Andrew Morton
2019-05-22  9:11         ` Roman Penyaev
2019-05-22 11:14         ` Arnd Bergmann
2019-05-22 18:36           ` Andrew Morton
2019-05-31  9:55 ` [PATCH v3 00/13] epoll: support pollable epoll from userspace Peter Zijlstra
2019-05-31 14:48 ` Jens Axboe
2019-05-31 16:02   ` Roman Penyaev
2019-05-31 16:54     ` Jens Axboe
2019-05-31 19:45       ` Roman Penyaev
2019-05-31 21:09         ` Jens Axboe
2019-06-05  6:17           ` Roman Penyaev
2019-05-31 16:33 ` Peter Zijlstra
2019-05-31 18:50   ` Roman Penyaev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190516085810.31077-5-rpenyaev@suse.de \
    --to=rpenyaev@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=azat@libevent.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git