LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Roman Penyaev <rpenyaev@suse.de>
To: unlisted-recipients:; (no To-header on input)
Cc: Roman Penyaev <rpenyaev@suse.de>,
	Andrew Morton <akpm@linux-foundation.org>,
	Davidlohr Bueso <dbueso@suse.de>, Jason Baron <jbaron@akamai.com>,
	Al Viro <viro@zeniv.linux.org.uk>,
	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andrea Parri <andrea.parri@amarulasolutions.com>,
	linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [RFC PATCH 15/15] epoll: support mapping for epfd when polled from userspace
Date: Wed,  9 Jan 2019 17:40:25 +0100
Message-ID: <20190109164025.24554-16-rpenyaev@suse.de> (raw)
In-Reply-To: <20190109164025.24554-1-rpenyaev@suse.de>

User has to mmap user_header and user_index vmalloce'd pointers in order to
consume events from userspace.  Support mapping with possibility to mremap()
in the future, i.e. vma does not have VM_DONTEXPAND flag set.

User mmaps two pointers: header and index in order to expand both calling
mremap().

Expanding is made with support of the fault callback, where page is mmaped
with all appropriate size checks.

Signed-off-by: Roman Penyaev <rpenyaev@suse.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Jason Baron <jbaron@akamai.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrea Parri <andrea.parri@amarulasolutions.com>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 fs/eventpoll.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5de640fcf28b..2849b238f80b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1388,11 +1388,96 @@ static void ep_show_fdinfo(struct seq_file *m, struct file *f)
 }
 #endif
 
+static vm_fault_t ep_eventpoll_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct eventpoll *ep = vma->vm_file->private_data;
+	size_t off = vmf->address - vma->vm_start;
+	vm_fault_t ret;
+	int rc;
+
+	mutex_lock(&ep->mtx);
+	ret = VM_FAULT_SIGBUS;
+	if (!vma->vm_pgoff) {
+		if (ep->header_length < (off + PAGE_SIZE))
+			goto unlock_and_out;
+
+		rc = remap_vmalloc_range_partial(vma, vmf->address,
+						 ep->user_header + off,
+						 PAGE_SIZE);
+	} else {
+		if (ep->index_length < (off + PAGE_SIZE))
+			goto unlock_and_out;
+
+		rc = remap_vmalloc_range_partial(vma, vmf->address,
+						 ep->user_index + off,
+						 PAGE_SIZE);
+	}
+	if (likely(!rc)) {
+		/* Success path */
+		vma->vm_flags &= ~VM_DONTEXPAND;
+		ret = VM_FAULT_NOPAGE;
+	}
+unlock_and_out:
+	mutex_unlock(&ep->mtx);
+
+	return ret;
+}
+
+static const struct vm_operations_struct eventpoll_vm_ops = {
+	.fault = ep_eventpoll_fault,
+};
+
+static int ep_eventpoll_mmap(struct file *filep, struct vm_area_struct *vma)
+{
+	struct eventpoll *ep = vma->vm_file->private_data;
+	size_t size;
+	int rc;
+
+	if (!ep_polled_by_user(ep))
+		return -ENOTSUPP;
+
+	mutex_lock(&ep->mtx);
+	rc = -ENXIO;
+	size = vma->vm_end - vma->vm_start;
+	if (!vma->vm_pgoff && size > ep->header_length)
+		goto unlock_and_out;
+	if (vma->vm_pgoff && ep->header_length != (vma->vm_pgoff << PAGE_SHIFT))
+		/*
+		 * Index ring starts exactly after header. In future vm_pgoff
+		 * is not used, only as indication what kernel ptr is mapped.
+		 */
+		goto unlock_and_out;
+	if (vma->vm_pgoff && size > ep->index_length)
+		goto unlock_and_out;
+
+	/*
+	 * vm_pgoff is used *only* for indication, what is mapped: user header
+	 * or user index ring.
+	 */
+	if (!vma->vm_pgoff)
+		rc = remap_vmalloc_range_partial(vma, vma->vm_start,
+						 ep->user_header, size);
+	else
+		rc = remap_vmalloc_range_partial(vma, vma->vm_start,
+						 ep->user_index, size);
+
+	if (likely(!rc)) {
+		vma->vm_flags &= ~VM_DONTEXPAND;
+		vma->vm_ops = &eventpoll_vm_ops;
+	}
+unlock_and_out:
+	mutex_unlock(&ep->mtx);
+
+	return rc;
+}
+
 /* File callbacks that implement the eventpoll file behaviour */
 static const struct file_operations eventpoll_fops = {
 #ifdef CONFIG_PROC_FS
 	.show_fdinfo	= ep_show_fdinfo,
 #endif
+	.mmap		= ep_eventpoll_mmap,
 	.release	= ep_eventpoll_release,
 	.poll		= ep_eventpoll_poll,
 	.llseek		= noop_llseek,
-- 
2.19.1


      parent reply index

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-09 16:40 [RFC 00/15] epoll: support pollable epoll " Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 01/15] mm/vmalloc: add new 'alignment' field for vm_struct structure Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 02/15] mm/vmalloc: move common logic from __vmalloc_area_node to a separate func Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 03/15] mm/vmalloc: introduce new vrealloc() call and its subsidiary reach analog Roman Penyaev
2019-01-09 16:50   ` Matthew Wilcox
2019-01-10 10:08     ` Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 04/15] epoll: move private helpers from a header to the source Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 05/15] epoll: introduce user header structure and user index for polling from userspace Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 06/15] epoll: introduce various of helpers for user structure lengths calculations Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 07/15] epoll: extend epitem struct with new members for polling from userspace Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 08/15] epoll: some sanity flags checks for epoll syscalls for polled epfd " Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 09/15] epoll: introduce stand-alone helpers for polling " Roman Penyaev
2019-01-09 17:29   ` Linus Torvalds
2019-01-10 10:03     ` Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 10/15] epoll: support polling from userspace for ep_insert() Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 11/15] epoll: offload polling to a work in case of epfd polled from userspace Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 12/15] epoll: support polling from userspace for ep_remove() Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 13/15] epoll: support polling from userspace for ep_modify() Roman Penyaev
2019-01-09 16:40 ` [RFC PATCH 14/15] epoll: support polling from userspace for ep_poll() Roman Penyaev
2019-01-09 16:40 ` Roman Penyaev [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190109164025.24554-16-rpenyaev@suse.de \
    --to=rpenyaev@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=andrea.parri@amarulasolutions.com \
    --cc=dbueso@suse.de \
    --cc=jbaron@akamai.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paulmck@linux.vnet.ibm.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git