All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrea Arcangeli <aarcange@redhat.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, Michael Rapoport <RAPOPORT@il.ibm.com>,
	"Dr. David Alan Gilbert"@v2.random, " <dgilbert@redhat.com>,
	 Mike Kravetz <mike.kravetz@oracle.com>,
	 Shaohua Li <shli@fb.com>,
	 Pavel Emelyanov <xemul@parallels.com>"@v2.random
Subject: [PATCH 06/33] userfaultfd: non-cooperative: Add ability to report non-PF events from uffd descriptor
Date: Wed,  2 Nov 2016 20:33:38 +0100	[thread overview]
Message-ID: <1478115245-32090-7-git-send-email-aarcange@redhat.com> (raw)
In-Reply-To: <1478115245-32090-1-git-send-email-aarcange@redhat.com>

From: Pavel Emelyanov <xemul@parallels.com>

The custom events are queued in ctx->event_wqh not to disturb the
fast-path-ed PF queue-wait-wakeup functions.

The events to be generated (other than PF-s) are requested in UFFD_API
ioctl with the uffd_api.features bits. Those, known by the kernel, are
then turned on and reported back to the user-space.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
---
 fs/userfaultfd.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 96 insertions(+), 2 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index b4f790f..76205b3 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -12,6 +12,7 @@
  *  mm/ksm.c (mm hashing).
  */
 
+#include <linux/list.h>
 #include <linux/hashtable.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
@@ -45,12 +46,16 @@ struct userfaultfd_ctx {
 	wait_queue_head_t fault_wqh;
 	/* waitqueue head for the pseudo fd to wakeup poll/read */
 	wait_queue_head_t fd_wqh;
+	/* waitqueue head for events */
+	wait_queue_head_t event_wqh;
 	/* a refile sequence protected by fault_pending_wqh lock */
 	struct seqcount refile_seq;
 	/* pseudo fd refcounting */
 	atomic_t refcount;
 	/* userfaultfd syscall flags */
 	unsigned int flags;
+	/* features requested from the userspace */
+	unsigned int features;
 	/* state machine */
 	enum userfaultfd_state state;
 	/* released */
@@ -135,6 +140,8 @@ static void userfaultfd_ctx_put(struct userfaultfd_ctx *ctx)
 		VM_BUG_ON(waitqueue_active(&ctx->fault_pending_wqh));
 		VM_BUG_ON(spin_is_locked(&ctx->fault_wqh.lock));
 		VM_BUG_ON(waitqueue_active(&ctx->fault_wqh));
+		VM_BUG_ON(spin_is_locked(&ctx->event_wqh.lock));
+		VM_BUG_ON(waitqueue_active(&ctx->event_wqh));
 		VM_BUG_ON(spin_is_locked(&ctx->fd_wqh.lock));
 		VM_BUG_ON(waitqueue_active(&ctx->fd_wqh));
 		mmdrop(ctx->mm);
@@ -423,6 +430,59 @@ int handle_userfault(struct fault_env *fe, unsigned long reason)
 	return ret;
 }
 
+static int __maybe_unused userfaultfd_event_wait_completion(
+		struct userfaultfd_ctx *ctx,
+		struct userfaultfd_wait_queue *ewq)
+{
+	int ret = 0;
+
+	ewq->ctx = ctx;
+	init_waitqueue_entry(&ewq->wq, current);
+
+	spin_lock(&ctx->event_wqh.lock);
+	/*
+	 * After the __add_wait_queue the uwq is visible to userland
+	 * through poll/read().
+	 */
+	__add_wait_queue(&ctx->event_wqh, &ewq->wq);
+	for (;;) {
+		set_current_state(TASK_KILLABLE);
+		if (ewq->msg.event == 0)
+			break;
+		if (ACCESS_ONCE(ctx->released) ||
+		    fatal_signal_pending(current)) {
+			ret = -1;
+			__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+			break;
+		}
+
+		spin_unlock(&ctx->event_wqh.lock);
+
+		wake_up_poll(&ctx->fd_wqh, POLLIN);
+		schedule();
+
+		spin_lock(&ctx->event_wqh.lock);
+	}
+	__set_current_state(TASK_RUNNING);
+	spin_unlock(&ctx->event_wqh.lock);
+
+	/*
+	 * ctx may go away after this if the userfault pseudo fd is
+	 * already released.
+	 */
+
+	userfaultfd_ctx_put(ctx);
+	return ret;
+}
+
+static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx,
+				       struct userfaultfd_wait_queue *ewq)
+{
+	ewq->msg.event = 0;
+	wake_up_locked(&ctx->event_wqh);
+	__remove_wait_queue(&ctx->event_wqh, &ewq->wq);
+}
+
 static int userfaultfd_release(struct inode *inode, struct file *file)
 {
 	struct userfaultfd_ctx *ctx = file->private_data;
@@ -511,6 +571,12 @@ static inline struct userfaultfd_wait_queue *find_userfault(
 	return find_userfault_in(&ctx->fault_pending_wqh);
 }
 
+static inline struct userfaultfd_wait_queue *find_userfault_evt(
+		struct userfaultfd_ctx *ctx)
+{
+	return find_userfault_in(&ctx->event_wqh);
+}
+
 static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
 {
 	struct userfaultfd_ctx *ctx = file->private_data;
@@ -542,6 +608,9 @@ static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
 		smp_mb();
 		if (waitqueue_active(&ctx->fault_pending_wqh))
 			ret = POLLIN;
+		else if (waitqueue_active(&ctx->event_wqh))
+			ret = POLLIN;
+
 		return ret;
 	default:
 		WARN_ON_ONCE(1);
@@ -606,6 +675,19 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 			break;
 		}
 		spin_unlock(&ctx->fault_pending_wqh.lock);
+
+		spin_lock(&ctx->event_wqh.lock);
+		uwq = find_userfault_evt(ctx);
+		if (uwq) {
+			*msg = uwq->msg;
+
+			userfaultfd_event_complete(ctx, uwq);
+			spin_unlock(&ctx->event_wqh.lock);
+			ret = 0;
+			break;
+		}
+		spin_unlock(&ctx->event_wqh.lock);
+
 		if (signal_pending(current)) {
 			ret = -ERESTARTSYS;
 			break;
@@ -1149,6 +1231,14 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 	return ret;
 }
 
+static inline unsigned int uffd_ctx_features(__u64 user_features)
+{
+	/*
+	 * For the current set of features the bits just coincide
+	 */
+	return (unsigned int)user_features;
+}
+
 /*
  * userland asks for a certain API version and we return which bits
  * and ioctl commands are implemented in this kernel for such API
@@ -1167,19 +1257,21 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
 		goto out;
-	if (uffdio_api.api != UFFD_API || uffdio_api.features) {
+	if (uffdio_api.api != UFFD_API ||
+	    (uffdio_api.features & ~UFFD_API_FEATURES)) {
 		memset(&uffdio_api, 0, sizeof(uffdio_api));
 		if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
 			goto out;
 		ret = -EINVAL;
 		goto out;
 	}
-	uffdio_api.features = UFFD_API_FEATURES;
+	uffdio_api.features &= UFFD_API_FEATURES;
 	uffdio_api.ioctls = UFFD_API_IOCTLS;
 	ret = -EFAULT;
 	if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
 		goto out;
 	ctx->state = UFFD_STATE_RUNNING;
+	ctx->features = uffd_ctx_features(uffdio_api.features);
 	ret = 0;
 out:
 	return ret;
@@ -1266,6 +1358,7 @@ static void init_once_userfaultfd_ctx(void *mem)
 
 	init_waitqueue_head(&ctx->fault_pending_wqh);
 	init_waitqueue_head(&ctx->fault_wqh);
+	init_waitqueue_head(&ctx->event_wqh);
 	init_waitqueue_head(&ctx->fd_wqh);
 	seqcount_init(&ctx->refile_seq);
 }
@@ -1306,6 +1399,7 @@ static struct file *userfaultfd_file_create(int flags)
 
 	atomic_set(&ctx->refcount, 1);
 	ctx->flags = flags;
+	ctx->features = 0;
 	ctx->state = UFFD_STATE_WAIT_API;
 	ctx->released = false;
 	ctx->mm = current->mm;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2016-11-02 19:40 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-11-02 19:33 [PATCH 00/33] userfaultfd tmpfs/hugetlbfs/non-cooperative Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 01/33] userfaultfd: document _IOR/_IOW Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 02/33] userfaultfd: correct comment about UFFD_FEATURE_PAGEFAULT_FLAG_WP Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 03/33] userfaultfd: convert BUG() to WARN_ON_ONCE() Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 04/33] userfaultfd: use vma_is_anonymous Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 05/33] userfaultfd: non-cooperative: Split the find_userfault() routine Andrea Arcangeli
2016-11-02 19:33 ` Andrea Arcangeli [this message]
2016-11-02 19:33 ` [PATCH 07/33] userfaultfd: non-cooperative: report all available features to userland Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 08/33] userfaultfd: non-cooperative: Add fork() event Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 09/33] userfaultfd: non-cooperative: Add fork() event, build warning fix Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 10/33] userfaultfd: non-cooperative: dup_userfaultfd: use mm_count instead of mm_users Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 11/33] userfaultfd: non-cooperative: Add mremap() event Andrea Arcangeli
2016-11-03  7:41   ` Hillf Danton
2016-11-03 17:52     ` Mike Rapoport
2016-11-04 15:40     ` Mike Rapoport
2016-11-02 19:33 ` [PATCH 12/33] userfaultfd: non-cooperative: Add madvise() event for MADV_DONTNEED request Andrea Arcangeli
2016-11-03  8:01   ` Hillf Danton
2016-11-03 17:24     ` Mike Rapoport
2016-11-04 16:40       ` [PATCH 12/33] userfaultfd: non-cooperative: Add madvise() event for MADV_DONTNEED requestg Andrea Arcangeli
2016-11-04 15:42     ` [PATCH 12/33] userfaultfd: non-cooperative: Add madvise() event for MADV_DONTNEED request Mike Rapoport
2016-11-02 19:33 ` [PATCH 13/33] userfaultfd: hugetlbfs: add copy_huge_page_from_user for hugetlb userfaultfd support Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 14/33] userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for " Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 15/33] userfaultfd: hugetlbfs: add __mcopy_atomic_hugetlb for huge page UFFDIO_COPY Andrea Arcangeli
2016-11-03 10:15   ` Hillf Danton
2016-11-03 17:33     ` Mike Kravetz
2016-11-03 19:14       ` Mike Kravetz
2016-11-04  6:43         ` Hillf Danton
2016-11-04 19:36         ` Andrea Arcangeli
2016-11-04 20:34           ` Mike Kravetz
2016-11-08 21:06           ` Mike Kravetz
2016-11-16 18:28             ` Andrea Arcangeli
2016-11-16 18:53               ` Mike Kravetz
2016-11-17 15:40                 ` Andrea Arcangeli
2016-11-17 19:26                   ` Mike Kravetz
2016-11-18  0:05                     ` Andrea Arcangeli
2016-11-18  5:52                       ` Mike Kravetz
2016-11-22  1:16                         ` Mike Kravetz
2016-11-23  6:38                           ` Hillf Danton
2016-12-15 19:02                             ` Andrea Arcangeli
2016-12-16  3:54                               ` Hillf Danton
2016-11-17 19:41               ` Mike Kravetz
2016-11-04 16:35       ` Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 16/33] userfaultfd: hugetlbfs: add userfaultfd hugetlb hook Andrea Arcangeli
2016-11-04  7:02   ` Hillf Danton
2016-11-02 19:33 ` [PATCH 17/33] userfaultfd: hugetlbfs: allow registration of ranges containing huge pages Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 18/33] userfaultfd: hugetlbfs: add userfaultfd_hugetlb test Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 19/33] userfaultfd: hugetlbfs: userfaultfd_huge_must_wait for hugepmd ranges Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 20/33] userfaultfd: introduce vma_can_userfault Andrea Arcangeli
2016-11-04  7:39   ` Hillf Danton
2016-11-02 19:33 ` [PATCH 21/33] userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 22/33] userfaultfd: shmem: introduce vma_is_shmem Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 23/33] userfaultfd: shmem: add tlbflush.h header for microblaze Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 24/33] userfaultfd: shmem: use shmem_mcopy_atomic_pte for shared memory Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 25/33] userfaultfd: shmem: add userfaultfd hook for shared memory faults Andrea Arcangeli
2016-11-04  8:59   ` Hillf Danton
2016-11-04 14:53     ` Mike Rapoport
2016-11-04 15:44     ` Mike Rapoport
2016-11-04 16:56       ` Andrea Arcangeli
2016-11-18  0:37       ` Andrea Arcangeli
2016-11-20 12:10         ` Mike Rapoport
2016-11-02 19:33 ` [PATCH 26/33] userfaultfd: shmem: allow registration of shared memory ranges Andrea Arcangeli
2016-11-02 19:33 ` [PATCH 27/33] userfaultfd: shmem: add userfaultfd_shmem test Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 28/33] userfaultfd: shmem: lock the page before adding it to pagecache Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 29/33] userfaultfd: shmem: avoid leaking blocks and used blocks in UFFDIO_COPY Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 30/33] userfaultfd: non-cooperative: selftest: introduce userfaultfd_open Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 31/33] userfaultfd: non-cooperative: selftest: add ufd parameter to copy_page Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 32/33] userfaultfd: non-cooperative: selftest: add test for FORK, MADVDONTNEED and REMAP events Andrea Arcangeli
2016-11-02 19:34 ` [PATCH 33/33] mm: mprotect: use pmd_trans_unstable instead of taking the pmd_lock Andrea Arcangeli
2016-11-02 20:07 ` [PATCH 00/33] userfaultfd tmpfs/hugetlbfs/non-cooperative Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1478115245-32090-7-git-send-email-aarcange@redhat.com \
    --to=aarcange@redhat.com \
    --cc=" <dgilbert@redhat.com>,  Mike Kravetz <mike.kravetz@oracle.com>,  Shaohua Li <shli@fb.com>,  Pavel Emelyanov <xemul@parallels.com>"@v2.random \
    --cc="Dr. David Alan Gilbert"@v2.random \
    --cc=RAPOPORT@il.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.