All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pavel Emelyanov <xemul@parallels.com>
To: Andrea Arcangeli <aarcange@redhat.com>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Linux MM <linux-mm@kvack.org>,
	Linux API <linux-api@vger.kernel.org>
Cc: Sanidhya Kashyap <sanidhya.gatech@gmail.com>
Subject: [PATCH 2/3] uffd: Introduce the v2 API
Date: Wed, 18 Mar 2015 22:35:17 +0300	[thread overview]
Message-ID: <5509D375.7000809@parallels.com> (raw)
In-Reply-To: <5509D342.7000403@parallels.com>

The new API will report more than just the page-faults. The
reason for this is -- when the task whose mm we monitor with 
uffd and the monitor task itself cannot cooperate with each
other, the former one can screw things up. Like this.

If the task fork()-s, the child process is detached from uffd and
thus all not-yet-faulted-in memory gets mapped with zero-pages
on touch.

Another example is mremap(). When the victim remaps the uffd-ed
region and starts touching it, the monitor would receive fault
messages with addresses that were not registered with the uffd
ioctl before. Thus the monitor will have no idea how to handle
those faults.

To address both we can send more events to the monitor. In
particular, on fork() we can create another uffd context,
register the same set of regions in it and "send" the descriptor
to monitor.

For mremap() we can send the message describing what change
has been performed.

So this patch prepares the ground for the feature described above
by introducing the v2 API of uffd. With the new API the kernel would
respond with a message containing the event type (pagefault,
fork or remap) and an argument (fault address, new uffd descriptor
or region change respectively).

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
---
 fs/userfaultfd.c                 | 56 ++++++++++++++++++++++++++++++----------
 include/uapi/linux/userfaultfd.h | 21 ++++++++++++++-
 2 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 6c9a2d6..bd629b4 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -41,6 +41,8 @@ struct userfaultfd_ctx {
 	wait_queue_head_t fd_wqh;
 	/* userfaultfd syscall flags */
 	unsigned int flags;
+	/* features flags */
+	unsigned int features;
 	/* state machine */
 	enum userfaultfd_state state;
 	/* released */
@@ -49,6 +51,8 @@ struct userfaultfd_ctx {
 	struct mm_struct *mm;
 };
 
+#define UFFD_FEATURE_LONGMSG	0x1
+
 struct userfaultfd_wait_queue {
 	unsigned long address;
 	wait_queue_t wq;
@@ -369,7 +373,7 @@ static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
 }
 
 static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
-				    __u64 *addr)
+				    __u64 *mtype, __u64 *addr)
 {
 	ssize_t ret;
 	DECLARE_WAITQUEUE(wait, current);
@@ -383,6 +387,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 		if (find_userfault(ctx, &uwq)) {
 			uwq->pending = false;
 			/* careful to always initialize addr if ret == 0 */
+			*mtype = UFFD_PAGEFAULT;
 			*addr = uwq->address;
 			ret = 0;
 			break;
@@ -411,8 +416,6 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 {
 	struct userfaultfd_ctx *ctx = file->private_data;
 	ssize_t _ret, ret = 0;
-	/* careful to always initialize addr if ret == 0 */
-	__u64 uninitialized_var(addr);
 	int no_wait = file->f_flags & O_NONBLOCK;
 
 	if (ctx->state == UFFD_STATE_WAIT_API)
@@ -420,16 +423,34 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 	BUG_ON(ctx->state != UFFD_STATE_RUNNING);
 
 	for (;;) {
-		if (count < sizeof(addr))
-			return ret ? ret : -EINVAL;
-		_ret = userfaultfd_ctx_read(ctx, no_wait, &addr);
-		if (_ret < 0)
-			return ret ? ret : _ret;
-		if (put_user(addr, (__u64 __user *) buf))
-			return ret ? ret : -EFAULT;
-		ret += sizeof(addr);
-		buf += sizeof(addr);
-		count -= sizeof(addr);
+		if (!(ctx->features & UFFD_FEATURE_LONGMSG)) {
+			/* careful to always initialize addr if ret == 0 */
+			__u64 uninitialized_var(addr);
+			__u64 uninitialized_var(mtype);
+			if (count < sizeof(addr))
+				return ret ? ret : -EINVAL;
+			_ret = userfaultfd_ctx_read(ctx, no_wait, &mtype, &addr);
+			if (_ret < 0)
+				return ret ? ret : _ret;
+			BUG_ON(mtype != UFFD_PAGEFAULT);
+			if (put_user(addr, (__u64 __user *) buf))
+				return ret ? ret : -EFAULT;
+			_ret = sizeof(addr);
+		} else {
+			struct uffd_v2_msg msg;
+			if (count < sizeof(msg))
+				return ret ? ret : -EINVAL;
+			_ret = userfaultfd_ctx_read(ctx, no_wait, &msg.type, &msg.arg);
+			if (_ret < 0)
+				return ret ? ret : _ret;
+			if (copy_to_user(buf, &msg, sizeof(msg)))
+				return ret ? ret : -EINVAL;
+			_ret = sizeof(msg);
+		}
+
+		ret += _ret;
+		buf += _ret;
+		count -= _ret;
 		/*
 		 * Allow to read more than one fault at time but only
 		 * block if waiting for the very first one.
@@ -981,7 +1002,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_api, buf, sizeof(__u64)))
 		goto out;
-	if (uffdio_api.api != UFFD_API) {
+	if (uffdio_api.api != UFFD_API && uffdio_api.api != UFFD_API_V2) {
 		/* careful not to leak info, we only read the first 8 bytes */
 		memset(&uffdio_api, 0, sizeof(uffdio_api));
 		if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
@@ -992,6 +1013,12 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 	/* careful not to leak info, we only read the first 8 bytes */
 	uffdio_api.bits = UFFD_API_BITS;
 	uffdio_api.ioctls = UFFD_API_IOCTLS;
+
+	if (uffdio_api.api == UFFD_API_V2) {
+		ctx->features |= UFFD_FEATURE_LONGMSG;
+		uffdio_api.bits |= UFFD_API_V2_BITS;
+	}
+
 	ret = -EFAULT;
 	if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
 		goto out;
@@ -1109,6 +1136,7 @@ static struct file *userfaultfd_file_create(int flags)
 
 	ctx->flags = flags;
 	ctx->state = UFFD_STATE_WAIT_API;
+	ctx->features = 0;
 	ctx->mm = current->mm;
 	/* prevent the mm struct to be freed */
 	atomic_inc(&ctx->mm->mm_count);
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index db6e99a..4e169b8 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -9,7 +9,9 @@
 #ifndef _LINUX_USERFAULTFD_H
 #define _LINUX_USERFAULTFD_H
 
-#define UFFD_API ((__u64)0xAA)
+#define UFFD_API 	((__u64)0xAA)
+#define UFFD_API_V2	((__u64)0xAB)
+
 /* FIXME: add "|UFFD_BIT_WP" to UFFD_API_BITS after implementing it */
 #define UFFD_API_BITS (UFFD_BIT_WRITE)
 #define UFFD_API_IOCTLS				\
@@ -147,4 +149,21 @@ struct uffdio_remap {
 	__s64 wake;
 };
 
+struct uffd_v2_msg {
+	__u64	type;
+	__u64	arg;
+};
+
+#define UFFD_PAGEFAULT	0x1
+
+#define UFFD_PAGEFAULT_BIT	(1 << (UFFD_PAGEFAULT - 1))
+#define __UFFD_API_V2_BITS	(UFFD_PAGEFAULT_BIT)
+
+/*
+ * Lower PAGE_SHIFT bits are used to report those supported
+ * by the pagefault message itself. Other bits are used to
+ * report the message types v2 API supports
+ */
+#define UFFD_API_V2_BITS	(__UFFD_API_V2_BITS << 12)
+
 #endif /* _LINUX_USERFAULTFD_H */
-- 
1.8.4.2



WARNING: multiple messages have this Message-ID (diff)
From: Pavel Emelyanov <xemul-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
To: Andrea Arcangeli
	<aarcange-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>,
	Linux Kernel Mailing List
	<linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
	Linux MM <linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org>,
	Linux API <linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>
Cc: Sanidhya Kashyap
	<sanidhya.gatech-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Subject: [PATCH 2/3] uffd: Introduce the v2 API
Date: Wed, 18 Mar 2015 22:35:17 +0300	[thread overview]
Message-ID: <5509D375.7000809@parallels.com> (raw)
In-Reply-To: <5509D342.7000403-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>

The new API will report more than just the page-faults. The
reason for this is -- when the task whose mm we monitor with 
uffd and the monitor task itself cannot cooperate with each
other, the former one can screw things up. Like this.

If the task fork()-s, the child process is detached from uffd and
thus all not-yet-faulted-in memory gets mapped with zero-pages
on touch.

Another example is mremap(). When the victim remaps the uffd-ed
region and starts touching it, the monitor would receive fault
messages with addresses that were not registered with the uffd
ioctl before. Thus the monitor will have no idea how to handle
those faults.

To address both we can send more events to the monitor. In
particular, on fork() we can create another uffd context,
register the same set of regions in it and "send" the descriptor
to monitor.

For mremap() we can send the message describing what change
has been performed.

So this patch prepares the ground for the feature described above
by introducing the v2 API of uffd. With the new API the kernel would
respond with a message containing the event type (pagefault,
fork or remap) and an argument (fault address, new uffd descriptor
or region change respectively).

Signed-off-by: Pavel Emelyanov <xemul-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
---
 fs/userfaultfd.c                 | 56 ++++++++++++++++++++++++++++++----------
 include/uapi/linux/userfaultfd.h | 21 ++++++++++++++-
 2 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 6c9a2d6..bd629b4 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -41,6 +41,8 @@ struct userfaultfd_ctx {
 	wait_queue_head_t fd_wqh;
 	/* userfaultfd syscall flags */
 	unsigned int flags;
+	/* features flags */
+	unsigned int features;
 	/* state machine */
 	enum userfaultfd_state state;
 	/* released */
@@ -49,6 +51,8 @@ struct userfaultfd_ctx {
 	struct mm_struct *mm;
 };
 
+#define UFFD_FEATURE_LONGMSG	0x1
+
 struct userfaultfd_wait_queue {
 	unsigned long address;
 	wait_queue_t wq;
@@ -369,7 +373,7 @@ static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
 }
 
 static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
-				    __u64 *addr)
+				    __u64 *mtype, __u64 *addr)
 {
 	ssize_t ret;
 	DECLARE_WAITQUEUE(wait, current);
@@ -383,6 +387,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 		if (find_userfault(ctx, &uwq)) {
 			uwq->pending = false;
 			/* careful to always initialize addr if ret == 0 */
+			*mtype = UFFD_PAGEFAULT;
 			*addr = uwq->address;
 			ret = 0;
 			break;
@@ -411,8 +416,6 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 {
 	struct userfaultfd_ctx *ctx = file->private_data;
 	ssize_t _ret, ret = 0;
-	/* careful to always initialize addr if ret == 0 */
-	__u64 uninitialized_var(addr);
 	int no_wait = file->f_flags & O_NONBLOCK;
 
 	if (ctx->state == UFFD_STATE_WAIT_API)
@@ -420,16 +423,34 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 	BUG_ON(ctx->state != UFFD_STATE_RUNNING);
 
 	for (;;) {
-		if (count < sizeof(addr))
-			return ret ? ret : -EINVAL;
-		_ret = userfaultfd_ctx_read(ctx, no_wait, &addr);
-		if (_ret < 0)
-			return ret ? ret : _ret;
-		if (put_user(addr, (__u64 __user *) buf))
-			return ret ? ret : -EFAULT;
-		ret += sizeof(addr);
-		buf += sizeof(addr);
-		count -= sizeof(addr);
+		if (!(ctx->features & UFFD_FEATURE_LONGMSG)) {
+			/* careful to always initialize addr if ret == 0 */
+			__u64 uninitialized_var(addr);
+			__u64 uninitialized_var(mtype);
+			if (count < sizeof(addr))
+				return ret ? ret : -EINVAL;
+			_ret = userfaultfd_ctx_read(ctx, no_wait, &mtype, &addr);
+			if (_ret < 0)
+				return ret ? ret : _ret;
+			BUG_ON(mtype != UFFD_PAGEFAULT);
+			if (put_user(addr, (__u64 __user *) buf))
+				return ret ? ret : -EFAULT;
+			_ret = sizeof(addr);
+		} else {
+			struct uffd_v2_msg msg;
+			if (count < sizeof(msg))
+				return ret ? ret : -EINVAL;
+			_ret = userfaultfd_ctx_read(ctx, no_wait, &msg.type, &msg.arg);
+			if (_ret < 0)
+				return ret ? ret : _ret;
+			if (copy_to_user(buf, &msg, sizeof(msg)))
+				return ret ? ret : -EINVAL;
+			_ret = sizeof(msg);
+		}
+
+		ret += _ret;
+		buf += _ret;
+		count -= _ret;
 		/*
 		 * Allow to read more than one fault at time but only
 		 * block if waiting for the very first one.
@@ -981,7 +1002,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_api, buf, sizeof(__u64)))
 		goto out;
-	if (uffdio_api.api != UFFD_API) {
+	if (uffdio_api.api != UFFD_API && uffdio_api.api != UFFD_API_V2) {
 		/* careful not to leak info, we only read the first 8 bytes */
 		memset(&uffdio_api, 0, sizeof(uffdio_api));
 		if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
@@ -992,6 +1013,12 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 	/* careful not to leak info, we only read the first 8 bytes */
 	uffdio_api.bits = UFFD_API_BITS;
 	uffdio_api.ioctls = UFFD_API_IOCTLS;
+
+	if (uffdio_api.api == UFFD_API_V2) {
+		ctx->features |= UFFD_FEATURE_LONGMSG;
+		uffdio_api.bits |= UFFD_API_V2_BITS;
+	}
+
 	ret = -EFAULT;
 	if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
 		goto out;
@@ -1109,6 +1136,7 @@ static struct file *userfaultfd_file_create(int flags)
 
 	ctx->flags = flags;
 	ctx->state = UFFD_STATE_WAIT_API;
+	ctx->features = 0;
 	ctx->mm = current->mm;
 	/* prevent the mm struct to be freed */
 	atomic_inc(&ctx->mm->mm_count);
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index db6e99a..4e169b8 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -9,7 +9,9 @@
 #ifndef _LINUX_USERFAULTFD_H
 #define _LINUX_USERFAULTFD_H
 
-#define UFFD_API ((__u64)0xAA)
+#define UFFD_API 	((__u64)0xAA)
+#define UFFD_API_V2	((__u64)0xAB)
+
 /* FIXME: add "|UFFD_BIT_WP" to UFFD_API_BITS after implementing it */
 #define UFFD_API_BITS (UFFD_BIT_WRITE)
 #define UFFD_API_IOCTLS				\
@@ -147,4 +149,21 @@ struct uffdio_remap {
 	__s64 wake;
 };
 
+struct uffd_v2_msg {
+	__u64	type;
+	__u64	arg;
+};
+
+#define UFFD_PAGEFAULT	0x1
+
+#define UFFD_PAGEFAULT_BIT	(1 << (UFFD_PAGEFAULT - 1))
+#define __UFFD_API_V2_BITS	(UFFD_PAGEFAULT_BIT)
+
+/*
+ * Lower PAGE_SHIFT bits are used to report those supported
+ * by the pagefault message itself. Other bits are used to
+ * report the message types v2 API supports
+ */
+#define UFFD_API_V2_BITS	(__UFFD_API_V2_BITS << 12)
+
 #endif /* _LINUX_USERFAULTFD_H */
-- 
1.8.4.2

WARNING: multiple messages have this Message-ID (diff)
From: Pavel Emelyanov <xemul@parallels.com>
To: Andrea Arcangeli <aarcange@redhat.com>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Linux MM <linux-mm@kvack.org>,
	Linux API <linux-api@vger.kernel.org>
Cc: Sanidhya Kashyap <sanidhya.gatech@gmail.com>
Subject: [PATCH 2/3] uffd: Introduce the v2 API
Date: Wed, 18 Mar 2015 22:35:17 +0300	[thread overview]
Message-ID: <5509D375.7000809@parallels.com> (raw)
In-Reply-To: <5509D342.7000403@parallels.com>

The new API will report more than just the page-faults. The
reason for this is -- when the task whose mm we monitor with 
uffd and the monitor task itself cannot cooperate with each
other, the former one can screw things up. Like this.

If the task fork()-s, the child process is detached from uffd and
thus all not-yet-faulted-in memory gets mapped with zero-pages
on touch.

Another example is mremap(). When the victim remaps the uffd-ed
region and starts touching it, the monitor would receive fault
messages with addresses that were not registered with the uffd
ioctl before. Thus the monitor will have no idea how to handle
those faults.

To address both we can send more events to the monitor. In
particular, on fork() we can create another uffd context,
register the same set of regions in it and "send" the descriptor
to monitor.

For mremap() we can send the message describing what change
has been performed.

So this patch prepares the ground for the feature described above
by introducing the v2 API of uffd. With the new API the kernel would
respond with a message containing the event type (pagefault,
fork or remap) and an argument (fault address, new uffd descriptor
or region change respectively).

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
---
 fs/userfaultfd.c                 | 56 ++++++++++++++++++++++++++++++----------
 include/uapi/linux/userfaultfd.h | 21 ++++++++++++++-
 2 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 6c9a2d6..bd629b4 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -41,6 +41,8 @@ struct userfaultfd_ctx {
 	wait_queue_head_t fd_wqh;
 	/* userfaultfd syscall flags */
 	unsigned int flags;
+	/* features flags */
+	unsigned int features;
 	/* state machine */
 	enum userfaultfd_state state;
 	/* released */
@@ -49,6 +51,8 @@ struct userfaultfd_ctx {
 	struct mm_struct *mm;
 };
 
+#define UFFD_FEATURE_LONGMSG	0x1
+
 struct userfaultfd_wait_queue {
 	unsigned long address;
 	wait_queue_t wq;
@@ -369,7 +373,7 @@ static unsigned int userfaultfd_poll(struct file *file, poll_table *wait)
 }
 
 static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
-				    __u64 *addr)
+				    __u64 *mtype, __u64 *addr)
 {
 	ssize_t ret;
 	DECLARE_WAITQUEUE(wait, current);
@@ -383,6 +387,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
 		if (find_userfault(ctx, &uwq)) {
 			uwq->pending = false;
 			/* careful to always initialize addr if ret == 0 */
+			*mtype = UFFD_PAGEFAULT;
 			*addr = uwq->address;
 			ret = 0;
 			break;
@@ -411,8 +416,6 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 {
 	struct userfaultfd_ctx *ctx = file->private_data;
 	ssize_t _ret, ret = 0;
-	/* careful to always initialize addr if ret == 0 */
-	__u64 uninitialized_var(addr);
 	int no_wait = file->f_flags & O_NONBLOCK;
 
 	if (ctx->state == UFFD_STATE_WAIT_API)
@@ -420,16 +423,34 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,
 	BUG_ON(ctx->state != UFFD_STATE_RUNNING);
 
 	for (;;) {
-		if (count < sizeof(addr))
-			return ret ? ret : -EINVAL;
-		_ret = userfaultfd_ctx_read(ctx, no_wait, &addr);
-		if (_ret < 0)
-			return ret ? ret : _ret;
-		if (put_user(addr, (__u64 __user *) buf))
-			return ret ? ret : -EFAULT;
-		ret += sizeof(addr);
-		buf += sizeof(addr);
-		count -= sizeof(addr);
+		if (!(ctx->features & UFFD_FEATURE_LONGMSG)) {
+			/* careful to always initialize addr if ret == 0 */
+			__u64 uninitialized_var(addr);
+			__u64 uninitialized_var(mtype);
+			if (count < sizeof(addr))
+				return ret ? ret : -EINVAL;
+			_ret = userfaultfd_ctx_read(ctx, no_wait, &mtype, &addr);
+			if (_ret < 0)
+				return ret ? ret : _ret;
+			BUG_ON(mtype != UFFD_PAGEFAULT);
+			if (put_user(addr, (__u64 __user *) buf))
+				return ret ? ret : -EFAULT;
+			_ret = sizeof(addr);
+		} else {
+			struct uffd_v2_msg msg;
+			if (count < sizeof(msg))
+				return ret ? ret : -EINVAL;
+			_ret = userfaultfd_ctx_read(ctx, no_wait, &msg.type, &msg.arg);
+			if (_ret < 0)
+				return ret ? ret : _ret;
+			if (copy_to_user(buf, &msg, sizeof(msg)))
+				return ret ? ret : -EINVAL;
+			_ret = sizeof(msg);
+		}
+
+		ret += _ret;
+		buf += _ret;
+		count -= _ret;
 		/*
 		 * Allow to read more than one fault at time but only
 		 * block if waiting for the very first one.
@@ -981,7 +1002,7 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_api, buf, sizeof(__u64)))
 		goto out;
-	if (uffdio_api.api != UFFD_API) {
+	if (uffdio_api.api != UFFD_API && uffdio_api.api != UFFD_API_V2) {
 		/* careful not to leak info, we only read the first 8 bytes */
 		memset(&uffdio_api, 0, sizeof(uffdio_api));
 		if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
@@ -992,6 +1013,12 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
 	/* careful not to leak info, we only read the first 8 bytes */
 	uffdio_api.bits = UFFD_API_BITS;
 	uffdio_api.ioctls = UFFD_API_IOCTLS;
+
+	if (uffdio_api.api == UFFD_API_V2) {
+		ctx->features |= UFFD_FEATURE_LONGMSG;
+		uffdio_api.bits |= UFFD_API_V2_BITS;
+	}
+
 	ret = -EFAULT;
 	if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
 		goto out;
@@ -1109,6 +1136,7 @@ static struct file *userfaultfd_file_create(int flags)
 
 	ctx->flags = flags;
 	ctx->state = UFFD_STATE_WAIT_API;
+	ctx->features = 0;
 	ctx->mm = current->mm;
 	/* prevent the mm struct to be freed */
 	atomic_inc(&ctx->mm->mm_count);
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index db6e99a..4e169b8 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -9,7 +9,9 @@
 #ifndef _LINUX_USERFAULTFD_H
 #define _LINUX_USERFAULTFD_H
 
-#define UFFD_API ((__u64)0xAA)
+#define UFFD_API 	((__u64)0xAA)
+#define UFFD_API_V2	((__u64)0xAB)
+
 /* FIXME: add "|UFFD_BIT_WP" to UFFD_API_BITS after implementing it */
 #define UFFD_API_BITS (UFFD_BIT_WRITE)
 #define UFFD_API_IOCTLS				\
@@ -147,4 +149,21 @@ struct uffdio_remap {
 	__s64 wake;
 };
 
+struct uffd_v2_msg {
+	__u64	type;
+	__u64	arg;
+};
+
+#define UFFD_PAGEFAULT	0x1
+
+#define UFFD_PAGEFAULT_BIT	(1 << (UFFD_PAGEFAULT - 1))
+#define __UFFD_API_V2_BITS	(UFFD_PAGEFAULT_BIT)
+
+/*
+ * Lower PAGE_SHIFT bits are used to report those supported
+ * by the pagefault message itself. Other bits are used to
+ * report the message types v2 API supports
+ */
+#define UFFD_API_V2_BITS	(__UFFD_API_V2_BITS << 12)
+
 #endif /* _LINUX_USERFAULTFD_H */
-- 
1.8.4.2


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2015-03-18 19:35 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-03-18 19:34 [PATCH 0/3] UserfaultFD: Extension for non cooperative uffd usage Pavel Emelyanov
2015-03-18 19:34 ` Pavel Emelyanov
2015-03-18 19:34 ` Pavel Emelyanov
2015-03-18 19:34 ` [PATCH 1/3] uffd: Tossing bits around Pavel Emelyanov
2015-03-18 19:34   ` Pavel Emelyanov
2015-03-18 19:34   ` Pavel Emelyanov
2015-03-18 19:35 ` Pavel Emelyanov [this message]
2015-03-18 19:35   ` [PATCH 2/3] uffd: Introduce the v2 API Pavel Emelyanov
2015-03-18 19:35   ` Pavel Emelyanov
2015-04-21 12:18   ` Andrea Arcangeli
2015-04-21 12:18     ` Andrea Arcangeli
2015-04-21 12:18     ` Andrea Arcangeli
2015-04-23  6:29     ` Pavel Emelyanov
2015-04-23  6:29       ` Pavel Emelyanov
2015-04-27 21:12       ` Andrea Arcangeli
2015-04-27 21:12         ` Andrea Arcangeli
2015-04-27 21:12         ` Andrea Arcangeli
2015-04-30  9:50         ` Pavel Emelyanov
2015-04-30  9:50           ` Pavel Emelyanov
2015-03-18 19:35 ` [PATCH 3/3] uffd: Introduce fork() notification Pavel Emelyanov
2015-03-18 19:35   ` Pavel Emelyanov
2015-03-18 19:35   ` Pavel Emelyanov
2015-04-21 12:02 ` [PATCH 0/3] UserfaultFD: Extension for non cooperative uffd usage Andrea Arcangeli
2015-04-21 12:02   ` Andrea Arcangeli
2015-04-21 12:02   ` Andrea Arcangeli
2015-04-23  6:34   ` Pavel Emelyanov
2015-04-23  6:34     ` Pavel Emelyanov
2015-04-23  6:34     ` Pavel Emelyanov
     [not found]     ` <20150427211650.GC24035@redhat.com>
2015-04-30 16:38       ` [PATCH] UserfaultFD: Rename uffd_api.bits into .features Pavel Emelyanov
2015-05-07 13:42         ` Andrea Arcangeli
2015-05-07 14:28           ` Pavel Emelyanov
2015-05-07 14:33             ` Andrea Arcangeli
2015-05-07 14:42               ` Pavel Emelyanov
2015-05-07 15:11                 ` Andrea Arcangeli
2015-05-07 15:20                   ` Pavel Emelyanov
2015-05-07 17:08                     ` Andrea Arcangeli
2015-05-07 18:35                       ` Pavel Emelyanov
2015-05-08 13:39                       ` Pavel Emelyanov
2015-05-08 14:07                         ` [PATCH] UserfaultFD: Fix stack corruption when zeroing uffd_msg Pavel Emelyanov
2015-05-08 17:54                           ` Andrea Arcangeli

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5509D375.7000809@parallels.com \
    --to=xemul@parallels.com \
    --cc=aarcange@redhat.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=sanidhya.gatech@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.