All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] poll()able DM events
@ 2017-05-09 19:10 Andy Grover
  2017-05-09 19:10 ` [PATCH 1/3] dm: a basic support for using the select or poll function Andy Grover
                   ` (4 more replies)
  0 siblings, 5 replies; 21+ messages in thread
From: Andy Grover @ 2017-05-09 19:10 UTC (permalink / raw)
  To: dm-devel, snitzer; +Cc: mpatocka

Hi Mike,

We've been working off-list to develop a method of getting DM events
that is reliable, and also can monitor events from multiple DM devices
from a single thread.

In short, these enable an open file descriptor to /dev/mapper/control
to be polled. If the fd indicates POLLIN, some DM device has
experienced an event. Figuring out which is made easier by adding
event_nr in a backwards-compatible way to DM_LIST_DEVICES, so
userspace can compare against previously seen per-device
event_nrs. A new ioctl is also added, to rearm the fd so it may be
re-poll()ed. See individual patch commitlogs for more details.

Thanks to Mikulas for redeveloping earlier versions into this patchset,
as well as the other RH LVM devs who provided early internal feedback!

Regards -- Andy

Mikulas Patocka (3):
  dm: a basic support for using the select or poll function
  dm-ioctl: add a new DM_DEV_ARM_POLL ioctl
  dm-ioctl: report event number in DM_LIST_DEVICES

 drivers/md/dm-core.h          |   3 ++
 drivers/md/dm-ioctl.c         | 109 +++++++++++++++++++++++++++++++++---------
 drivers/md/dm.c               |   5 ++
 include/uapi/linux/dm-ioctl.h |   6 ++-
 4 files changed, 99 insertions(+), 24 deletions(-)

-- 
2.9.3

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-09 19:10 [PATCH 0/3] poll()able DM events Andy Grover
@ 2017-05-09 19:10 ` Andy Grover
  2017-05-11  9:39   ` Martin Wilck
  2017-05-09 19:10 ` [PATCH 2/3] dm-ioctl: add a new DM_DEV_ARM_POLL ioctl Andy Grover
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 21+ messages in thread
From: Andy Grover @ 2017-05-09 19:10 UTC (permalink / raw)
  To: dm-devel, snitzer; +Cc: mpatocka

From: Mikulas Patocka <mpatocka@redhat.com>

This is the very simple patch for polling on the /dev/mapper/control
device. The select or poll function waits until any event happens on any
dm device since opening the /dev/mapper/control device. When select or
poll returns the device as readable, we must close and reopen the device
to wait for new dm events.

Usage:
1. open the /dev/mapper/control device
2. scan the event numbers of all devices we are interested in and process
   them
3. call select, poll or epoll on the handle (it waits until some new event
   happens since opening the device)
4. close the /dev/mapper/control handle
5. go to step 1

The next patch allows to re-arm the polling without closing and reopening
the device.

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-core.h          |  3 +++
 drivers/md/dm-ioctl.c         | 49 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/md/dm.c               |  5 +++++
 include/uapi/linux/dm-ioctl.h |  2 +-
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 97db4d1..c15df7b 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -146,4 +146,7 @@ static inline bool dm_message_test_buffer_overflow(char *result, unsigned maxlen
 	return !maxlen || strlen(result) + 1 >= maxlen;
 }
 
+extern atomic_t dm_global_event_nr;
+extern wait_queue_head_t dm_global_eventq;
+
 #endif
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 2d5d706..855f7c9 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -23,6 +23,14 @@
 #define DM_MSG_PREFIX "ioctl"
 #define DM_DRIVER_EMAIL "dm-devel@redhat.com"
 
+struct dm_file {
+	/*
+	 * poll will wait until the global event number is greater than
+	 * this value.
+	 */
+	unsigned global_event_nr;
+};
+
 /*-----------------------------------------------------------------
  * The ioctl interface needs to be able to look up devices by
  * name or uuid.
@@ -1872,8 +1880,47 @@ static long dm_compat_ctl_ioctl(struct file *file, uint command, ulong u)
 #define dm_compat_ctl_ioctl NULL
 #endif
 
+int dm_open(struct inode *inode, struct file *filp)
+{
+	int r;
+	struct dm_file *priv;
+
+	r = nonseekable_open(inode, filp);
+	if (unlikely(r))
+		return r;
+
+	priv = filp->private_data = kmalloc(sizeof(struct dm_file), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->global_event_nr = atomic_read(&dm_global_event_nr);
+
+	return 0;
+}
+
+int dm_release(struct inode *inode, struct file *filp)
+{
+	kfree(filp->private_data);
+	return 0;
+}
+
+static unsigned dm_poll(struct file *filp, poll_table *wait)
+{
+	struct dm_file *priv = filp->private_data;
+	unsigned mask = 0;
+
+	poll_wait(filp, &dm_global_eventq, wait);
+
+	if ((int)(atomic_read(&dm_global_event_nr) - priv->global_event_nr) > 0)
+		mask |= POLLIN;
+
+	return mask;
+}
+
 static const struct file_operations _ctl_fops = {
-	.open = nonseekable_open,
+	.open    = dm_open,
+	.release = dm_release,
+	.poll    = dm_poll,
 	.unlocked_ioctl	 = dm_ctl_ioctl,
 	.compat_ioctl = dm_compat_ctl_ioctl,
 	.owner	 = THIS_MODULE,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 268edf4..d628592 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -57,6 +57,9 @@ static DECLARE_WORK(deferred_remove_work, do_deferred_remove);
 
 static struct workqueue_struct *deferred_remove_workqueue;
 
+atomic_t dm_global_event_nr = ATOMIC_INIT(0);
+DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);
+
 /*
  * One of these is allocated per bio.
  */
@@ -1717,7 +1720,9 @@ static void event_callback(void *context)
 	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
 
 	atomic_inc(&md->event_nr);
+	atomic_inc(&dm_global_event_nr);
 	wake_up(&md->eventq);
+	wake_up(&dm_global_eventq);
 }
 
 /*
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 4bf9f1e..1f0605d 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,7 +267,7 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	35
+#define DM_VERSION_MINOR	36
 #define DM_VERSION_PATCHLEVEL	0
 #define DM_VERSION_EXTRA	"-ioctl (2016-06-23)"
 
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 2/3] dm-ioctl: add a new DM_DEV_ARM_POLL ioctl
  2017-05-09 19:10 [PATCH 0/3] poll()able DM events Andy Grover
  2017-05-09 19:10 ` [PATCH 1/3] dm: a basic support for using the select or poll function Andy Grover
@ 2017-05-09 19:10 ` Andy Grover
  2017-05-09 19:10 ` [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES Andy Grover
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 21+ messages in thread
From: Andy Grover @ 2017-05-09 19:10 UTC (permalink / raw)
  To: dm-devel, snitzer; +Cc: mpatocka

From: Mikulas Patocka <mpatocka@redhat.com>

This ioctl will record the current global event number in the structure
dm_file, so that next select or poll call will wait until new events
arrived since this ioctl.

The DM_DEV_ARM_POLL ioctl has the same effect as closing and reopening the
handle.

This patch is optional - if the userspace is OK with closing and reopening
the /dev/mapper/control handle after select or poll, we don't need this
patch.

Usage:
1. open the /dev/mapper/control device
2. send the DM_DEV_ARM_POLL ioctl
3. scan the event numbers of all devices we are interested in and process
   them
4. call select, poll or epoll on the handle (it waits until some new event
   happens since the ARM ioctl)
5. go to step 2

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-ioctl.c         | 56 +++++++++++++++++++++++++++----------------
 include/uapi/linux/dm-ioctl.h |  4 +++-
 2 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 855f7c9..84930a3 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -28,7 +28,7 @@ struct dm_file {
 	 * poll will wait until the global event number is greater than
 	 * this value.
 	 */
-	unsigned global_event_nr;
+	volatile unsigned global_event_nr;
 };
 
 /*-----------------------------------------------------------------
@@ -464,9 +464,9 @@ void dm_deferred_remove(void)
  * All the ioctl commands get dispatched to functions with this
  * prototype.
  */
-typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
+typedef int (*ioctl_fn)(struct file *filp, struct dm_ioctl *param, size_t param_size);
 
-static int remove_all(struct dm_ioctl *param, size_t param_size)
+static int remove_all(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	dm_hash_remove_all(true, !!(param->flags & DM_DEFERRED_REMOVE), false);
 	param->data_size = 0;
@@ -499,7 +499,7 @@ static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
 	return ((void *) param) + param->data_start;
 }
 
-static int list_devices(struct dm_ioctl *param, size_t param_size)
+static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	unsigned int i;
 	struct hash_cell *hc;
@@ -590,7 +590,7 @@ static void list_version_get_info(struct target_type *tt, void *param)
     info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1);
 }
 
-static int list_versions(struct dm_ioctl *param, size_t param_size)
+static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	size_t len, needed = 0;
 	struct dm_target_versions *vers;
@@ -732,7 +732,7 @@ static void __dev_status(struct mapped_device *md, struct dm_ioctl *param)
 	}
 }
 
-static int dev_create(struct dm_ioctl *param, size_t param_size)
+static int dev_create(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	int r, m = DM_ANY_MINOR;
 	struct mapped_device *md;
@@ -824,7 +824,7 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
 	return md;
 }
 
-static int dev_remove(struct dm_ioctl *param, size_t param_size)
+static int dev_remove(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	struct hash_cell *hc;
 	struct mapped_device *md;
@@ -889,7 +889,7 @@ static int invalid_str(char *str, void *end)
 	return -EINVAL;
 }
 
-static int dev_rename(struct dm_ioctl *param, size_t param_size)
+static int dev_rename(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	int r;
 	char *new_data = (char *) param + param->data_start;
@@ -919,7 +919,7 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
 	return 0;
 }
 
-static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
+static int dev_set_geometry(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	int r = -EINVAL, x;
 	struct mapped_device *md;
@@ -1068,7 +1068,7 @@ static int do_resume(struct dm_ioctl *param)
  * Set or unset the suspension state of a device.
  * If the device already is in the requested state we just return its status.
  */
-static int dev_suspend(struct dm_ioctl *param, size_t param_size)
+static int dev_suspend(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	if (param->flags & DM_SUSPEND_FLAG)
 		return do_suspend(param);
@@ -1080,7 +1080,7 @@ static int dev_suspend(struct dm_ioctl *param, size_t param_size)
  * Copies device info back to user space, used by
  * the create and info ioctls.
  */
-static int dev_status(struct dm_ioctl *param, size_t param_size)
+static int dev_status(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	struct mapped_device *md;
 
@@ -1171,7 +1171,7 @@ static void retrieve_status(struct dm_table *table,
 /*
  * Wait for a device to report an event
  */
-static int dev_wait(struct dm_ioctl *param, size_t param_size)
+static int dev_wait(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	int r = 0;
 	struct mapped_device *md;
@@ -1208,6 +1208,19 @@ static int dev_wait(struct dm_ioctl *param, size_t param_size)
 	return r;
 }
 
+/*
+ * Remember the global event number and make it possible to poll
+ * for further events.
+ */
+static int dev_arm_poll(struct file *filp, struct dm_ioctl *param, size_t param_size)
+{
+	struct dm_file *priv = filp->private_data;
+
+	priv->global_event_nr = atomic_read(&dm_global_event_nr);
+
+	return 0;
+}
+
 static inline fmode_t get_mode(struct dm_ioctl *param)
 {
 	fmode_t mode = FMODE_READ | FMODE_WRITE;
@@ -1277,7 +1290,7 @@ static bool is_valid_type(enum dm_queue_mode cur, enum dm_queue_mode new)
 	return false;
 }
 
-static int table_load(struct dm_ioctl *param, size_t param_size)
+static int table_load(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	int r;
 	struct hash_cell *hc;
@@ -1364,7 +1377,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	return r;
 }
 
-static int table_clear(struct dm_ioctl *param, size_t param_size)
+static int table_clear(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	struct hash_cell *hc;
 	struct mapped_device *md;
@@ -1438,7 +1451,7 @@ static void retrieve_deps(struct dm_table *table,
 	param->data_size = param->data_start + needed;
 }
 
-static int table_deps(struct dm_ioctl *param, size_t param_size)
+static int table_deps(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	struct mapped_device *md;
 	struct dm_table *table;
@@ -1464,7 +1477,7 @@ static int table_deps(struct dm_ioctl *param, size_t param_size)
  * Return the status of a device as a text string for each
  * target.
  */
-static int table_status(struct dm_ioctl *param, size_t param_size)
+static int table_status(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	struct mapped_device *md;
 	struct dm_table *table;
@@ -1519,7 +1532,7 @@ static int message_for_md(struct mapped_device *md, unsigned argc, char **argv,
 /*
  * Pass a message to the target that's at the supplied device offset.
  */
-static int target_message(struct dm_ioctl *param, size_t param_size)
+static int target_message(struct file *filp, struct dm_ioctl *param, size_t param_size)
 {
 	int r, argc;
 	char **argv;
@@ -1636,7 +1649,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
 		{DM_LIST_VERSIONS_CMD, 0, list_versions},
 
 		{DM_TARGET_MSG_CMD, 0, target_message},
-		{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry}
+		{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
+		{DM_DEV_ARM_POLL, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
 	};
 
 	if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
@@ -1795,7 +1809,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param)
 	return 0;
 }
 
-static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
+static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *user)
 {
 	int r = 0;
 	int ioctl_flags;
@@ -1849,7 +1863,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 		goto out;
 
 	param->data_size = offsetof(struct dm_ioctl, data);
-	r = fn(param, input_param_size);
+	r = fn(file, param, input_param_size);
 
 	if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) &&
 	    unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS))
@@ -1868,7 +1882,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user)
 
 static long dm_ctl_ioctl(struct file *file, uint command, ulong u)
 {
-	return (long)ctl_ioctl(command, (struct dm_ioctl __user *)u);
+	return (long)ctl_ioctl(file, command, (struct dm_ioctl __user *)u);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 1f0605d..7cdc0b4 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -240,7 +240,8 @@ enum {
 	/* Added later */
 	DM_LIST_VERSIONS_CMD,
 	DM_TARGET_MSG_CMD,
-	DM_DEV_SET_GEOMETRY_CMD
+	DM_DEV_SET_GEOMETRY_CMD,
+	DM_DEV_ARM_POLL_CMD,
 };
 
 #define DM_IOCTL 0xfd
@@ -255,6 +256,7 @@ enum {
 #define DM_DEV_SUSPEND   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
 #define DM_DEV_STATUS    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
 #define DM_DEV_WAIT      _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+#define DM_DEV_ARM_POLL  _IOWR(DM_IOCTL, DM_DEV_ARM_POLL_CMD, struct dm_ioctl)
 
 #define DM_TABLE_LOAD    _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
 #define DM_TABLE_CLEAR   _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES
  2017-05-09 19:10 [PATCH 0/3] poll()able DM events Andy Grover
  2017-05-09 19:10 ` [PATCH 1/3] dm: a basic support for using the select or poll function Andy Grover
  2017-05-09 19:10 ` [PATCH 2/3] dm-ioctl: add a new DM_DEV_ARM_POLL ioctl Andy Grover
@ 2017-05-09 19:10 ` Andy Grover
  2017-05-09 21:14 ` [PATCH 0/3] poll()able DM events Mike Snitzer
  2017-09-17  2:41 ` [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES Eugene Syromiatnikov
  4 siblings, 0 replies; 21+ messages in thread
From: Andy Grover @ 2017-05-09 19:10 UTC (permalink / raw)
  To: dm-devel, snitzer; +Cc: mpatocka

From: Mikulas Patocka <mpatocka@redhat.com>

This patch reports the event numbers for all the devices, so that the
user doesn't have to ask them one by one. The event number is reported
after the name field in the dm_name_list structure.

The location of the next record is specified in the dm_name_list->next
field, that means that we can put the new data after the end of name and
it is backward compatible with the old code. The old code just skips the
event number without interpreting it.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-ioctl.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 84930a3..0029776 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -518,6 +518,7 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
 			needed += sizeof(struct dm_name_list);
 			needed += strlen(hc->name) + 1;
 			needed += ALIGN_MASK;
+			needed += (sizeof(uint32_t) + ALIGN_MASK) & ~ALIGN_MASK;
 		}
 	}
 
@@ -538,6 +539,7 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
 	 */
 	for (i = 0; i < NUM_BUCKETS; i++) {
 		list_for_each_entry (hc, _name_buckets + i, name_list) {
+			uint32_t *event_nr;
 			if (old_nl)
 				old_nl->next = (uint32_t) ((void *) nl -
 							   (void *) old_nl);
@@ -547,7 +549,9 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
 			strcpy(nl->name, hc->name);
 
 			old_nl = nl;
-			nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1);
+			event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1);
+			*event_nr = dm_get_event_nr(hc->md);
+			nl = align_ptr(event_nr + 1);
 		}
 	}
 
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 0/3] poll()able DM events
  2017-05-09 19:10 [PATCH 0/3] poll()able DM events Andy Grover
                   ` (2 preceding siblings ...)
  2017-05-09 19:10 ` [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES Andy Grover
@ 2017-05-09 21:14 ` Mike Snitzer
  2017-09-17  2:41 ` [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES Eugene Syromiatnikov
  4 siblings, 0 replies; 21+ messages in thread
From: Mike Snitzer @ 2017-05-09 21:14 UTC (permalink / raw)
  To: Andy Grover; +Cc: dm-devel, mpatocka

On Tue, May 09 2017 at  3:10pm -0400,
Andy Grover <agrover@redhat.com> wrote:

> Hi Mike,
> 
> We've been working off-list to develop a method of getting DM events
> that is reliable, and also can monitor events from multiple DM devices
> from a single thread.
> 
> In short, these enable an open file descriptor to /dev/mapper/control
> to be polled. If the fd indicates POLLIN, some DM device has
> experienced an event. Figuring out which is made easier by adding
> event_nr in a backwards-compatible way to DM_LIST_DEVICES, so
> userspace can compare against previously seen per-device
> event_nrs. A new ioctl is also added, to rearm the fd so it may be
> re-poll()ed. See individual patch commitlogs for more details.
> 
> Thanks to Mikulas for redeveloping earlier versions into this patchset,
> as well as the other RH LVM devs who provided early internal feedback!

I've picked this patchset up for 4.13 but this branch will likely see
quite a few more rebases until 4.12-rcX stabilizes:
https://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm.git/log/?h=for-4.13/dm

I tweaked headers slightly and move the declaration of the variable used
in the last commit to be outside the loop.

Thought about splitting the additional 'struct file *filp' arg being
sprinkled all over in the 2nd commit, into a separate earlier commit,
but just left it as is in the end.

Mike

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-09 19:10 ` [PATCH 1/3] dm: a basic support for using the select or poll function Andy Grover
@ 2017-05-11  9:39   ` Martin Wilck
  2017-05-11  9:43     ` Martin Wilck
  0 siblings, 1 reply; 21+ messages in thread
From: Martin Wilck @ 2017-05-11  9:39 UTC (permalink / raw)
  To: Andy Grover, dm-devel, snitzer; +Cc: mpatocka

On Tue, 2017-05-09 at 12:10 -0700, Andy Grover wrote:
> From: Mikulas Patocka <mpatocka@redhat.com>
> 
> This is the very simple patch for polling on the /dev/mapper/control
> device. The select or poll function waits until any event happens on
> any
> dm device since opening the /dev/mapper/control device. When select
> or
> poll returns the device as readable, we must close and reopen the
> device
> to wait for new dm events.

Why have you done it that way? Couldn't you just save the
dm_global_event_nr at the time poll() is called?

Regards
Martin

-- 
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107SUSE
Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham NortonHRB
21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11  9:39   ` Martin Wilck
@ 2017-05-11  9:43     ` Martin Wilck
  2017-05-11 13:21       ` Mike Snitzer
  0 siblings, 1 reply; 21+ messages in thread
From: Martin Wilck @ 2017-05-11  9:43 UTC (permalink / raw)
  To: Andy Grover, dm-devel, snitzer; +Cc: mpatocka

On Thu, 2017-05-11 at 11:39 +0200, Martin Wilck wrote:
> On Tue, 2017-05-09 at 12:10 -0700, Andy Grover wrote:
> > From: Mikulas Patocka <mpatocka@redhat.com>
> >  
> > This is the very simple patch for polling on the
> > /dev/mapper/control
> > device. The select or poll function waits until any event happens
> > on
> > any
> > dm device since opening the /dev/mapper/control device. When select
> > or
> > poll returns the device as readable, we must close and reopen the
> > device
> > to wait for new dm events.
> 
> Why have you done it that way? Couldn't you just save the
> dm_global_event_nr at the time poll() is called?

I should have read your patch 2/3 before posting ... but I'm still
missing why the counter can't simply be set at poll() time.

Regards
Martin
-- 
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11  9:43     ` Martin Wilck
@ 2017-05-11 13:21       ` Mike Snitzer
  2017-05-11 18:10         ` Andy Grover
  2017-05-11 19:30         ` Martin Wilck
  0 siblings, 2 replies; 21+ messages in thread
From: Mike Snitzer @ 2017-05-11 13:21 UTC (permalink / raw)
  To: Martin Wilck; +Cc: dm-devel, Andy Grover, mpatocka

On Thu, May 11 2017 at  5:43am -0400,
Martin Wilck <mwilck@suse.com> wrote:

> On Thu, 2017-05-11 at 11:39 +0200, Martin Wilck wrote:
> > On Tue, 2017-05-09 at 12:10 -0700, Andy Grover wrote:
> > > From: Mikulas Patocka <mpatocka@redhat.com>
> > >  
> > > This is the very simple patch for polling on the
> > > /dev/mapper/control
> > > device. The select or poll function waits until any event happens
> > > on
> > > any
> > > dm device since opening the /dev/mapper/control device. When select
> > > or
> > > poll returns the device as readable, we must close and reopen the
> > > device
> > > to wait for new dm events.
> > 
> > Why have you done it that way? Couldn't you just save the
> > dm_global_event_nr at the time poll() is called?
> 
> I should have read your patch 2/3 before posting ... but I'm still
> missing why the counter can't simply be set at poll() time.

If you did that then you would have a race where:

1) userspace has recorded events prior to poll()
2) an event triggers an increment of dm_global_event_nr before userspace
calls poll()
3) then userspace calls poll() -- only to find that after poll() returns
multiple events have occurred.

Which implies missed handling of events.

But if I'm wrong I'm sure Andy or Mikulas will correct me.

Thanks,
Mike

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 13:21       ` Mike Snitzer
@ 2017-05-11 18:10         ` Andy Grover
  2017-05-11 19:49           ` Martin Wilck
  2017-05-11 19:30         ` Martin Wilck
  1 sibling, 1 reply; 21+ messages in thread
From: Andy Grover @ 2017-05-11 18:10 UTC (permalink / raw)
  To: Mike Snitzer, Martin Wilck; +Cc: dm-devel, mpatocka

On 05/11/2017 06:21 AM, Mike Snitzer wrote:
> On Thu, May 11 2017 at  5:43am -0400,
> Martin Wilck <mwilck@suse.com> wrote:
> 
>> On Thu, 2017-05-11 at 11:39 +0200, Martin Wilck wrote:
>>> On Tue, 2017-05-09 at 12:10 -0700, Andy Grover wrote:
>>>> From: Mikulas Patocka <mpatocka@redhat.com>
>>>>   
>>>> This is the very simple patch for polling on the
>>>> /dev/mapper/control
>>>> device. The select or poll function waits until any event happens
>>>> on
>>>> any
>>>> dm device since opening the /dev/mapper/control device. When select
>>>> or
>>>> poll returns the device as readable, we must close and reopen the
>>>> device
>>>> to wait for new dm events.
>>>
>>> Why have you done it that way? Couldn't you just save the
>>> dm_global_event_nr at the time poll() is called?
>>
>> I should have read your patch 2/3 before posting ... but I'm still
>> missing why the counter can't simply be set at poll() time.
> 
> If you did that then you would have a race where:
> 
> 1) userspace has recorded events prior to poll()
> 2) an event triggers an increment of dm_global_event_nr before userspace
> calls poll()
> 3) then userspace calls poll() -- only to find that after poll() returns
> multiple events have occurred.
> 
> Which implies missed handling of events.
> 
> But if I'm wrong I'm sure Andy or Mikulas will correct me.

I think getting one readiness notification for multiple events should be ok.

But, the semantics of poll(2) are level-triggered, meaning if a fd is 
ready then it will stay ready through multiple invocations of poll() 
until something is done to satisfy it (e.g. for a TCP socket, read()ing 
all the available data). Therefore we can't make it "edge-triggered" by 
auto-clearing - it has to be a separate action.

my 2c -- Regards -- Andy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 13:21       ` Mike Snitzer
  2017-05-11 18:10         ` Andy Grover
@ 2017-05-11 19:30         ` Martin Wilck
  2017-05-11 21:46           ` Andy Grover
                             ` (2 more replies)
  1 sibling, 3 replies; 21+ messages in thread
From: Martin Wilck @ 2017-05-11 19:30 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: dm-devel, Andy Grover, mpatocka

On Thu, 2017-05-11 at 09:21 -0400, Mike Snitzer wrote:
> On Thu, May 11 2017 at  5:43am -0400,
> Martin Wilck <mwilck@suse.com> wrote:
> 
> > On Thu, 2017-05-11 at 11:39 +0200, Martin Wilck wrote:
> > > On Tue, 2017-05-09 at 12:10 -0700, Andy Grover wrote:
> > > > From: Mikulas Patocka <mpatocka@redhat.com>
> > > >  
> > > > This is the very simple patch for polling on the
> > > > /dev/mapper/control
> > > > device. The select or poll function waits until any event
> > > > happens
> > > > on
> > > > any
> > > > dm device since opening the /dev/mapper/control device. When
> > > > select
> > > > or
> > > > poll returns the device as readable, we must close and reopen
> > > > the
> > > > device
> > > > to wait for new dm events.
> > > 
> > > Why have you done it that way? Couldn't you just save the
> > > dm_global_event_nr at the time poll() is called?
> > 
> > I should have read your patch 2/3 before posting ... but I'm still
> > missing why the counter can't simply be set at poll() time.
> 
> If you did that then you would have a race where:
> 
> 1) userspace has recorded events prior to poll()
> 2) an event triggers an increment of dm_global_event_nr before
> userspace
> calls poll()
> 3) then userspace calls poll() -- only to find that after poll()
> returns
> multiple events have occurred.
> 
> Which implies missed handling of events.

I see - but I don't see yet how the ioctl approach (or the
close()/open() based one, for that matter) would avoid this race.

 1) application processes event N
 2) event N+1 arrives in the kernel
 3) user space issues ioctl or close()/open() sequence, N+1 is recorded
    in priv->global_event_nr
 4) user space runs poll()
 5) event N+2 arrives 4 weeks later and poll returns

... meaning that event N+1 has been left unprocessed for 4 weeks.
Or what I am missing?

AFAICS, the only way for user space to make sure it misses no events
would be passing the number of the last processed event down the kernel
in the ioctl call (and the kernel would use that value, rather than its
internal counter, for initializing priv->global_event_nr).

Regards
Martin



> But if I'm wrong I'm sure Andy or Mikulas will correct me.
> 
> Thanks,
> Mike
> 

-- 
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 18:10         ` Andy Grover
@ 2017-05-11 19:49           ` Martin Wilck
  2017-05-11 20:21             ` Alasdair G Kergon
  0 siblings, 1 reply; 21+ messages in thread
From: Martin Wilck @ 2017-05-11 19:49 UTC (permalink / raw)
  To: dm-devel, Andy Grover; +Cc: mpatocka, snitzer

On Thu, 2017-05-11 at 11:10 -0700, Andy Grover wrote:
> 
> I think getting one readiness notification for multiple events should
> be ok.
> 
> But, the semantics of poll(2) are level-triggered, meaning if a fd
> is 
> ready then it will stay ready through multiple invocations of poll() 
> until something is done to satisfy it (e.g. for a TCP socket,
> read()ing 
> all the available data). Therefore we can't make it "edge-triggered"
> by 
> auto-clearing - it has to be a separate action.

Hm, perhaps.

Here is an idea: the best way to avoid the race mentioned by Mike might
be to set priv->global_event_nr to the highest event nr reported by the
DM_LIST_DEVICES ioctl when this ioctl returns. DM_LIST_DEVICES would
then represent your "separate action", which would fit quite well, and
the DM_DEV_ARM_POLL ioctl wouldn't be needed. It would be guaranteed
that if any event had occured after the previous DM_LIST_DEVICES,
whether or not that had been before or after the poll() call, userspace
would notice.

Cheers,
Martin

-- 
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 19:49           ` Martin Wilck
@ 2017-05-11 20:21             ` Alasdair G Kergon
  2017-05-11 20:45               ` Martin Wilck
  0 siblings, 1 reply; 21+ messages in thread
From: Alasdair G Kergon @ 2017-05-11 20:21 UTC (permalink / raw)
  To: Martin Wilck; +Cc: dm-devel, Andy Grover, mpatocka, snitzer

On Thu, May 11, 2017 at 09:49:14PM +0200, Martin Wilck wrote:
> Here is an idea: the best way to avoid the race mentioned by Mike might
> be to set priv->global_event_nr to the highest event nr reported by the
> DM_LIST_DEVICES ioctl when this ioctl returns. DM_LIST_DEVICES would
> then represent your "separate action", which would fit quite well, and
> the DM_DEV_ARM_POLL ioctl wouldn't be needed. It would be guaranteed
> that if any event had occured after the previous DM_LIST_DEVICES,
> whether or not that had been before or after the poll() call, userspace
> would notice.
 
It's not really good practice to overload operations by adding and
relying upon side-effects.  Yes, you could add a flag to control the
side-effect, but a separate ioctl seems to be a perfectly reasonable
approach in the present case, allowing for flexible use and not forcing
you to generate the list when you don't need it.  We've already got
examples of both other approaches - DM_DEV_WAIT_CMD taking the last
known event_nr as input, and @stats_print_clear reporting and resetting
counters in the same operation.

Alasdair

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 20:21             ` Alasdair G Kergon
@ 2017-05-11 20:45               ` Martin Wilck
  2017-05-11 21:38                 ` Alasdair G Kergon
  0 siblings, 1 reply; 21+ messages in thread
From: Martin Wilck @ 2017-05-11 20:45 UTC (permalink / raw)
  To: Alasdair G Kergon; +Cc: dm-devel, Andy Grover, mpatocka, snitzer

On Thu, 2017-05-11 at 21:21 +0100, Alasdair G Kergon wrote:
> On Thu, May 11, 2017 at 09:49:14PM +0200, Martin Wilck wrote:
> > Here is an idea: the best way to avoid the race mentioned by Mike
> > might
> > be to set priv->global_event_nr to the highest event nr reported by
> > the
> > DM_LIST_DEVICES ioctl when this ioctl returns. DM_LIST_DEVICES
> > would
> > then represent your "separate action", which would fit quite well,
> > and
> > the DM_DEV_ARM_POLL ioctl wouldn't be needed. It would be
> > guaranteed
> > that if any event had occured after the previous DM_LIST_DEVICES,
> > whether or not that had been before or after the poll() call,
> > userspace
> > would notice.
> 
>  
> It's not really good practice to overload operations by adding and
> relying upon side-effects.  Yes, you could add a flag to control the
> side-effect, but a separate ioctl seems to be a perfectly reasonable
> approach in the present case, allowing for flexible use and not
> forcing
> you to generate the list when you don't need it. 

Except that it can cause userspace to miss events, as I tried to point
out in my reply to Mike.

My point is: the system call that resets the base counter should be the
same system call that retrieves the events since the last reset
operation. Unless I'm missing something essential, that's the only way
to avoid userspace receiving events with a potentially large delay.

If that system call can't be ioctl(DM_DEVICE_INFO) as you argue, it
should be something else, but it should be _one_.

Regards
Martin

-- 
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 20:45               ` Martin Wilck
@ 2017-05-11 21:38                 ` Alasdair G Kergon
  0 siblings, 0 replies; 21+ messages in thread
From: Alasdair G Kergon @ 2017-05-11 21:38 UTC (permalink / raw)
  To: Martin Wilck; +Cc: dm-devel, Andy Grover, mpatocka, snitzer, Alasdair G Kergon

On Thu, May 11, 2017 at 10:45:27PM +0200, Martin Wilck wrote:
> Except that it can cause userspace to miss events, as I tried to point
> out in my reply to Mike.

It should never miss new events.  Because the calls are separate,
occasionally it might trigger when it didn't need to, but userspace will
see nothing new happened and just go back into poll.  Combining the two
calls would avoid this, but the authors decided on balance it was
unnecessary.

I've already requested a libdevmapper/dmsetup implementation as a matter
of urgency so that everyone has easy command-line access to the new
features and example code and can test them - normally patches like
these shouldn't go upstream until a supported userspace implementation
is available.  Once we have this, questions like the ones you pose can
be checked practically.

Alasdair

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 19:30         ` Martin Wilck
@ 2017-05-11 21:46           ` Andy Grover
  2017-05-12  6:48             ` Martin Wilck
  2017-05-11 21:50           ` Alasdair G Kergon
  2017-05-12 15:52           ` Mikulas Patocka
  2 siblings, 1 reply; 21+ messages in thread
From: Andy Grover @ 2017-05-11 21:46 UTC (permalink / raw)
  To: Martin Wilck, Mike Snitzer; +Cc: dm-devel, mpatocka

On 05/11/2017 12:30 PM, Martin Wilck wrote:
> 
> I see - but I don't see yet how the ioctl approach (or the
> close()/open() based one, for that matter) would avoid this race.
> 
>   1) application processes event N
>   2) event N+1 arrives in the kernel
>   3) user space issues ioctl or close()/open() sequence, N+1 is recorded
>      in priv->global_event_nr
>   4) user space runs poll()
>   5) event N+2 arrives 4 weeks later and poll returns
> 
> ... meaning that event N+1 has been left unprocessed for 4 weeks.
> Or what I am missing?
> 
> AFAICS, the only way for user space to make sure it misses no events
> would be passing the number of the last processed event down the kernel
> in the ioctl call (and the kernel would use that value, rather than its
> internal counter, for initializing priv->global_event_nr).

a) Application notes initial event_nrs for all relevant devices
while (1)
     b) Application calls ARM_POLL
     c) Application gets updated event_nrs from LIST_DEVICES and handles
     d) Application calls poll()

If events arrive between b and c, app sees them.
If events arrive between c and d, dm_global_event_nr is greater than 
priv->global_event_nr and poll() will indicate fd is ready immediately 
w/o sleeping.
If events arrive after d, app sleeps in poll() until fd is ready.

The difference is that you have to ARM_POLL and *then* process events 
before calling poll(). The only small negative consequence is that if an 
event arrives between b and c, it will be handled by c but poll() will 
still see readiness and pop out of the poll() and loop again when it 
strictly didn't have to. But events won't be lost.

Regards -- Andy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 19:30         ` Martin Wilck
  2017-05-11 21:46           ` Andy Grover
@ 2017-05-11 21:50           ` Alasdair G Kergon
  2017-05-12 15:52           ` Mikulas Patocka
  2 siblings, 0 replies; 21+ messages in thread
From: Alasdair G Kergon @ 2017-05-11 21:50 UTC (permalink / raw)
  To: Martin Wilck; +Cc: dm-devel, Andy Grover, mpatocka, Mike Snitzer

On Thu, May 11, 2017 at 09:30:43PM +0200, Martin Wilck wrote:
> I see - but I don't see yet how the ioctl approach (or the
> close()/open() based one, for that matter) would avoid this race.
>  1) application processes event N
>  2) event N+1 arrives in the kernel
>  3) user space issues ioctl or close()/open() sequence, N+1 is recorded
>     in priv->global_event_nr
>  4) user space runs poll()
>  5) event N+2 arrives 4 weeks later and poll returns
 
Well that userspace design is obviously not compatible with the set of 
patches that has been posted and is not what was proposed in the patch
headers.

Userspace records and compares event numbers.

ARM occurs immediately before LIST.  Any userspace processing in response
happens later.

Alasdair

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 21:46           ` Andy Grover
@ 2017-05-12  6:48             ` Martin Wilck
  0 siblings, 0 replies; 21+ messages in thread
From: Martin Wilck @ 2017-05-12  6:48 UTC (permalink / raw)
  To: Andy Grover, Mike Snitzer; +Cc: dm-devel, mpatocka

On Thu, 2017-05-11 at 14:46 -0700, Andy Grover wrote:
> 
> a) Application notes initial event_nrs for all relevant devices
> while (1)
>      b) Application calls ARM_POLL
>      c) Application gets updated event_nrs from LIST_DEVICES and
> handles
>      d) Application calls poll()
> 
> If events arrive between b and c, app sees them.
> If events arrive between c and d, dm_global_event_nr is greater than 
> priv->global_event_nr and poll() will indicate fd is ready
> immediately 
> w/o sleeping.
> If events arrive after d, app sleeps in poll() until fd is ready.
> 
> The difference is that you have to ARM_POLL and *then* process
> events 
> before calling poll(). The only small negative consequence is that if
> an 
> event arrives between b and c, it will be handled by c but poll()
> will 
> still see readiness and pop out of the poll() and loop again when it 
> strictly didn't have to. But events won't be lost.

I see, that would work. The call sequence is reversed compared to
"classical" poll() usage. Not a big problem, just something to be aware
of. Will there be some wrapper in libdm to make sure applications
adhere to these semantics?

And how does user space find out whether the kernel supports this
functionality?

Regards,
Martin

-- 
Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/3] dm: a basic support for using the select or poll function
  2017-05-11 19:30         ` Martin Wilck
  2017-05-11 21:46           ` Andy Grover
  2017-05-11 21:50           ` Alasdair G Kergon
@ 2017-05-12 15:52           ` Mikulas Patocka
  2 siblings, 0 replies; 21+ messages in thread
From: Mikulas Patocka @ 2017-05-12 15:52 UTC (permalink / raw)
  To: Martin Wilck; +Cc: dm-devel, Andy Grover, Mike Snitzer

[-- Attachment #1: Type: TEXT/PLAIN, Size: 3003 bytes --]



On Thu, 11 May 2017, Martin Wilck wrote:

> On Thu, 2017-05-11 at 09:21 -0400, Mike Snitzer wrote:
> > On Thu, May 11 2017 at  5:43am -0400,
> > Martin Wilck <mwilck@suse.com> wrote:
> > 
> > > On Thu, 2017-05-11 at 11:39 +0200, Martin Wilck wrote:
> > > > On Tue, 2017-05-09 at 12:10 -0700, Andy Grover wrote:
> > > > > From: Mikulas Patocka <mpatocka@redhat.com>
> > > > >  
> > > > > This is the very simple patch for polling on the
> > > > > /dev/mapper/control
> > > > > device. The select or poll function waits until any event
> > > > > happens
> > > > > on
> > > > > any
> > > > > dm device since opening the /dev/mapper/control device. When
> > > > > select
> > > > > or
> > > > > poll returns the device as readable, we must close and reopen
> > > > > the
> > > > > device
> > > > > to wait for new dm events.
> > > > 
> > > > Why have you done it that way? Couldn't you just save the
> > > > dm_global_event_nr at the time poll() is called?
> > > 
> > > I should have read your patch 2/3 before posting ... but I'm still
> > > missing why the counter can't simply be set at poll() time.
> > 
> > If you did that then you would have a race where:
> > 
> > 1) userspace has recorded events prior to poll()
> > 2) an event triggers an increment of dm_global_event_nr before
> > userspace
> > calls poll()
> > 3) then userspace calls poll() -- only to find that after poll()
> > returns
> > multiple events have occurred.
> > 
> > Which implies missed handling of events.
> 
> I see - but I don't see yet how the ioctl approach (or the
> close()/open() based one, for that matter) would avoid this race.
> 
>  1) application processes event N
>  2) event N+1 arrives in the kernel
>  3) user space issues ioctl or close()/open() sequence, N+1 is recorded
>     in priv->global_event_nr
>  4) user space runs poll()
>  5) event N+2 arrives 4 weeks later and poll returns
> 
> ... meaning that event N+1 has been left unprocessed for 4 weeks.
> Or what I am missing?

The proper sequence is:
1) open /dev/mapper/control
2) check if the event we are waiting for already happened
3) call poll()

- if you call poll immediatelly after opening, it is incorrect use and 
there is a possibility for a race condition.

> > > > Why have you done it that way? Couldn't you just save the
> > > > dm_global_event_nr at the time poll() is called?

- if we did it this way, the race condition would be unavoidable.

Mikulas

> AFAICS, the only way for user space to make sure it misses no events
> would be passing the number of the last processed event down the kernel
> in the ioctl call (and the kernel would use that value, rather than its
> internal counter, for initializing priv->global_event_nr).
> 
> Regards
> Martin
> 
> 
> 
> > But if I'm wrong I'm sure Andy or Mikulas will correct me.
> > 
> > Thanks,
> > Mike
> > 
> 
> -- 
> Dr. Martin Wilck <mwilck@suse.com>, Tel. +49 (0)911 74053 2107
> SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton
> HRB 21284 (AG Nürnberg)
> 

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES
  2017-05-09 19:10 [PATCH 0/3] poll()able DM events Andy Grover
                   ` (3 preceding siblings ...)
  2017-05-09 21:14 ` [PATCH 0/3] poll()able DM events Mike Snitzer
@ 2017-09-17  2:41 ` Eugene Syromiatnikov
  2017-09-19 16:47   ` Andy Grover
  2017-09-20 11:29   ` [PATCH] dm-ioctl: fix alignment of event number in the device list Mikulas Patocka
  4 siblings, 2 replies; 21+ messages in thread
From: Eugene Syromiatnikov @ 2017-09-17  2:41 UTC (permalink / raw)
  To: dm-devel; +Cc: agrover, mpatocka

> +			event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1);
If my understanding is correct, (nl + 1) differs between 32-bit and
64-bit kernels (as the structure contains of one 8-byte and one 4-byte field),
so this logic is probably broken for compat.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES
  2017-09-17  2:41 ` [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES Eugene Syromiatnikov
@ 2017-09-19 16:47   ` Andy Grover
  2017-09-20 11:29   ` [PATCH] dm-ioctl: fix alignment of event number in the device list Mikulas Patocka
  1 sibling, 0 replies; 21+ messages in thread
From: Andy Grover @ 2017-09-19 16:47 UTC (permalink / raw)
  To: dm-devel

On 09/16/2017 07:41 PM, Eugene Syromiatnikov wrote:
>> +			event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1);
> If my understanding is correct, (nl + 1) differs between 32-bit and
> 64-bit kernels (as the structure contains of one 8-byte and one 4-byte field),
> so this logic is probably broken for compat.

I don't understand.. how does this change the result from the 
calculation depending on the platform arch? 4 + 8 = 12.

Thanks -- Andy

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH] dm-ioctl: fix alignment of event number in the device list
  2017-09-17  2:41 ` [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES Eugene Syromiatnikov
  2017-09-19 16:47   ` Andy Grover
@ 2017-09-20 11:29   ` Mikulas Patocka
  1 sibling, 0 replies; 21+ messages in thread
From: Mikulas Patocka @ 2017-09-20 11:29 UTC (permalink / raw)
  To: Eugene Syromiatnikov; +Cc: dm-devel, agrover, Mike Snitzer, Alasdair G. Kergon



On Sun, 17 Sep 2017, Eugene Syromiatnikov wrote:

> > +			event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1);
>
> If my understanding is correct, (nl + 1) differs between 32-bit and
> 64-bit kernels (as the structure contains of one 8-byte and one 4-byte field),
> so this logic is probably broken for compat.

You're right.

I've created this patch for this problem. The patch also increments 
version number (so that userspace can detect the fix) and it also triggers 
global event for device create/remove/rename/table change, so that we can 
monitor for these events.

Mikulas


From: Mikulas Patocka <mpatocka@redhat.com>

The size of struct dm_name_list is different on 32-bit and 64-bit kernels.

This mismatch caused some harmless difference in padding when using 32-bit
or 64-bit kernel. The patch 23d70c5e52dd added reporting event number in
the output of DM_LIST_DEVICES_CMD. This difference in padding makes it
impossible for userspace to determine the location of the event number
(the location would be different when running on 32-bit and 64-bit
kernel).

This patch fixes the padding is uses offsetof(struct dm_name_list, name)
instead of sizeof(struct dm_name_list) when determining the location of
entries.

The patch also increments version number to 37, so that the userspace can
use the version number to determine that the event number is present and
correctly located.

When we change it, I also added a code that raises global event when a dm
device is created, removed, renamed or when table is swapped, so that the
user can monitor for device changes.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reported-by: Eugene Syromiatnikov <esyr@redhat.com>
Fixes: 23d70c5e52dd ("dm ioctl: report event number in DM_LIST_DEVICES")
Cc: stable@vger.kernel.org	# 4.13+

---
 drivers/md/dm-core.h          |    1 +
 drivers/md/dm-ioctl.c         |   37 ++++++++++++++++++++++++-------------
 drivers/md/dm.c               |   10 ++++++++--
 include/uapi/linux/dm-ioctl.h |    4 ++--
 4 files changed, 35 insertions(+), 17 deletions(-)

Index: linux-4.14-rc1/drivers/md/dm-ioctl.c
===================================================================
--- linux-4.14-rc1.orig/drivers/md/dm-ioctl.c
+++ linux-4.14-rc1/drivers/md/dm-ioctl.c
@@ -477,9 +477,13 @@ static int remove_all(struct file *filp,
  * Round up the ptr to an 8-byte boundary.
  */
 #define ALIGN_MASK 7
+static inline size_t align_val(size_t val)
+{
+	return (val + ALIGN_MASK) & ~ALIGN_MASK;
+}
 static inline void *align_ptr(void *ptr)
 {
-	return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK);
+	return (void *)align_val((size_t)ptr);
 }
 
 /*
@@ -505,7 +509,7 @@ static int list_devices(struct file *fil
 	struct hash_cell *hc;
 	size_t len, needed = 0;
 	struct gendisk *disk;
-	struct dm_name_list *nl, *old_nl = NULL;
+	struct dm_name_list *orig_nl, *nl, *old_nl = NULL;
 	uint32_t *event_nr;
 
 	down_write(&_hash_lock);
@@ -516,17 +520,15 @@ static int list_devices(struct file *fil
 	 */
 	for (i = 0; i < NUM_BUCKETS; i++) {
 		list_for_each_entry (hc, _name_buckets + i, name_list) {
-			needed += sizeof(struct dm_name_list);
-			needed += strlen(hc->name) + 1;
-			needed += ALIGN_MASK;
-			needed += (sizeof(uint32_t) + ALIGN_MASK) & ~ALIGN_MASK;
+			needed += align_val(offsetof(struct dm_name_list, name) + strlen(hc->name) + 1);
+			needed += align_val(sizeof(uint32_t));
 		}
 	}
 
 	/*
 	 * Grab our output buffer.
 	 */
-	nl = get_result_buffer(param, param_size, &len);
+	nl = orig_nl = get_result_buffer(param, param_size, &len);
 	if (len < needed) {
 		param->flags |= DM_BUFFER_FULL_FLAG;
 		goto out;
@@ -549,11 +551,16 @@ static int list_devices(struct file *fil
 			strcpy(nl->name, hc->name);
 
 			old_nl = nl;
-			event_nr = align_ptr(((void *) (nl + 1)) + strlen(hc->name) + 1);
+			event_nr = align_ptr(nl->name + strlen(hc->name) + 1);
 			*event_nr = dm_get_event_nr(hc->md);
 			nl = align_ptr(event_nr + 1);
 		}
 	}
+	/*
+	 * If mismatch happens, security may be compromised due to buffer
+	 * overflow, so it's better to crash.
+	 */
+	BUG_ON((char *)nl - (char *)orig_nl != needed);
 
  out:
 	up_write(&_hash_lock);
@@ -1621,7 +1628,8 @@ static int target_message(struct file *f
  * which has a variable size, is not used by the function processing
  * the ioctl.
  */
-#define IOCTL_FLAGS_NO_PARAMS	1
+#define IOCTL_FLAGS_NO_PARAMS		1
+#define IOCTL_FLAGS_ISSUE_GLOBAL_EVENT	2
 
 /*-----------------------------------------------------------------
  * Implementation of open/close/ioctl on the special char
@@ -1635,12 +1643,12 @@ static ioctl_fn lookup_ioctl(unsigned in
 		ioctl_fn fn;
 	} _ioctls[] = {
 		{DM_VERSION_CMD, 0, NULL}, /* version is dealt with elsewhere */
-		{DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS, remove_all},
+		{DM_REMOVE_ALL_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, remove_all},
 		{DM_LIST_DEVICES_CMD, 0, list_devices},
 
-		{DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_create},
-		{DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS, dev_remove},
-		{DM_DEV_RENAME_CMD, 0, dev_rename},
+		{DM_DEV_CREATE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_create},
+		{DM_DEV_REMOVE_CMD, IOCTL_FLAGS_NO_PARAMS | IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_remove},
+		{DM_DEV_RENAME_CMD, IOCTL_FLAGS_ISSUE_GLOBAL_EVENT, dev_rename},
 		{DM_DEV_SUSPEND_CMD, IOCTL_FLAGS_NO_PARAMS, dev_suspend},
 		{DM_DEV_STATUS_CMD, IOCTL_FLAGS_NO_PARAMS, dev_status},
 		{DM_DEV_WAIT_CMD, 0, dev_wait},
@@ -1869,6 +1877,9 @@ static int ctl_ioctl(struct file *file,
 	    unlikely(ioctl_flags & IOCTL_FLAGS_NO_PARAMS))
 		DMERR("ioctl %d tried to output some data but has IOCTL_FLAGS_NO_PARAMS set", cmd);
 
+	if (!r && ioctl_flags & IOCTL_FLAGS_ISSUE_GLOBAL_EVENT)
+		dm_issue_global_event();
+
 	/*
 	 * Copy the results back to userland.
 	 */
Index: linux-4.14-rc1/include/uapi/linux/dm-ioctl.h
===================================================================
--- linux-4.14-rc1.orig/include/uapi/linux/dm-ioctl.h
+++ linux-4.14-rc1/include/uapi/linux/dm-ioctl.h
@@ -269,9 +269,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	36
+#define DM_VERSION_MINOR	37
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2017-06-09)"
+#define DM_VERSION_EXTRA	"-ioctl (2017-09-20)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
Index: linux-4.14-rc1/drivers/md/dm.c
===================================================================
--- linux-4.14-rc1.orig/drivers/md/dm.c
+++ linux-4.14-rc1/drivers/md/dm.c
@@ -52,6 +52,12 @@ static struct workqueue_struct *deferred
 atomic_t dm_global_event_nr = ATOMIC_INIT(0);
 DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);
 
+void dm_issue_global_event(void)
+{
+	atomic_inc(&dm_global_event_nr);
+	wake_up(&dm_global_eventq);
+}
+
 /*
  * One of these is allocated per bio.
  */
@@ -1902,9 +1908,8 @@ static void event_callback(void *context
 	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
 
 	atomic_inc(&md->event_nr);
-	atomic_inc(&dm_global_event_nr);
 	wake_up(&md->eventq);
-	wake_up(&dm_global_eventq);
+	dm_issue_global_event();
 }
 
 /*
@@ -2321,6 +2326,7 @@ struct dm_table *dm_swap_table(struct ma
 	}
 
 	map = __bind(md, table, &limits);
+	dm_issue_global_event();
 
 out:
 	mutex_unlock(&md->suspend_lock);
Index: linux-4.14-rc1/drivers/md/dm-core.h
===================================================================
--- linux-4.14-rc1.orig/drivers/md/dm-core.h
+++ linux-4.14-rc1/drivers/md/dm-core.h
@@ -150,5 +150,6 @@ static inline bool dm_message_test_buffe
 
 extern atomic_t dm_global_event_nr;
 extern wait_queue_head_t dm_global_eventq;
+void dm_issue_global_event(void);
 
 #endif

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2017-09-20 11:29 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-09 19:10 [PATCH 0/3] poll()able DM events Andy Grover
2017-05-09 19:10 ` [PATCH 1/3] dm: a basic support for using the select or poll function Andy Grover
2017-05-11  9:39   ` Martin Wilck
2017-05-11  9:43     ` Martin Wilck
2017-05-11 13:21       ` Mike Snitzer
2017-05-11 18:10         ` Andy Grover
2017-05-11 19:49           ` Martin Wilck
2017-05-11 20:21             ` Alasdair G Kergon
2017-05-11 20:45               ` Martin Wilck
2017-05-11 21:38                 ` Alasdair G Kergon
2017-05-11 19:30         ` Martin Wilck
2017-05-11 21:46           ` Andy Grover
2017-05-12  6:48             ` Martin Wilck
2017-05-11 21:50           ` Alasdair G Kergon
2017-05-12 15:52           ` Mikulas Patocka
2017-05-09 19:10 ` [PATCH 2/3] dm-ioctl: add a new DM_DEV_ARM_POLL ioctl Andy Grover
2017-05-09 19:10 ` [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES Andy Grover
2017-05-09 21:14 ` [PATCH 0/3] poll()able DM events Mike Snitzer
2017-09-17  2:41 ` [PATCH 3/3] dm-ioctl: report event number in DM_LIST_DEVICES Eugene Syromiatnikov
2017-09-19 16:47   ` Andy Grover
2017-09-20 11:29   ` [PATCH] dm-ioctl: fix alignment of event number in the device list Mikulas Patocka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.