All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] libceph: Complete stuck requests to OSD with EIO
@ 2017-02-09 13:04 Artur Molchanov
  2017-02-09 14:27 ` Ilya Dryomov
  2017-02-10 11:31 ` Jeff Layton
  0 siblings, 2 replies; 14+ messages in thread
From: Artur Molchanov @ 2017-02-09 13:04 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel

From: Artur Molchanov <artur.molchanov@synesis.ru>

Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
If osd_request_timeout equals to 0 (default value) then do nothing with
hung requests (keep default behavior).

Create RBD map option osd_request_timeout to set timeout in seconds. Set
osd_request_timeout to 0 by default.

Signed-off-by: Artur Molchanov <artur.molchanov@synesis.ru>
---
  include/linux/ceph/libceph.h    |  8 +++--
  include/linux/ceph/osd_client.h |  1 +
  net/ceph/ceph_common.c          | 12 +++++++
  net/ceph/osd_client.c           | 72 +++++++++++++++++++++++++++++++++++++++++
  4 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 1816c5e..10d8acb 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
  	unsigned long mount_timeout;		/* jiffies */
  	unsigned long osd_idle_ttl;		/* jiffies */
  	unsigned long osd_keepalive_timeout;	/* jiffies */
+	unsigned long osd_request_timeout;	/* jiffies */

  	/*
  	 * any type that can't be simply compared or doesn't need need
@@ -65,9 +66,10 @@ struct ceph_options {
  /*
   * defaults
   */
-#define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
-#define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
-#define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
+#define CEPH_MOUNT_TIMEOUT_DEFAULT		msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_KEEPALIVE_DEFAULT		msecs_to_jiffies(5 * 1000)
+#define CEPH_OSD_IDLE_TTL_DEFAULT		msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT	msecs_to_jiffies(0 * 1000)

  #define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
  #define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 03a6653..e45cba0 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -279,6 +279,7 @@ struct ceph_osd_client {
  	atomic_t               num_homeless;
  	struct delayed_work    timeout_work;
  	struct delayed_work    osds_timeout_work;
+	struct delayed_work    osd_request_timeout_work;
  #ifdef CONFIG_DEBUG_FS
  	struct dentry 	       *debugfs_file;
  #endif
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 464e885..81876c8 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
  	Opt_osdkeepalivetimeout,
  	Opt_mount_timeout,
  	Opt_osd_idle_ttl,
+	Opt_osd_request_timeout,
  	Opt_last_int,
  	/* int args above */
  	Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
  	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
  	{Opt_mount_timeout, "mount_timeout=%d"},
  	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+	{Opt_osd_request_timeout, "osd_request_timeout=%d"},
  	/* int args above */
  	{Opt_fsid, "fsid=%s"},
  	{Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
  	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
  	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
  	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+	opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;

  	/* get mon ip(s) */
  	/* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
  			}
  			opt->mount_timeout = msecs_to_jiffies(intval * 1000);
  			break;
+		case Opt_osd_request_timeout:
+			/* 0 is "wait forever" (i.e. infinite timeout) */
+			if (intval < 0 || intval > INT_MAX / 1000) {
+				pr_err("osd_request_timeout out of range\n");
+				err = -EINVAL;
+				goto out;
+			}
+			opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
+			break;

  		case Opt_share:
  			opt->flags &= ~CEPH_OPT_NOSHARE;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 842f049..2f5a024 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
  			      osdc->client->options->osd_keepalive_timeout);
  }

+/*
+ * Complete request to OSD with error err if it started before cutoff.
+ */
+static void complete_osd_stuck_request(struct ceph_osd_request *req,
+				       unsigned long cutoff,
+				       int err)
+{
+	if (!time_before(req->r_stamp, cutoff))
+		return;
+
+	pr_warn_ratelimited("osd req on osd%d timeout expired\n",
+			    req->r_osd->o_osd);
+
+	complete_request(req, err);
+}
+
+/*
+ * Complete all requests to OSD that has been active for more than timeout.
+ */
+static void complete_osd_stuck_requests(struct ceph_osd *osd,
+					unsigned long timeout,
+					int err)
+{
+	struct ceph_osd_request *req, *n;
+	const unsigned long cutoff = jiffies - timeout;
+
+	rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node) {
+		complete_osd_stuck_request(req, cutoff, -EIO);
+	}
+}
+
+/*
+ * Timeout callback, called every N seconds. When 1 or more OSD
+ * requests that has been active for more than N seconds,
+ * we complete it with error EIO.
+ */
+static void handle_osd_request_timeout(struct work_struct *work)
+{
+	struct ceph_osd_client *osdc =
+		container_of(work, struct ceph_osd_client,
+			     osd_request_timeout_work.work);
+	struct ceph_osd *osd, *n;
+	struct ceph_options *opts = osdc->client->options;
+	const unsigned long timeout = opts->osd_request_timeout;
+
+	dout("%s osdc %p\n", __func__, osdc);
+
+	if (!timeout)
+		return;
+
+	down_write(&osdc->lock);
+
+	rbtree_postorder_for_each_entry_safe(osd, n, &osdc->osds, o_node) {
+		complete_osd_stuck_requests(osd, timeout, -EIO);
+	}
+
+	up_write(&osdc->lock);
+
+	if (!atomic_read(&osdc->num_homeless))
+		goto out;
+
+	down_write(&osdc->lock);
+	complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
+	up_write(&osdc->lock);
+
+out:
+	schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
+}
+
  static void handle_osds_timeout(struct work_struct *work)
  {
  	struct ceph_osd_client *osdc =
@@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct 
ceph_client *client)
  	osdc->linger_map_checks = RB_ROOT;
  	INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
  	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
+	INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);

  	err = -ENOMEM;
  	osdc->osdmap = ceph_osdmap_alloc();
@@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct 
ceph_client *client)
  			      osdc->client->options->osd_keepalive_timeout);
  	schedule_delayed_work(&osdc->osds_timeout_work,
  	    round_jiffies_relative(osdc->client->options->osd_idle_ttl));
+	schedule_delayed_work(&osdc->osd_request_timeout_work,
+	    round_jiffies_relative(osdc->client->options->osd_request_timeout));

  	return 0;

-- 
2.7.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-09 13:04 [PATCH] libceph: Complete stuck requests to OSD with EIO Artur Molchanov
@ 2017-02-09 14:27 ` Ilya Dryomov
  2017-02-09 15:51   ` Artur Molchanov
  2017-02-10 11:31 ` Jeff Layton
  1 sibling, 1 reply; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-09 14:27 UTC (permalink / raw)
  To: Artur Molchanov; +Cc: Ceph Development

On Thu, Feb 9, 2017 at 2:04 PM, Artur Molchanov
<artur.molchanov@synesis.ru> wrote:
> From: Artur Molchanov <artur.molchanov@synesis.ru>
>
> Complete stuck requests to OSD with error EIO after osd_request_timeout
> expired.
> If osd_request_timeout equals to 0 (default value) then do nothing with
> hung requests (keep default behavior).
>
> Create RBD map option osd_request_timeout to set timeout in seconds. Set
> osd_request_timeout to 0 by default.

Hi Artur,

What is the use case?  What do you expect this timeout to be in your
environment?

I actually have a slightly more compact version of this somewhere,
which I deliberately excluded when updating the OSD client a few kernel
releases ago.  IIRC complete_request() wasn't meant to be called
arbitrarily, but we can probably fix that.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-09 14:27 ` Ilya Dryomov
@ 2017-02-09 15:51   ` Artur Molchanov
  2017-02-09 16:11     ` Wido den Hollander
  0 siblings, 1 reply; 14+ messages in thread
From: Artur Molchanov @ 2017-02-09 15:51 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: Ceph Development

Hi Ilya,

Currently we're working on moving our product to a kubernetes / ceph stack. We're 
actively tuning the ceph configuration, and the ceph cluster often needs to be redeployed. 
We have to warm reboot all machines with mounted RBDs as a result. This patch 
allows us to remap and remount RBDs without a time-consuming reboot.

On 02/09/2017 05:27 PM, Ilya Dryomov wrote:
> What is the use case?  What do you expect this timeout to be in your
> environment?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-09 15:51   ` Artur Molchanov
@ 2017-02-09 16:11     ` Wido den Hollander
  0 siblings, 0 replies; 14+ messages in thread
From: Wido den Hollander @ 2017-02-09 16:11 UTC (permalink / raw)
  To: Artur Molchanov, Ilya Dryomov; +Cc: Ceph Development


> Op 9 februari 2017 om 16:51 schreef Artur Molchanov <artur.molchanov@synesis.ru>:
> 
> 
> Hi Ilya,
> 
> Currently we're working on moving our product to kubernetes / ceph stack. We're 
> actively tuning ceph configuration and ceph cluster often need to be redeployed. 
> We have to warm reboot all machines with mounted RBDs as a result. This patch 
> allows us to remap and remount RBDs without time consuming reboot.
> 

I understand the usecase. You don't always want to block, sometimes you want to timeout after some time.

Wido

> On 02/09/2017 05:27 PM, Ilya Dryomov wrote:
> > What is the use case?  What do you expect this timeout to be in your
> > environment?
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-09 13:04 [PATCH] libceph: Complete stuck requests to OSD with EIO Artur Molchanov
  2017-02-09 14:27 ` Ilya Dryomov
@ 2017-02-10 11:31 ` Jeff Layton
  2017-02-10 12:25   ` Ilya Dryomov
  2017-02-11 20:30   ` Artur Molchanov
  1 sibling, 2 replies; 14+ messages in thread
From: Jeff Layton @ 2017-02-10 11:31 UTC (permalink / raw)
  To: Artur Molchanov, Ilya Dryomov; +Cc: ceph-devel

On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
> From: Artur Molchanov <artur.molchanov@synesis.ru>
> 
> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
> If osd_request_timeout equals to 0 (default value) then do nothing with
> hung requests (keep default behavior).
> 
> Create RBD map option osd_request_timeout to set timeout in seconds. Set
> osd_request_timeout to 0 by default.
> 
Also, what exactly are the requests blocked on when this occurs? Is the
ceph_osd_request_target ending up paused? I wonder if we might be better
off with something that returns a hard error under the circumstances
where you're hanging, rather than depending on timeouts.


> Signed-off-by: Artur Molchanov <artur.molchanov@synesis.ru>
> ---
>   include/linux/ceph/libceph.h    |  8 +++--
>   include/linux/ceph/osd_client.h |  1 +
>   net/ceph/ceph_common.c          | 12 +++++++
>   net/ceph/osd_client.c           | 72 +++++++++++++++++++++++++++++++++++++++++
>   4 files changed, 90 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
> index 1816c5e..10d8acb 100644
> --- a/include/linux/ceph/libceph.h
> +++ b/include/linux/ceph/libceph.h
> @@ -48,6 +48,7 @@ struct ceph_options {
>   	unsigned long mount_timeout;		/* jiffies */
>   	unsigned long osd_idle_ttl;		/* jiffies */
>   	unsigned long osd_keepalive_timeout;	/* jiffies */
> +	unsigned long osd_request_timeout;	/* jiffies */
> 
>   	/*
>   	 * any type that can't be simply compared or doesn't need need
> @@ -65,9 +66,10 @@ struct ceph_options {
>   /*
>    * defaults
>    */
> -#define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
> -#define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
> -#define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
> +#define CEPH_MOUNT_TIMEOUT_DEFAULT		msecs_to_jiffies(60 * 1000)
> +#define CEPH_OSD_KEEPALIVE_DEFAULT		msecs_to_jiffies(5 * 1000)
> +#define CEPH_OSD_IDLE_TTL_DEFAULT		msecs_to_jiffies(60 * 1000)
> +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT	msecs_to_jiffies(0 * 1000)
> 
>   #define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
>   #define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index 03a6653..e45cba0 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -279,6 +279,7 @@ struct ceph_osd_client {
>   	atomic_t               num_homeless;
>   	struct delayed_work    timeout_work;
>   	struct delayed_work    osds_timeout_work;
> +	struct delayed_work    osd_request_timeout_work;
>   #ifdef CONFIG_DEBUG_FS
>   	struct dentry 	       *debugfs_file;
>   #endif
> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> index 464e885..81876c8 100644
> --- a/net/ceph/ceph_common.c
> +++ b/net/ceph/ceph_common.c
> @@ -230,6 +230,7 @@ enum {
>   	Opt_osdkeepalivetimeout,
>   	Opt_mount_timeout,
>   	Opt_osd_idle_ttl,
> +	Opt_osd_request_timeout,
>   	Opt_last_int,
>   	/* int args above */
>   	Opt_fsid,
> @@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
>   	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
>   	{Opt_mount_timeout, "mount_timeout=%d"},
>   	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
> +	{Opt_osd_request_timeout, "osd_request_timeout=%d"},
>   	/* int args above */
>   	{Opt_fsid, "fsid=%s"},
>   	{Opt_name, "name=%s"},
> @@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
>   	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
>   	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
>   	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
> +	opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
> 
>   	/* get mon ip(s) */
>   	/* ip1[:port1][,ip2[:port2]...] */
> @@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
>   			}
>   			opt->mount_timeout = msecs_to_jiffies(intval * 1000);
>   			break;
> +		case Opt_osd_request_timeout:
> +			/* 0 is "wait forever" (i.e. infinite timeout) */
> +			if (intval < 0 || intval > INT_MAX / 1000) {
> +				pr_err("osd_request_timeout out of range\n");
> +				err = -EINVAL;
> +				goto out;
> +			}
> +			opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
> +			break;
> 
>   		case Opt_share:
>   			opt->flags &= ~CEPH_OPT_NOSHARE;
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 842f049..2f5a024 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
>   			      osdc->client->options->osd_keepalive_timeout);
>   }
> 
> +/*
> + * Complete request to OSD with error err if it started before cutoff.
> + */
> +static void complete_osd_stuck_request(struct ceph_osd_request *req,
> +				       unsigned long cutoff,
> +				       int err)
> +{
> +	if (!time_before(req->r_stamp, cutoff))
> +		return;
> +
> +	pr_warn_ratelimited("osd req on osd%d timeout expired\n",
> +			    req->r_osd->o_osd);
> +
> +	complete_request(req, err);
> +}
> +
> +/*
> + * Complete all requests to OSD that has been active for more than timeout.
> + */
> +static void complete_osd_stuck_requests(struct ceph_osd *osd,
> +					unsigned long timeout,
> +					int err)
> +{
> +	struct ceph_osd_request *req, *n;
> +	const unsigned long cutoff = jiffies - timeout;
> +
> +	rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node) {
> +		complete_osd_stuck_request(req, cutoff, -EIO);
> +	}
> +}
> +
> +/*
> + * Timeout callback, called every N seconds. When 1 or more OSD
> + * requests that has been active for more than N seconds,
> + * we complete it with error EIO.
> + */
> +static void handle_osd_request_timeout(struct work_struct *work)
> +{
> +	struct ceph_osd_client *osdc =
> +		container_of(work, struct ceph_osd_client,
> +			     osd_request_timeout_work.work);
> +	struct ceph_osd *osd, *n;
> +	struct ceph_options *opts = osdc->client->options;
> +	const unsigned long timeout = opts->osd_request_timeout;
> +
> +	dout("%s osdc %p\n", __func__, osdc);
> +
> +	if (!timeout)
> +		return;
> +
> +	down_write(&osdc->lock);
> +
> +	rbtree_postorder_for_each_entry_safe(osd, n, &osdc->osds, o_node) {
> +		complete_osd_stuck_requests(osd, timeout, -EIO);
> +	}
> +
> +	up_write(&osdc->lock);
> +
> +	if (!atomic_read(&osdc->num_homeless))
> +		goto out;
> +
> +	down_write(&osdc->lock);
> +	complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
> +	up_write(&osdc->lock);
> +
> +out:
> +	schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
> +}
> +
>   static void handle_osds_timeout(struct work_struct *work)
>   {
>   	struct ceph_osd_client *osdc =
> @@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct 
> ceph_client *client)
>   	osdc->linger_map_checks = RB_ROOT;
>   	INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
>   	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
> +	INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);
> 
>   	err = -ENOMEM;
>   	osdc->osdmap = ceph_osdmap_alloc();
> @@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct 
> ceph_client *client)
>   			      osdc->client->options->osd_keepalive_timeout);
>   	schedule_delayed_work(&osdc->osds_timeout_work,
>   	    round_jiffies_relative(osdc->client->options->osd_idle_ttl));
> +	schedule_delayed_work(&osdc->osd_request_timeout_work,
> +	    round_jiffies_relative(osdc->client->options->osd_request_timeout));
> 
>   	return 0;
> 

Having a job that has to wake up every second or so isn't ideal. Perhaps
you would be better off scheduling the delayed work in the request
submission codepath, and only rearm it when the tree isn't empty after
calling complete_osd_stuck_requests?

Also, I don't see where this job is ever cancelled when the osdc is torn
down. That needs to occur or you'll cause a use-after-free oops...

-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-10 11:31 ` Jeff Layton
@ 2017-02-10 12:25   ` Ilya Dryomov
  2017-02-13  9:11     ` Ilya Dryomov
  2017-02-11 20:30   ` Artur Molchanov
  1 sibling, 1 reply; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-10 12:25 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Artur Molchanov, Ceph Development

On Fri, Feb 10, 2017 at 12:31 PM, Jeff Layton <jlayton@redhat.com> wrote:
> On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
>> From: Artur Molchanov <artur.molchanov@synesis.ru>
>>
>> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
>> If osd_request_timeout equals to 0 (default value) then do nothing with
>> hung requests (keep default behavior).
>>
>> Create RBD map option osd_request_timeout to set timeout in seconds. Set
>> osd_request_timeout to 0 by default.
>>
> Also, what exactly are the requests blocked on when this occurs? Is the
> ceph_osd_request_target ending up paused? I wonder if we might be better
> off with something that returns a hard error under the circumstances
> where you're hanging, rather than depending on timeouts.
>
>
>> Signed-off-by: Artur Molchanov <artur.molchanov@synesis.ru>
>> ---
>>   include/linux/ceph/libceph.h    |  8 +++--
>>   include/linux/ceph/osd_client.h |  1 +
>>   net/ceph/ceph_common.c          | 12 +++++++
>>   net/ceph/osd_client.c           | 72 +++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 90 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
>> index 1816c5e..10d8acb 100644
>> --- a/include/linux/ceph/libceph.h
>> +++ b/include/linux/ceph/libceph.h
>> @@ -48,6 +48,7 @@ struct ceph_options {
>>       unsigned long mount_timeout;            /* jiffies */
>>       unsigned long osd_idle_ttl;             /* jiffies */
>>       unsigned long osd_keepalive_timeout;    /* jiffies */
>> +     unsigned long osd_request_timeout;      /* jiffies */
>>
>>       /*
>>        * any type that can't be simply compared or doesn't need need
>> @@ -65,9 +66,10 @@ struct ceph_options {
>>   /*
>>    * defaults
>>    */
>> -#define CEPH_MOUNT_TIMEOUT_DEFAULT   msecs_to_jiffies(60 * 1000)
>> -#define CEPH_OSD_KEEPALIVE_DEFAULT   msecs_to_jiffies(5 * 1000)
>> -#define CEPH_OSD_IDLE_TTL_DEFAULT    msecs_to_jiffies(60 * 1000)
>> +#define CEPH_MOUNT_TIMEOUT_DEFAULT           msecs_to_jiffies(60 * 1000)
>> +#define CEPH_OSD_KEEPALIVE_DEFAULT           msecs_to_jiffies(5 * 1000)
>> +#define CEPH_OSD_IDLE_TTL_DEFAULT            msecs_to_jiffies(60 * 1000)
>> +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT     msecs_to_jiffies(0 * 1000)
>>
>>   #define CEPH_MONC_HUNT_INTERVAL             msecs_to_jiffies(3 * 1000)
>>   #define CEPH_MONC_PING_INTERVAL             msecs_to_jiffies(10 * 1000)
>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>> index 03a6653..e45cba0 100644
>> --- a/include/linux/ceph/osd_client.h
>> +++ b/include/linux/ceph/osd_client.h
>> @@ -279,6 +279,7 @@ struct ceph_osd_client {
>>       atomic_t               num_homeless;
>>       struct delayed_work    timeout_work;
>>       struct delayed_work    osds_timeout_work;
>> +     struct delayed_work    osd_request_timeout_work;
>>   #ifdef CONFIG_DEBUG_FS
>>       struct dentry          *debugfs_file;
>>   #endif
>> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
>> index 464e885..81876c8 100644
>> --- a/net/ceph/ceph_common.c
>> +++ b/net/ceph/ceph_common.c
>> @@ -230,6 +230,7 @@ enum {
>>       Opt_osdkeepalivetimeout,
>>       Opt_mount_timeout,
>>       Opt_osd_idle_ttl,
>> +     Opt_osd_request_timeout,
>>       Opt_last_int,
>>       /* int args above */
>>       Opt_fsid,
>> @@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
>>       {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
>>       {Opt_mount_timeout, "mount_timeout=%d"},
>>       {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
>> +     {Opt_osd_request_timeout, "osd_request_timeout=%d"},
>>       /* int args above */
>>       {Opt_fsid, "fsid=%s"},
>>       {Opt_name, "name=%s"},
>> @@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
>>       opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
>>       opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
>>       opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
>> +     opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
>>
>>       /* get mon ip(s) */
>>       /* ip1[:port1][,ip2[:port2]...] */
>> @@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
>>                       }
>>                       opt->mount_timeout = msecs_to_jiffies(intval * 1000);
>>                       break;
>> +             case Opt_osd_request_timeout:
>> +                     /* 0 is "wait forever" (i.e. infinite timeout) */
>> +                     if (intval < 0 || intval > INT_MAX / 1000) {
>> +                             pr_err("osd_request_timeout out of range\n");
>> +                             err = -EINVAL;
>> +                             goto out;
>> +                     }
>> +                     opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
>> +                     break;
>>
>>               case Opt_share:
>>                       opt->flags &= ~CEPH_OPT_NOSHARE;
>> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
>> index 842f049..2f5a024 100644
>> --- a/net/ceph/osd_client.c
>> +++ b/net/ceph/osd_client.c
>> @@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
>>                             osdc->client->options->osd_keepalive_timeout);
>>   }
>>
>> +/*
>> + * Complete request to OSD with error err if it started before cutoff.
>> + */
>> +static void complete_osd_stuck_request(struct ceph_osd_request *req,
>> +                                    unsigned long cutoff,
>> +                                    int err)
>> +{
>> +     if (!time_before(req->r_stamp, cutoff))
>> +             return;
>> +
>> +     pr_warn_ratelimited("osd req on osd%d timeout expired\n",
>> +                         req->r_osd->o_osd);
>> +
>> +     complete_request(req, err);
>> +}
>> +
>> +/*
>> + * Complete all requests to OSD that has been active for more than timeout.
>> + */
>> +static void complete_osd_stuck_requests(struct ceph_osd *osd,
>> +                                     unsigned long timeout,
>> +                                     int err)
>> +{
>> +     struct ceph_osd_request *req, *n;
>> +     const unsigned long cutoff = jiffies - timeout;
>> +
>> +     rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node) {
>> +             complete_osd_stuck_request(req, cutoff, -EIO);
>> +     }
>> +}
>> +
>> +/*
>> + * Timeout callback, called every N seconds. When 1 or more OSD
>> + * requests that has been active for more than N seconds,
>> + * we complete it with error EIO.
>> + */
>> +static void handle_osd_request_timeout(struct work_struct *work)
>> +{
>> +     struct ceph_osd_client *osdc =
>> +             container_of(work, struct ceph_osd_client,
>> +                          osd_request_timeout_work.work);
>> +     struct ceph_osd *osd, *n;
>> +     struct ceph_options *opts = osdc->client->options;
>> +     const unsigned long timeout = opts->osd_request_timeout;
>> +
>> +     dout("%s osdc %p\n", __func__, osdc);
>> +
>> +     if (!timeout)
>> +             return;
>> +
>> +     down_write(&osdc->lock);
>> +
>> +     rbtree_postorder_for_each_entry_safe(osd, n, &osdc->osds, o_node) {
>> +             complete_osd_stuck_requests(osd, timeout, -EIO);
>> +     }
>> +
>> +     up_write(&osdc->lock);
>> +
>> +     if (!atomic_read(&osdc->num_homeless))
>> +             goto out;
>> +
>> +     down_write(&osdc->lock);
>> +     complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
>> +     up_write(&osdc->lock);
>> +
>> +out:
>> +     schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
>> +}
>> +
>>   static void handle_osds_timeout(struct work_struct *work)
>>   {
>>       struct ceph_osd_client *osdc =
>> @@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
>> ceph_client *client)
>>       osdc->linger_map_checks = RB_ROOT;
>>       INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
>>       INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
>> +     INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);
>>
>>       err = -ENOMEM;
>>       osdc->osdmap = ceph_osdmap_alloc();
>> @@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
>> ceph_client *client)
>>                             osdc->client->options->osd_keepalive_timeout);
>>       schedule_delayed_work(&osdc->osds_timeout_work,
>>           round_jiffies_relative(osdc->client->options->osd_idle_ttl));
>> +     schedule_delayed_work(&osdc->osd_request_timeout_work,
>> +         round_jiffies_relative(osdc->client->options->osd_request_timeout));
>>
>>       return 0;
>>
>
> Having a job that has to wake up every second or so isn't ideal. Perhaps
> you would be better off scheduling the delayed work in the request
> submission codepath, and only rearm it when the tree isn't empty after
> calling complete_osd_stuck_requests?
>
> Also, I don't see where this job is ever cancelled when the osdc is torn
> down. That needs to occur or you'll cause a use-after-free oops...

Yeah, there are other bugs here: rbtree_postorder_for_each_entry_safe()
can't be used because complete_request() calls erase(), etc.  The patch
I had just piggy-backed on handle_timeout(), which is called every five
seconds anyway.

Given that there is interest, I think it's worth adding.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-10 11:31 ` Jeff Layton
  2017-02-10 12:25   ` Ilya Dryomov
@ 2017-02-11 20:30   ` Artur Molchanov
  2017-02-12 12:34     ` Jeff Layton
  1 sibling, 1 reply; 14+ messages in thread
From: Artur Molchanov @ 2017-02-11 20:30 UTC (permalink / raw)
  To: Jeff Layton, Ilya Dryomov; +Cc: ceph-devel

Hi Jeff,

On Fri, 2017-02-10 at 14:31, Jeff Layton wrote:
> On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
>> From: Artur Molchanov <artur.molchanov@synesis.ru>
>>
>> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
>> If osd_request_timeout equals to 0 (default value) then do nothing with
>> hung requests (keep default behavior).
>>
>> Create RBD map option osd_request_timeout to set timeout in seconds. Set
>> osd_request_timeout to 0 by default.
>>
> Also, what exactly are the requests blocked on when this occurs? Is the
> ceph_osd_request_target ending up paused? I wonder if we might be better
> off with something that returns a hard error under the circumstances
> where you're hanging, rather than depending on timeouts.

I think it is better to complete requests only after the timeout has expired, 
because a request can fail due to temporary network issues (e.g. a router 
restarting) or a machine/service restarting.

> Having a job that has to wake up every second or so isn't ideal. Perhaps
> you would be better off scheduling the delayed work in the request
> submission codepath, and only rearm it when the tree isn't empty after
> calling complete_osd_stuck_requests?

Would you please tell me more about rearming work only if the tree is not empty 
after calling complete_osd_stuck_requests? From what code we should call 
complete_osd_stuck_requests?

As I understood, there are two primary cases:
1 - Requests to OSD failed, but monitors do not return new osdmap (because all 
monitors are offline or monitors did not update osdmap yet).
In this case requests are retried by cyclic calling ceph_con_workfn -> con_fault 
-> ceph_con_workfn. We can check request timestamp and does not call con_fault 
but complete it.

2 - Monitors return new osdmap which does not have any OSD for RBD.
In this case all requests to the last ready OSD will be linked on "homeless" OSD 
and will not be retried until new osdmap with appropriate OSD received. I think 
that we need additional periodic checking timestamp such requests.

Yes, there is already existing job handle_timeout. But the responsibility of 
this job is to sending keepalive requests to slow OSD. I'm not sure that it is a 
good idea to perform additional actions inside this job.
I decided that creating specific job handle_osd_request_timeout is more applicable.

This job will be run only once with a default value of osd_request_timeout (0).
At the same time, I think that user will not use too small value for this 
parameter. I wonder that typical value will be about 1 minute or greater.

> Also, I don't see where this job is ever cancelled when the osdc is torn
> down. That needs to occur or you'll cause a use-after-free oops...

It is my fault, thanks for the correction.

-- 
Artur

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-11 20:30   ` Artur Molchanov
@ 2017-02-12 12:34     ` Jeff Layton
  2017-02-12 15:26       ` Artur Molchanov
  0 siblings, 1 reply; 14+ messages in thread
From: Jeff Layton @ 2017-02-12 12:34 UTC (permalink / raw)
  To: Artur Molchanov, Ilya Dryomov; +Cc: ceph-devel

On Sat, 2017-02-11 at 23:30 +0300, Artur Molchanov wrote:
> Hi Jef,
> 
> On Fri, 2017-02-10 at 14:31, Jeff Layton wrote:
> > On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
> > > From: Artur Molchanov <artur.molchanov@synesis.ru>
> > > 
> > > Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
> > > If osd_request_timeout equals to 0 (default value) then do nothing with
> > > hung requests (keep default behavior).
> > > 
> > > Create RBD map option osd_request_timeout to set timeout in seconds. Set
> > > osd_request_timeout to 0 by default.
> > > 
> > 
> > Also, what exactly are the requests blocked on when this occurs? Is the
> > ceph_osd_request_target ending up paused? I wonder if we might be better
> > off with something that returns a hard error under the circumstances
> > where you're hanging, rather than depending on timeouts.
> 
> I wonder that it is better to complete requests only after timeout expired, just 
> because a request can fail due to temporary network issues (e.g. router 
> restarted) or restarting machine/services.
> 
> > Having a job that has to wake up every second or so isn't ideal. Perhaps
> > you would be better off scheduling the delayed work in the request
> > submission codepath, and only rearm it when the tree isn't empty after
> > calling complete_osd_stuck_requests?
> 
> Would you please tell me more about rearming work only if the tree is not empty 
> after calling complete_osd_stuck_requests? From what code we should call 
> complete_osd_stuck_requests?
> 

Sure. I'm saying you would want to call schedule_delayed_work for the
timeout handler from the request submission path when you link a request
into the tree that has a timeout. Maybe in __submit_request?

Then, instead of unconditionally calling schedule_delayed_work at the
end of handle_request_timeout, you'd only call it if there were no
requests still sitting in the osdc trees.

> As I understood, there are two primary cases:
> 1 - Requests to OSD failed, but monitors do not return new osdmap (because all 
> monitors are offline or monitors did not update osdmap yet).
> In this case requests are retried by cyclic calling ceph_con_workfn -> con_fault 
> -> ceph_con_workfn. We can check request timestamp and does not call con_fault 
> but complete it.
> 
> 2 - Monitors return new osdmap which does not have any OSD for RBD.
> In this case all requests to the last ready OSD will be linked on "homeless" OSD 
> and will not be retried until new osdmap with appropriate OSD received. I think 
> that we need additional periodic checking timestamp such requests.
> 
> Yes, there is already existing job handle_timeout. But the responsibility of 
> this job is to sending keepalive requests to slow OSD. I'm not sure that it is a 
> good idea to perform additional actions inside this job.
> I decided that creating specific job handle_osd_request_timeout is more applicable.
> 
> This job will be run only once with a default value of osd_request_timeout (0).

Ahh, I missed that -- thanks.

> At the same time, I think that user will not use too small value for this 
> parameter. I wonder that typical value will be about 1 minute or greater.
> 
> > Also, I don't see where this job is ever cancelled when the osdc is torn
> > down. That needs to occur or you'll cause a use-after-free oops...
> 
> It is my fault, thanks for the correction.
> 

-- 
Jeff Layton <jlayton@redhat.com>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-12 12:34     ` Jeff Layton
@ 2017-02-12 15:26       ` Artur Molchanov
  0 siblings, 0 replies; 14+ messages in thread
From: Artur Molchanov @ 2017-02-12 15:26 UTC (permalink / raw)
  To: Jeff Layton, Ilya Dryomov; +Cc: ceph-devel

On Sun 2017-2-12 at 15:34, Jeff Layton wrote:
> On Sat, 2017-02-11 at 23:30 +0300, Artur Molchanov wrote:
>> Hi Jef,
>>
>> On Fri, 2017-02-10 at 14:31, Jeff Layton wrote:
>>> On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
>>>> From: Artur Molchanov <artur.molchanov@synesis.ru>
>>>>
>>>> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
>>>> If osd_request_timeout equals to 0 (default value) then do nothing with
>>>> hung requests (keep default behavior).
>>>>
>>>> Create RBD map option osd_request_timeout to set timeout in seconds. Set
>>>> osd_request_timeout to 0 by default.
>>>>
>>>
>>> Also, what exactly are the requests blocked on when this occurs? Is the
>>> ceph_osd_request_target ending up paused? I wonder if we might be better
>>> off with something that returns a hard error under the circumstances
>>> where you're hanging, rather than depending on timeouts.
>>
>> I wonder that it is better to complete requests only after timeout expired, just
>> because a request can fail due to temporary network issues (e.g. router
>> restarted) or restarting machine/services.
>>
>>> Having a job that has to wake up every second or so isn't ideal. Perhaps
>>> you would be better off scheduling the delayed work in the request
>>> submission codepath, and only rearm it when the tree isn't empty after
>>> calling complete_osd_stuck_requests?
>>
>> Would you please tell me more about rearming work only if the tree is not empty
>> after calling complete_osd_stuck_requests? From what code we should call
>> complete_osd_stuck_requests?
>>
>
> Sure. I'm saying you would want to call schedule_delayed_work for the
> timeout handler from the request submission path when you link a request
> into the tree that has a timeout. Maybe in __submit_request?
>
> Then, instead of unconditionally calling schedule_delayed_work at the
> end of handle_request_timeout, you'd only call it if there were no
> requests still sitting in the osdc trees.

Thanks for the clarification.
But, unfortunately, requests which are linked to the "homeless" OSD will not be 
retried until a new osdmap with a proper OSD is received.

>> As I understood, there are two primary cases:
>> 1 - Requests to OSD failed, but monitors do not return new osdmap (because all
>> monitors are offline or monitors did not update osdmap yet).
>> In this case requests are retried by cyclic calling ceph_con_workfn -> con_fault
>> -> ceph_con_workfn. We can check request timestamp and does not call con_fault
>> but complete it.
>>
>> 2 - Monitors return new osdmap which does not have any OSD for RBD.
>> In this case all requests to the last ready OSD will be linked on "homeless" OSD
>> and will not be retried until new osdmap with appropriate OSD received. I think
>> that we need additional periodic checking timestamp such requests.
>>
>> Yes, there is already existing job handle_timeout. But the responsibility of
>> this job is to sending keepalive requests to slow OSD. I'm not sure that it is a
>> good idea to perform additional actions inside this job.
>> I decided that creating specific job handle_osd_request_timeout is more applicable.
>>
>> This job will be run only once with a default value of osd_request_timeout (0).
>
> Ahh, I missed that -- thanks.
>
>> At the same time, I think that user will not use too small value for this
>> parameter. I wonder that typical value will be about 1 minute or greater.
>>
>>> Also, I don't see where this job is ever cancelled when the osdc is torn
>>> down. That needs to occur or you'll cause a use-after-free oops...
>>
>> It is my fault, thanks for the correction.
>>

-- 
Artur

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-10 12:25   ` Ilya Dryomov
@ 2017-02-13  9:11     ` Ilya Dryomov
  2017-02-13  9:54       ` Artur Molchanov
  0 siblings, 1 reply; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-13  9:11 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Artur Molchanov, Ceph Development

[-- Attachment #1: Type: text/plain, Size: 10072 bytes --]

On Fri, Feb 10, 2017 at 1:25 PM, Ilya Dryomov <idryomov@gmail.com> wrote:
> On Fri, Feb 10, 2017 at 12:31 PM, Jeff Layton <jlayton@redhat.com> wrote:
>> On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
>>> From: Artur Molchanov <artur.molchanov@synesis.ru>
>>>
>>> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
>>> If osd_request_timeout equals to 0 (default value) then do nothing with
>>> hung requests (keep default behavior).
>>>
>>> Create RBD map option osd_request_timeout to set timeout in seconds. Set
>>> osd_request_timeout to 0 by default.
>>>
>> Also, what exactly are the requests blocked on when this occurs? Is the
>> ceph_osd_request_target ending up paused? I wonder if we might be better
>> off with something that returns a hard error under the circumstances
>> where you're hanging, rather than depending on timeouts.
>>
>>
>>> Signed-off-by: Artur Molchanov <artur.molchanov@synesis.ru>
>>> ---
>>>   include/linux/ceph/libceph.h    |  8 +++--
>>>   include/linux/ceph/osd_client.h |  1 +
>>>   net/ceph/ceph_common.c          | 12 +++++++
>>>   net/ceph/osd_client.c           | 72 +++++++++++++++++++++++++++++++++++++++++
>>>   4 files changed, 90 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
>>> index 1816c5e..10d8acb 100644
>>> --- a/include/linux/ceph/libceph.h
>>> +++ b/include/linux/ceph/libceph.h
>>> @@ -48,6 +48,7 @@ struct ceph_options {
>>>       unsigned long mount_timeout;            /* jiffies */
>>>       unsigned long osd_idle_ttl;             /* jiffies */
>>>       unsigned long osd_keepalive_timeout;    /* jiffies */
>>> +     unsigned long osd_request_timeout;      /* jiffies */
>>>
>>>       /*
>>>        * any type that can't be simply compared or doesn't need need
>>> @@ -65,9 +66,10 @@ struct ceph_options {
>>>   /*
>>>    * defaults
>>>    */
>>> -#define CEPH_MOUNT_TIMEOUT_DEFAULT   msecs_to_jiffies(60 * 1000)
>>> -#define CEPH_OSD_KEEPALIVE_DEFAULT   msecs_to_jiffies(5 * 1000)
>>> -#define CEPH_OSD_IDLE_TTL_DEFAULT    msecs_to_jiffies(60 * 1000)
>>> +#define CEPH_MOUNT_TIMEOUT_DEFAULT           msecs_to_jiffies(60 * 1000)
>>> +#define CEPH_OSD_KEEPALIVE_DEFAULT           msecs_to_jiffies(5 * 1000)
>>> +#define CEPH_OSD_IDLE_TTL_DEFAULT            msecs_to_jiffies(60 * 1000)
>>> +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT     msecs_to_jiffies(0 * 1000)
>>>
>>>   #define CEPH_MONC_HUNT_INTERVAL             msecs_to_jiffies(3 * 1000)
>>>   #define CEPH_MONC_PING_INTERVAL             msecs_to_jiffies(10 * 1000)
>>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>>> index 03a6653..e45cba0 100644
>>> --- a/include/linux/ceph/osd_client.h
>>> +++ b/include/linux/ceph/osd_client.h
>>> @@ -279,6 +279,7 @@ struct ceph_osd_client {
>>>       atomic_t               num_homeless;
>>>       struct delayed_work    timeout_work;
>>>       struct delayed_work    osds_timeout_work;
>>> +     struct delayed_work    osd_request_timeout_work;
>>>   #ifdef CONFIG_DEBUG_FS
>>>       struct dentry          *debugfs_file;
>>>   #endif
>>> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
>>> index 464e885..81876c8 100644
>>> --- a/net/ceph/ceph_common.c
>>> +++ b/net/ceph/ceph_common.c
>>> @@ -230,6 +230,7 @@ enum {
>>>       Opt_osdkeepalivetimeout,
>>>       Opt_mount_timeout,
>>>       Opt_osd_idle_ttl,
>>> +     Opt_osd_request_timeout,
>>>       Opt_last_int,
>>>       /* int args above */
>>>       Opt_fsid,
>>> @@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
>>>       {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
>>>       {Opt_mount_timeout, "mount_timeout=%d"},
>>>       {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
>>> +     {Opt_osd_request_timeout, "osd_request_timeout=%d"},
>>>       /* int args above */
>>>       {Opt_fsid, "fsid=%s"},
>>>       {Opt_name, "name=%s"},
>>> @@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
>>>       opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
>>>       opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
>>>       opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
>>> +     opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
>>>
>>>       /* get mon ip(s) */
>>>       /* ip1[:port1][,ip2[:port2]...] */
>>> @@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
>>>                       }
>>>                       opt->mount_timeout = msecs_to_jiffies(intval * 1000);
>>>                       break;
>>> +             case Opt_osd_request_timeout:
>>> +                     /* 0 is "wait forever" (i.e. infinite timeout) */
>>> +                     if (intval < 0 || intval > INT_MAX / 1000) {
>>> +                             pr_err("osd_request_timeout out of range\n");
>>> +                             err = -EINVAL;
>>> +                             goto out;
>>> +                     }
>>> +                     opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
>>> +                     break;
>>>
>>>               case Opt_share:
>>>                       opt->flags &= ~CEPH_OPT_NOSHARE;
>>> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
>>> index 842f049..2f5a024 100644
>>> --- a/net/ceph/osd_client.c
>>> +++ b/net/ceph/osd_client.c
>>> @@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
>>>                             osdc->client->options->osd_keepalive_timeout);
>>>   }
>>>
>>> +/*
>>> + * Complete request to OSD with error err if it started before cutoff.
>>> + */
>>> +static void complete_osd_stuck_request(struct ceph_osd_request *req,
>>> +                                    unsigned long cutoff,
>>> +                                    int err)
>>> +{
>>> +     if (!time_before(req->r_stamp, cutoff))
>>> +             return;
>>> +
>>> +     pr_warn_ratelimited("osd req on osd%d timeout expired\n",
>>> +                         req->r_osd->o_osd);
>>> +
>>> +     complete_request(req, err);
>>> +}
>>> +
>>> +/*
>>> + * Complete all requests to OSD that has been active for more than timeout.
>>> + */
>>> +static void complete_osd_stuck_requests(struct ceph_osd *osd,
>>> +                                     unsigned long timeout,
>>> +                                     int err)
>>> +{
>>> +     struct ceph_osd_request *req, *n;
>>> +     const unsigned long cutoff = jiffies - timeout;
>>> +
>>> +     rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node) {
>>> +             complete_osd_stuck_request(req, cutoff, -EIO);
>>> +     }
>>> +}
>>> +
>>> +/*
>>> + * Timeout callback, called every N seconds. When 1 or more OSD
>>> + * requests that has been active for more than N seconds,
>>> + * we complete it with error EIO.
>>> + */
>>> +static void handle_osd_request_timeout(struct work_struct *work)
>>> +{
>>> +     struct ceph_osd_client *osdc =
>>> +             container_of(work, struct ceph_osd_client,
>>> +                          osd_request_timeout_work.work);
>>> +     struct ceph_osd *osd, *n;
>>> +     struct ceph_options *opts = osdc->client->options;
>>> +     const unsigned long timeout = opts->osd_request_timeout;
>>> +
>>> +     dout("%s osdc %p\n", __func__, osdc);
>>> +
>>> +     if (!timeout)
>>> +             return;
>>> +
>>> +     down_write(&osdc->lock);
>>> +
>>> +     rbtree_postorder_for_each_entry_safe(osd, n, &osdc->osds, o_node) {
>>> +             complete_osd_stuck_requests(osd, timeout, -EIO);
>>> +     }
>>> +
>>> +     up_write(&osdc->lock);
>>> +
>>> +     if (!atomic_read(&osdc->num_homeless))
>>> +             goto out;
>>> +
>>> +     down_write(&osdc->lock);
>>> +     complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
>>> +     up_write(&osdc->lock);
>>> +
>>> +out:
>>> +     schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
>>> +}
>>> +
>>>   static void handle_osds_timeout(struct work_struct *work)
>>>   {
>>>       struct ceph_osd_client *osdc =
>>> @@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
>>> ceph_client *client)
>>>       osdc->linger_map_checks = RB_ROOT;
>>>       INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
>>>       INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
>>> +     INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);
>>>
>>>       err = -ENOMEM;
>>>       osdc->osdmap = ceph_osdmap_alloc();
>>> @@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
>>> ceph_client *client)
>>>                             osdc->client->options->osd_keepalive_timeout);
>>>       schedule_delayed_work(&osdc->osds_timeout_work,
>>>           round_jiffies_relative(osdc->client->options->osd_idle_ttl));
>>> +     schedule_delayed_work(&osdc->osd_request_timeout_work,
>>> +         round_jiffies_relative(osdc->client->options->osd_request_timeout));
>>>
>>>       return 0;
>>>
>>
>> Having a job that has to wake up every second or so isn't ideal. Perhaps
>> you would be better off scheduling the delayed work in the request
>> submission codepath, and only rearm it when the tree isn't empty after
>> calling complete_osd_stuck_requests?
>>
>> Also, I don't see where this job is ever cancelled when the osdc is torn
>> down. That needs to occur or you'll cause a use-after-free oops...
>
> Yeah, there are other bugs here: rbtree_postorder_for_each_entry_safe()
> can't be used because complete_request() calls erase(), etc.  The patch
> I had just piggy-backed on handle_timeout(), which is called every five
> seconds anyway.
>
> Given that there is interest, I think it's worth adding.

Hi Artur,

How about the attached patch?  handle_timeout() is going to iterate
over all but the homeless OSD anyway; all it costs us is a couple of
tests, so I don't think a separate work is needed.

abort_request() is a simple wrapper around complete_request(), making
it safe to call at any time -- replace it with complete_request() for
now if you want to try this out.

Thanks,

                Ilya

[-- Attachment #2: osd-request-timeout.diff --]
[-- Type: text/plain, Size: 5856 bytes --]

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 1816c5e26581..88cd5dc8e238 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
 	unsigned long mount_timeout;		/* jiffies */
 	unsigned long osd_idle_ttl;		/* jiffies */
 	unsigned long osd_keepalive_timeout;	/* jiffies */
+	unsigned long osd_request_timeout;	/* jiffies */
 
 	/*
 	 * any type that can't be simply compared or doesn't need need
@@ -68,6 +69,7 @@ struct ceph_options {
 #define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
 #define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
 #define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0  /* no timeout */
 
 #define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
 #define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 03a6653d329a..4db031dd748d 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -193,6 +193,7 @@ struct ceph_osd_request {
 
 	/* internal */
 	unsigned long r_stamp;                /* jiffies, send or check time */
+	unsigned long r_start_stamp;          /* jiffies */
 	int r_attempts;
 	struct ceph_eversion r_replay_version; /* aka reassert_version */
 	u32 r_last_force_resend;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 464e88599b9d..108533859a53 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
 	Opt_osdkeepalivetimeout,
 	Opt_mount_timeout,
 	Opt_osd_idle_ttl,
+	Opt_osd_request_timeout,
 	Opt_last_int,
 	/* int args above */
 	Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
 	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
 	{Opt_mount_timeout, "mount_timeout=%d"},
 	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+	{Opt_osd_request_timeout, "osd_request_timeout=%d"},
 	/* int args above */
 	{Opt_fsid, "fsid=%s"},
 	{Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+	opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
 
 	/* get mon ip(s) */
 	/* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
 			}
 			opt->mount_timeout = msecs_to_jiffies(intval * 1000);
 			break;
+		case Opt_osd_request_timeout:
+			/* 0 is "wait forever" (i.e. infinite timeout) */
+			if (intval < 0 || intval > INT_MAX / 1000) {
+				pr_err("osd_request_timeout out of range\n");
+				err = -EINVAL;
+				goto out;
+			}
+			opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
+			break;
 
 		case Opt_share:
 			opt->flags &= ~CEPH_OPT_NOSHARE;
@@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
 	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
 		seq_printf(m, "osdkeepalivetimeout=%d,",
 		    jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
+	if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT)
+		seq_printf(m, "osd_request_timeout=%d,",
+			   jiffies_to_msecs(opt->osd_request_timeout) / 1000);
 
 	/* drop redundant comma */
 	if (m->count != pos)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b8da4279e745..1d580530c700 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1717,6 +1717,8 @@ static void account_request(struct ceph_osd_request *req)
 
 	WARN_ON(req->r_unsafe_callback && (req->r_flags & mask) != mask);
 	atomic_inc(&req->r_osdc->num_requests);
+
+	req->r_start_stamp = jiffies;
 }
 
 static void submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -2504,6 +2506,7 @@ static void handle_timeout(struct work_struct *work)
 		container_of(work, struct ceph_osd_client, timeout_work.work);
 	struct ceph_options *opts = osdc->client->options;
 	unsigned long cutoff = jiffies - opts->osd_keepalive_timeout;
+	unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout;
 	LIST_HEAD(slow_osds);
 	struct rb_node *n, *p;
 
@@ -2519,15 +2522,23 @@ static void handle_timeout(struct work_struct *work)
 		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
 		bool found = false;
 
-		for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) {
+		for (p = rb_first(&osd->o_requests); p; ) {
 			struct ceph_osd_request *req =
 			    rb_entry(p, struct ceph_osd_request, r_node);
 
+			p = rb_next(p); /* abort_request() */
+
 			if (time_before(req->r_stamp, cutoff)) {
 				dout(" req %p tid %llu on osd%d is laggy\n",
 				     req, req->r_tid, osd->o_osd);
 				found = true;
 			}
+			if (opts->osd_request_timeout &&
+			    time_before(req->r_start_stamp, expiry_cutoff)) {
+				pr_err_ratelimited("tid %llu on osd%d timeout\n",
+				       req->r_tid, osd->o_osd);
+				abort_request(req, -ETIMEDOUT);
+			}
 		}
 		for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) {
 			struct ceph_osd_linger_request *lreq =
@@ -2547,6 +2558,21 @@ static void handle_timeout(struct work_struct *work)
 			list_move_tail(&osd->o_keepalive_item, &slow_osds);
 	}
 
+	if (opts->osd_request_timeout) {
+		for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) {
+			struct ceph_osd_request *req =
+			    rb_entry(p, struct ceph_osd_request, r_node);
+
+			p = rb_next(p); /* abort_request() */
+
+			if (time_before(req->r_start_stamp, expiry_cutoff)) {
+				pr_err_ratelimited("tid %llu on osd%d timeout\n",
+				       req->r_tid, osdc->homeless_osd.o_osd);
+				abort_request(req, -ETIMEDOUT);
+			}
+		}
+	}
+
 	if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds))
 		maybe_request_map(osdc);
 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-13  9:11     ` Ilya Dryomov
@ 2017-02-13  9:54       ` Artur Molchanov
  2017-02-13 14:15         ` Ilya Dryomov
  0 siblings, 1 reply; 14+ messages in thread
From: Artur Molchanov @ 2017-02-13  9:54 UTC (permalink / raw)
  To: Ilya Dryomov, Jeff Layton; +Cc: Ceph Development

Hi Ilya,

On 02/13/2017 12:11 PM, Ilya Dryomov wrote:
> Hi Artur,
>
> How about the attached patch?  handle_timeout() is going to iterate
> over all but the homeless OSD anyway; all it costs us is a couple of
> tests, so I don't think a separate work is needed.
>
> abort_request() is a simple wrapper around complete_request(), making
> it safe to call at any time -- replace it with complete_request() for
> now if you want to try this out.

Using one job for sending keepalive requests and completing stuck requests 
brings us to the need to check that osd_keepalive_timeout is not larger than 
osd_request_timeout. So we should not forget to mention it in the documentation.
Is it worth creating a correlation between osd_keepalive_timeout and 
osd_request_timeout?

-- 
Artur

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-13  9:54       ` Artur Molchanov
@ 2017-02-13 14:15         ` Ilya Dryomov
  2017-02-13 15:23           ` Artur Molchanov
  0 siblings, 1 reply; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-13 14:15 UTC (permalink / raw)
  To: Artur Molchanov; +Cc: Jeff Layton, Ceph Development

On Mon, Feb 13, 2017 at 10:54 AM, Artur Molchanov
<artur.molchanov@synesis.ru> wrote:
> Hi Ilya,
>
> On 02/13/2017 12:11 PM, Ilya Dryomov wrote:
>>
>> Hi Artur,
>>
>> How about the attached patch?  handle_timeout() is going to iterate
>> over all but the homeless OSD anyway; all it costs us is a couple of
>> tests, so I don't think a separate work is needed.
>>
>> abort_request() is a simple wrapper around complete_request(), making
>> it safe to call at any time -- replace it with complete_request() for
>> now if you want to try this out.
>
>
> Using one job for sending keepalive requests and completing stuck requests
> brings us to the need to check that osd_keepalive_timeout is not larger then
> osd_request_timeout. So we should not forget to say about it in the
> documentation.

I'll make a note to mention that it is osd_keepalive_timeout-precise.

> Is it worth creating a correlation between osd_keepalive_timeout and
> osd_request_timeout?

No, probably not.  osd_keepalive_timeout and osd_idle_ttl are similarly
related and we don't have any.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-13 14:15         ` Ilya Dryomov
@ 2017-02-13 15:23           ` Artur Molchanov
  2017-02-13 15:32             ` Ilya Dryomov
  0 siblings, 1 reply; 14+ messages in thread
From: Artur Molchanov @ 2017-02-13 15:23 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: Jeff Layton, Ceph Development

On 02/13/2017 05:15 PM, Ilya Dryomov wrote:
> On Mon, Feb 13, 2017 at 10:54 AM, Artur Molchanov
> <artur.molchanov@synesis.ru> wrote:
>> Hi Ilya,
>>
>> On 02/13/2017 12:11 PM, Ilya Dryomov wrote:
>>>
>>> Hi Artur,
>>>
>>> How about the attached patch?  handle_timeout() is going to iterate
>>> over all but the homeless OSD anyway; all it costs us is a couple of
>>> tests, so I don't think a separate work is needed.
>>>
>>> abort_request() is a simple wrapper around complete_request(), making
>>> it safe to call at any time -- replace it with complete_request() for
>>> now if you want to try this out.
>>
>>
>> Using one job for sending keepalive requests and completing stuck requests
>> brings us to the need to check that osd_keepalive_timeout is not larger then
>> osd_request_timeout. So we should not forget to say about it in the
>> documentation.
>
> I'll make a note to mention that it is osd_keepalive_timeout-precise.
>
>> Is it worth creating a correlation between osd_keepalive_timeout and
>> osd_request_timeout?
>
> No, probably not.  osd_keepalive_timeout and osd_idle_ttl are similarly
> related and we don't have any.

Your variant of the patch works.
As I said, using the job handle_timeout to send keepalive requests and abort 
stuck requests is not the perfect choice, but OK, it works.

What should I do to get this patch merged upstream?

-- 
Artur


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
  2017-02-13 15:23           ` Artur Molchanov
@ 2017-02-13 15:32             ` Ilya Dryomov
  0 siblings, 0 replies; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-13 15:32 UTC (permalink / raw)
  To: Artur Molchanov; +Cc: Jeff Layton, Ceph Development

On Mon, Feb 13, 2017 at 4:23 PM, Artur Molchanov
<artur.molchanov@synesis.ru> wrote:
> On 02/13/2017 05:15 PM, Ilya Dryomov wrote:
>>
>> On Mon, Feb 13, 2017 at 10:54 AM, Artur Molchanov
>> <artur.molchanov@synesis.ru> wrote:
>>>
>>> Hi Ilya,
>>>
>>> On 02/13/2017 12:11 PM, Ilya Dryomov wrote:
>>>>
>>>>
>>>> Hi Artur,
>>>>
>>>> How about the attached patch?  handle_timeout() is going to iterate
>>>> over all but the homeless OSD anyway; all it costs us is a couple of
>>>> tests, so I don't think a separate work is needed.
>>>>
>>>> abort_request() is a simple wrapper around complete_request(), making
>>>> it safe to call at any time -- replace it with complete_request() for
>>>> now if you want to try this out.
>>>
>>>
>>>
>>> Using one job for sending keepalive requests and completing stuck
>>> requests
>>> brings us to the need to check that osd_keepalive_timeout is not larger
>>> then
>>> osd_request_timeout. So we should not forget to say about it in the
>>> documentation.
>>
>>
>> I'll make a note to mention that it is osd_keepalive_timeout-precise.
>>
>>> Is it worth creating a correlation between osd_keepalive_timeout and
>>> osd_request_timeout?
>>
>>
>> No, probably not.  osd_keepalive_timeout and osd_idle_ttl are similarly
>> related and we don't have any.
>
>
> Your variant of patch works.
> As I said, using job handle_timeout to send keepalive requests and aborting
> stuck requests is not the perfect choice, but OK, it works.
>
> What should I do to make this patch merged to upstream?

Nothing -- I'll apply it for 4.11 once we have abort_request().

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-02-13 15:32 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-09 13:04 [PATCH] libceph: Complete stuck requests to OSD with EIO Artur Molchanov
2017-02-09 14:27 ` Ilya Dryomov
2017-02-09 15:51   ` Artur Molchanov
2017-02-09 16:11     ` Wido den Hollander
2017-02-10 11:31 ` Jeff Layton
2017-02-10 12:25   ` Ilya Dryomov
2017-02-13  9:11     ` Ilya Dryomov
2017-02-13  9:54       ` Artur Molchanov
2017-02-13 14:15         ` Ilya Dryomov
2017-02-13 15:23           ` Artur Molchanov
2017-02-13 15:32             ` Ilya Dryomov
2017-02-11 20:30   ` Artur Molchanov
2017-02-12 12:34     ` Jeff Layton
2017-02-12 15:26       ` Artur Molchanov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.