* [PATCH] libceph: Complete stuck requests to OSD with EIO
@ 2017-02-09 13:04 Artur Molchanov
2017-02-09 14:27 ` Ilya Dryomov
2017-02-10 11:31 ` Jeff Layton
0 siblings, 2 replies; 14+ messages in thread
From: Artur Molchanov @ 2017-02-09 13:04 UTC (permalink / raw)
To: Ilya Dryomov; +Cc: ceph-devel
From: Artur Molchanov <artur.molchanov@synesis.ru>
Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
If osd_request_timeout equals to 0 (default value) then do nothing with
hung requests (keep default behavior).
Create RBD map option osd_request_timeout to set timeout in seconds. Set
osd_request_timeout to 0 by default.
Signed-off-by: Artur Molchanov <artur.molchanov@synesis.ru>
---
include/linux/ceph/libceph.h | 8 +++--
include/linux/ceph/osd_client.h | 1 +
net/ceph/ceph_common.c | 12 +++++++
net/ceph/osd_client.c | 72 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 90 insertions(+), 3 deletions(-)
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 1816c5e..10d8acb 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
unsigned long mount_timeout; /* jiffies */
unsigned long osd_idle_ttl; /* jiffies */
unsigned long osd_keepalive_timeout; /* jiffies */
+ unsigned long osd_request_timeout; /* jiffies */
/*
* any type that can't be simply compared or doesn't need need
@@ -65,9 +66,10 @@ struct ceph_options {
/*
* defaults
*/
-#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
-#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
-#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
+#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
+#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT msecs_to_jiffies(0 * 1000)
#define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000)
#define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 03a6653..e45cba0 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -279,6 +279,7 @@ struct ceph_osd_client {
atomic_t num_homeless;
struct delayed_work timeout_work;
struct delayed_work osds_timeout_work;
+ struct delayed_work osd_request_timeout_work;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_file;
#endif
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 464e885..81876c8 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
Opt_osdkeepalivetimeout,
Opt_mount_timeout,
Opt_osd_idle_ttl,
+ Opt_osd_request_timeout,
Opt_last_int,
/* int args above */
Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
{Opt_mount_timeout, "mount_timeout=%d"},
{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+ {Opt_osd_request_timeout, "osd_request_timeout=%d"},
/* int args above */
{Opt_fsid, "fsid=%s"},
{Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+ opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
/* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
}
opt->mount_timeout = msecs_to_jiffies(intval * 1000);
break;
+ case Opt_osd_request_timeout:
+ /* 0 is "wait forever" (i.e. infinite timeout) */
+ if (intval < 0 || intval > INT_MAX / 1000) {
+ pr_err("osd_request_timeout out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
+ break;
case Opt_share:
opt->flags &= ~CEPH_OPT_NOSHARE;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 842f049..2f5a024 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
osdc->client->options->osd_keepalive_timeout);
}
+/*
+ * Complete request to OSD with error err if it started before cutoff.
+ */
+static void complete_osd_stuck_request(struct ceph_osd_request *req,
+ unsigned long cutoff,
+ int err)
+{
+ if (!time_before(req->r_stamp, cutoff))
+ return;
+
+ pr_warn_ratelimited("osd req on osd%d timeout expired\n",
+ req->r_osd->o_osd);
+
+ complete_request(req, err);
+}
+
+/*
+ * Complete all requests to OSD that has been active for more than timeout.
+ */
+static void complete_osd_stuck_requests(struct ceph_osd *osd,
+ unsigned long timeout,
+ int err)
+{
+ struct ceph_osd_request *req, *n;
+ const unsigned long cutoff = jiffies - timeout;
+
+ rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node) {
+ complete_osd_stuck_request(req, cutoff, -EIO);
+ }
+}
+
+/*
+ * Timeout callback, called every N seconds. When 1 or more OSD
+ * requests that has been active for more than N seconds,
+ * we complete it with error EIO.
+ */
+static void handle_osd_request_timeout(struct work_struct *work)
+{
+ struct ceph_osd_client *osdc =
+ container_of(work, struct ceph_osd_client,
+ osd_request_timeout_work.work);
+ struct ceph_osd *osd, *n;
+ struct ceph_options *opts = osdc->client->options;
+ const unsigned long timeout = opts->osd_request_timeout;
+
+ dout("%s osdc %p\n", __func__, osdc);
+
+ if (!timeout)
+ return;
+
+ down_write(&osdc->lock);
+
+ rbtree_postorder_for_each_entry_safe(osd, n, &osdc->osds, o_node) {
+ complete_osd_stuck_requests(osd, timeout, -EIO);
+ }
+
+ up_write(&osdc->lock);
+
+ if (!atomic_read(&osdc->num_homeless))
+ goto out;
+
+ down_write(&osdc->lock);
+ complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
+ up_write(&osdc->lock);
+
+out:
+ schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
+}
+
static void handle_osds_timeout(struct work_struct *work)
{
struct ceph_osd_client *osdc =
@@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
ceph_client *client)
osdc->linger_map_checks = RB_ROOT;
INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
+ INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);
err = -ENOMEM;
osdc->osdmap = ceph_osdmap_alloc();
@@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
ceph_client *client)
osdc->client->options->osd_keepalive_timeout);
schedule_delayed_work(&osdc->osds_timeout_work,
round_jiffies_relative(osdc->client->options->osd_idle_ttl));
+ schedule_delayed_work(&osdc->osd_request_timeout_work,
+ round_jiffies_relative(osdc->client->options->osd_request_timeout));
return 0;
--
2.7.4
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-09 13:04 [PATCH] libceph: Complete stuck requests to OSD with EIO Artur Molchanov
@ 2017-02-09 14:27 ` Ilya Dryomov
2017-02-09 15:51 ` Artur Molchanov
2017-02-10 11:31 ` Jeff Layton
1 sibling, 1 reply; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-09 14:27 UTC (permalink / raw)
To: Artur Molchanov; +Cc: Ceph Development
On Thu, Feb 9, 2017 at 2:04 PM, Artur Molchanov
<artur.molchanov@synesis.ru> wrote:
> From: Artur Molchanov <artur.molchanov@synesis.ru>
>
> Complete stuck requests to OSD with error EIO after osd_request_timeout
> expired.
> If osd_request_timeout equals to 0 (default value) then do nothing with
> hung requests (keep default behavior).
>
> Create RBD map option osd_request_timeout to set timeout in seconds. Set
> osd_request_timeout to 0 by default.
Hi Artur,
What is the use case? What do you expect this timeout to be in your
environment?
I actually have a slightly more compact version of this somewhere,
which I deliberately excluded when updating the OSD client a few kernel
releases ago. IIRC complete_request() wasn't meant to be called
arbitrarily, but we can probably fix that.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-09 14:27 ` Ilya Dryomov
@ 2017-02-09 15:51 ` Artur Molchanov
2017-02-09 16:11 ` Wido den Hollander
0 siblings, 1 reply; 14+ messages in thread
From: Artur Molchanov @ 2017-02-09 15:51 UTC (permalink / raw)
To: Ilya Dryomov; +Cc: Ceph Development
Hi Ilya,
Currently we're working on moving our product to kubernetes / ceph stack. We're
actively tuning ceph configuration and ceph cluster often need to be redeployed.
We have to warm reboot all machines with mounted RBDs as a result. This patch
allows us to remap and remount RBDs without time consuming reboot.
On 02/09/2017 05:27 PM, Ilya Dryomov wrote:
> What is the use case? What do you expect this timeout to be in your
> environment?
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-09 15:51 ` Artur Molchanov
@ 2017-02-09 16:11 ` Wido den Hollander
0 siblings, 0 replies; 14+ messages in thread
From: Wido den Hollander @ 2017-02-09 16:11 UTC (permalink / raw)
To: Artur Molchanov, Ilya Dryomov; +Cc: Ceph Development
> Op 9 februari 2017 om 16:51 schreef Artur Molchanov <artur.molchanov@synesis.ru>:
>
>
> Hi Ilya,
>
> Currently we're working on moving our product to kubernetes / ceph stack. We're
> actively tuning ceph configuration and ceph cluster often need to be redeployed.
> We have to warm reboot all machines with mounted RBDs as a result. This patch
> allows us to remap and remount RBDs without time consuming reboot.
>
I understand the usecase. You don't always want to block, sometimes you want to timeout after some time.
Wido
> On 02/09/2017 05:27 PM, Ilya Dryomov wrote:
> > What is the use case? What do you expect this timeout to be in your
> > environment?
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-09 13:04 [PATCH] libceph: Complete stuck requests to OSD with EIO Artur Molchanov
2017-02-09 14:27 ` Ilya Dryomov
@ 2017-02-10 11:31 ` Jeff Layton
2017-02-10 12:25 ` Ilya Dryomov
2017-02-11 20:30 ` Artur Molchanov
1 sibling, 2 replies; 14+ messages in thread
From: Jeff Layton @ 2017-02-10 11:31 UTC (permalink / raw)
To: Artur Molchanov, Ilya Dryomov; +Cc: ceph-devel
On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
> From: Artur Molchanov <artur.molchanov@synesis.ru>
>
> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
> If osd_request_timeout equals to 0 (default value) then do nothing with
> hung requests (keep default behavior).
>
> Create RBD map option osd_request_timeout to set timeout in seconds. Set
> osd_request_timeout to 0 by default.
>
Also, what exactly are the requests blocked on when this occurs? Is the
ceph_osd_request_target ending up paused? I wonder if we might be better
off with something that returns a hard error under the circumstances
where you're hanging, rather than depending on timeouts.
> Signed-off-by: Artur Molchanov <artur.molchanov@synesis.ru>
> ---
> include/linux/ceph/libceph.h | 8 +++--
> include/linux/ceph/osd_client.h | 1 +
> net/ceph/ceph_common.c | 12 +++++++
> net/ceph/osd_client.c | 72 +++++++++++++++++++++++++++++++++++++++++
> 4 files changed, 90 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
> index 1816c5e..10d8acb 100644
> --- a/include/linux/ceph/libceph.h
> +++ b/include/linux/ceph/libceph.h
> @@ -48,6 +48,7 @@ struct ceph_options {
> unsigned long mount_timeout; /* jiffies */
> unsigned long osd_idle_ttl; /* jiffies */
> unsigned long osd_keepalive_timeout; /* jiffies */
> + unsigned long osd_request_timeout; /* jiffies */
>
> /*
> * any type that can't be simply compared or doesn't need need
> @@ -65,9 +66,10 @@ struct ceph_options {
> /*
> * defaults
> */
> -#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
> -#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
> -#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
> +#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
> +#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
> +#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
> +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT msecs_to_jiffies(0 * 1000)
>
> #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000)
> #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000)
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index 03a6653..e45cba0 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -279,6 +279,7 @@ struct ceph_osd_client {
> atomic_t num_homeless;
> struct delayed_work timeout_work;
> struct delayed_work osds_timeout_work;
> + struct delayed_work osd_request_timeout_work;
> #ifdef CONFIG_DEBUG_FS
> struct dentry *debugfs_file;
> #endif
> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> index 464e885..81876c8 100644
> --- a/net/ceph/ceph_common.c
> +++ b/net/ceph/ceph_common.c
> @@ -230,6 +230,7 @@ enum {
> Opt_osdkeepalivetimeout,
> Opt_mount_timeout,
> Opt_osd_idle_ttl,
> + Opt_osd_request_timeout,
> Opt_last_int,
> /* int args above */
> Opt_fsid,
> @@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
> {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
> {Opt_mount_timeout, "mount_timeout=%d"},
> {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
> + {Opt_osd_request_timeout, "osd_request_timeout=%d"},
> /* int args above */
> {Opt_fsid, "fsid=%s"},
> {Opt_name, "name=%s"},
> @@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
> opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
> opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
> opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
> + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
>
> /* get mon ip(s) */
> /* ip1[:port1][,ip2[:port2]...] */
> @@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
> }
> opt->mount_timeout = msecs_to_jiffies(intval * 1000);
> break;
> + case Opt_osd_request_timeout:
> + /* 0 is "wait forever" (i.e. infinite timeout) */
> + if (intval < 0 || intval > INT_MAX / 1000) {
> + pr_err("osd_request_timeout out of range\n");
> + err = -EINVAL;
> + goto out;
> + }
> + opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
> + break;
>
> case Opt_share:
> opt->flags &= ~CEPH_OPT_NOSHARE;
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 842f049..2f5a024 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
> osdc->client->options->osd_keepalive_timeout);
> }
>
> +/*
> + * Complete request to OSD with error err if it started before cutoff.
> + */
> +static void complete_osd_stuck_request(struct ceph_osd_request *req,
> + unsigned long cutoff,
> + int err)
> +{
> + if (!time_before(req->r_stamp, cutoff))
> + return;
> +
> + pr_warn_ratelimited("osd req on osd%d timeout expired\n",
> + req->r_osd->o_osd);
> +
> + complete_request(req, err);
> +}
> +
> +/*
> + * Complete all requests to OSD that has been active for more than timeout.
> + */
> +static void complete_osd_stuck_requests(struct ceph_osd *osd,
> + unsigned long timeout,
> + int err)
> +{
> + struct ceph_osd_request *req, *n;
> + const unsigned long cutoff = jiffies - timeout;
> +
> + rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node) {
> + complete_osd_stuck_request(req, cutoff, -EIO);
> + }
> +}
> +
> +/*
> + * Timeout callback, called every N seconds. When 1 or more OSD
> + * requests that has been active for more than N seconds,
> + * we complete it with error EIO.
> + */
> +static void handle_osd_request_timeout(struct work_struct *work)
> +{
> + struct ceph_osd_client *osdc =
> + container_of(work, struct ceph_osd_client,
> + osd_request_timeout_work.work);
> + struct ceph_osd *osd, *n;
> + struct ceph_options *opts = osdc->client->options;
> + const unsigned long timeout = opts->osd_request_timeout;
> +
> + dout("%s osdc %p\n", __func__, osdc);
> +
> + if (!timeout)
> + return;
> +
> + down_write(&osdc->lock);
> +
> + rbtree_postorder_for_each_entry_safe(osd, n, &osdc->osds, o_node) {
> + complete_osd_stuck_requests(osd, timeout, -EIO);
> + }
> +
> + up_write(&osdc->lock);
> +
> + if (!atomic_read(&osdc->num_homeless))
> + goto out;
> +
> + down_write(&osdc->lock);
> + complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
> + up_write(&osdc->lock);
> +
> +out:
> + schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
> +}
> +
> static void handle_osds_timeout(struct work_struct *work)
> {
> struct ceph_osd_client *osdc =
> @@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
> ceph_client *client)
> osdc->linger_map_checks = RB_ROOT;
> INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
> INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
> + INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);
>
> err = -ENOMEM;
> osdc->osdmap = ceph_osdmap_alloc();
> @@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
> ceph_client *client)
> osdc->client->options->osd_keepalive_timeout);
> schedule_delayed_work(&osdc->osds_timeout_work,
> round_jiffies_relative(osdc->client->options->osd_idle_ttl));
> + schedule_delayed_work(&osdc->osd_request_timeout_work,
> + round_jiffies_relative(osdc->client->options->osd_request_timeout));
>
> return 0;
>
Having a job that has to wake up every second or so isn't ideal. Perhaps
you would be better off scheduling the delayed work in the request
submission codepath, and only rearm it when the tree isn't empty after
calling complete_osd_stuck_requests?
Also, I don't see where this job is ever cancelled when the osdc is torn
down. That needs to occur or you'll cause a use-after-free oops...
--
Jeff Layton <jlayton@redhat.com>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-10 11:31 ` Jeff Layton
@ 2017-02-10 12:25 ` Ilya Dryomov
2017-02-13 9:11 ` Ilya Dryomov
2017-02-11 20:30 ` Artur Molchanov
1 sibling, 1 reply; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-10 12:25 UTC (permalink / raw)
To: Jeff Layton; +Cc: Artur Molchanov, Ceph Development
On Fri, Feb 10, 2017 at 12:31 PM, Jeff Layton <jlayton@redhat.com> wrote:
> On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
>> From: Artur Molchanov <artur.molchanov@synesis.ru>
>>
>> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
>> If osd_request_timeout equals to 0 (default value) then do nothing with
>> hung requests (keep default behavior).
>>
>> Create RBD map option osd_request_timeout to set timeout in seconds. Set
>> osd_request_timeout to 0 by default.
>>
> Also, what exactly are the requests blocked on when this occurs? Is the
> ceph_osd_request_target ending up paused? I wonder if we might be better
> off with something that returns a hard error under the circumstances
> where you're hanging, rather than depending on timeouts.
>
>
>> Signed-off-by: Artur Molchanov <artur.molchanov@synesis.ru>
>> ---
>> include/linux/ceph/libceph.h | 8 +++--
>> include/linux/ceph/osd_client.h | 1 +
>> net/ceph/ceph_common.c | 12 +++++++
>> net/ceph/osd_client.c | 72 +++++++++++++++++++++++++++++++++++++++++
>> 4 files changed, 90 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
>> index 1816c5e..10d8acb 100644
>> --- a/include/linux/ceph/libceph.h
>> +++ b/include/linux/ceph/libceph.h
>> @@ -48,6 +48,7 @@ struct ceph_options {
>> unsigned long mount_timeout; /* jiffies */
>> unsigned long osd_idle_ttl; /* jiffies */
>> unsigned long osd_keepalive_timeout; /* jiffies */
>> + unsigned long osd_request_timeout; /* jiffies */
>>
>> /*
>> * any type that can't be simply compared or doesn't need need
>> @@ -65,9 +66,10 @@ struct ceph_options {
>> /*
>> * defaults
>> */
>> -#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
>> -#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
>> -#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
>> +#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
>> +#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
>> +#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
>> +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT msecs_to_jiffies(0 * 1000)
>>
>> #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000)
>> #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000)
>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>> index 03a6653..e45cba0 100644
>> --- a/include/linux/ceph/osd_client.h
>> +++ b/include/linux/ceph/osd_client.h
>> @@ -279,6 +279,7 @@ struct ceph_osd_client {
>> atomic_t num_homeless;
>> struct delayed_work timeout_work;
>> struct delayed_work osds_timeout_work;
>> + struct delayed_work osd_request_timeout_work;
>> #ifdef CONFIG_DEBUG_FS
>> struct dentry *debugfs_file;
>> #endif
>> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
>> index 464e885..81876c8 100644
>> --- a/net/ceph/ceph_common.c
>> +++ b/net/ceph/ceph_common.c
>> @@ -230,6 +230,7 @@ enum {
>> Opt_osdkeepalivetimeout,
>> Opt_mount_timeout,
>> Opt_osd_idle_ttl,
>> + Opt_osd_request_timeout,
>> Opt_last_int,
>> /* int args above */
>> Opt_fsid,
>> @@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
>> {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
>> {Opt_mount_timeout, "mount_timeout=%d"},
>> {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
>> + {Opt_osd_request_timeout, "osd_request_timeout=%d"},
>> /* int args above */
>> {Opt_fsid, "fsid=%s"},
>> {Opt_name, "name=%s"},
>> @@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
>> opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
>> opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
>> opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
>> + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
>>
>> /* get mon ip(s) */
>> /* ip1[:port1][,ip2[:port2]...] */
>> @@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
>> }
>> opt->mount_timeout = msecs_to_jiffies(intval * 1000);
>> break;
>> + case Opt_osd_request_timeout:
>> + /* 0 is "wait forever" (i.e. infinite timeout) */
>> + if (intval < 0 || intval > INT_MAX / 1000) {
>> + pr_err("osd_request_timeout out of range\n");
>> + err = -EINVAL;
>> + goto out;
>> + }
>> + opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
>> + break;
>>
>> case Opt_share:
>> opt->flags &= ~CEPH_OPT_NOSHARE;
>> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
>> index 842f049..2f5a024 100644
>> --- a/net/ceph/osd_client.c
>> +++ b/net/ceph/osd_client.c
>> @@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
>> osdc->client->options->osd_keepalive_timeout);
>> }
>>
>> +/*
>> + * Complete request to OSD with error err if it started before cutoff.
>> + */
>> +static void complete_osd_stuck_request(struct ceph_osd_request *req,
>> + unsigned long cutoff,
>> + int err)
>> +{
>> + if (!time_before(req->r_stamp, cutoff))
>> + return;
>> +
>> + pr_warn_ratelimited("osd req on osd%d timeout expired\n",
>> + req->r_osd->o_osd);
>> +
>> + complete_request(req, err);
>> +}
>> +
>> +/*
>> + * Complete all requests to OSD that has been active for more than timeout.
>> + */
>> +static void complete_osd_stuck_requests(struct ceph_osd *osd,
>> + unsigned long timeout,
>> + int err)
>> +{
>> + struct ceph_osd_request *req, *n;
>> + const unsigned long cutoff = jiffies - timeout;
>> +
>> + rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node) {
>> + complete_osd_stuck_request(req, cutoff, -EIO);
>> + }
>> +}
>> +
>> +/*
>> + * Timeout callback, called every N seconds. When 1 or more OSD
>> + * requests that has been active for more than N seconds,
>> + * we complete it with error EIO.
>> + */
>> +static void handle_osd_request_timeout(struct work_struct *work)
>> +{
>> + struct ceph_osd_client *osdc =
>> + container_of(work, struct ceph_osd_client,
>> + osd_request_timeout_work.work);
>> + struct ceph_osd *osd, *n;
>> + struct ceph_options *opts = osdc->client->options;
>> + const unsigned long timeout = opts->osd_request_timeout;
>> +
>> + dout("%s osdc %p\n", __func__, osdc);
>> +
>> + if (!timeout)
>> + return;
>> +
>> + down_write(&osdc->lock);
>> +
>> + rbtree_postorder_for_each_entry_safe(osd, n, &osdc->osds, o_node) {
>> + complete_osd_stuck_requests(osd, timeout, -EIO);
>> + }
>> +
>> + up_write(&osdc->lock);
>> +
>> + if (!atomic_read(&osdc->num_homeless))
>> + goto out;
>> +
>> + down_write(&osdc->lock);
>> + complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
>> + up_write(&osdc->lock);
>> +
>> +out:
>> + schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
>> +}
>> +
>> static void handle_osds_timeout(struct work_struct *work)
>> {
>> struct ceph_osd_client *osdc =
>> @@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
>> ceph_client *client)
>> osdc->linger_map_checks = RB_ROOT;
>> INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
>> INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
>> + INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);
>>
>> err = -ENOMEM;
>> osdc->osdmap = ceph_osdmap_alloc();
>> @@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
>> ceph_client *client)
>> osdc->client->options->osd_keepalive_timeout);
>> schedule_delayed_work(&osdc->osds_timeout_work,
>> round_jiffies_relative(osdc->client->options->osd_idle_ttl));
>> + schedule_delayed_work(&osdc->osd_request_timeout_work,
>> + round_jiffies_relative(osdc->client->options->osd_request_timeout));
>>
>> return 0;
>>
>
> Having a job that has to wake up every second or so isn't ideal. Perhaps
> you would be better off scheduling the delayed work in the request
> submission codepath, and only rearm it when the tree isn't empty after
> calling complete_osd_stuck_requests?
>
> Also, I don't see where this job is ever cancelled when the osdc is torn
> down. That needs to occur or you'll cause a use-after-free oops...
Yeah, there are other bugs here: rbtree_postorder_for_each_entry_safe()
can't be used because complete_request() calls erase(), etc. The patch
I had just piggy-backed on handle_timeout(), which is called every five
seconds anyway.
Given that there is interest, I think it's worth adding.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-10 11:31 ` Jeff Layton
2017-02-10 12:25 ` Ilya Dryomov
@ 2017-02-11 20:30 ` Artur Molchanov
2017-02-12 12:34 ` Jeff Layton
1 sibling, 1 reply; 14+ messages in thread
From: Artur Molchanov @ 2017-02-11 20:30 UTC (permalink / raw)
To: Jeff Layton, Ilya Dryomov; +Cc: ceph-devel
Hi Jeff,
On Fri, 2017-02-10 at 14:31, Jeff Layton wrote:
> On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
>> From: Artur Molchanov <artur.molchanov@synesis.ru>
>>
>> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
>> If osd_request_timeout equals to 0 (default value) then do nothing with
>> hung requests (keep default behavior).
>>
>> Create RBD map option osd_request_timeout to set timeout in seconds. Set
>> osd_request_timeout to 0 by default.
>>
> Also, what exactly are the requests blocked on when this occurs? Is the
> ceph_osd_request_target ending up paused? I wonder if we might be better
> off with something that returns a hard error under the circumstances
> where you're hanging, rather than depending on timeouts.
I wonder that it is better to complete requests only after timeout expired, just
because a request can fail due to temporary network issues (e.g. router
restarted) or restarting machine/services.
> Having a job that has to wake up every second or so isn't ideal. Perhaps
> you would be better off scheduling the delayed work in the request
> submission codepath, and only rearm it when the tree isn't empty after
> calling complete_osd_stuck_requests?
Would you please tell me more about rearming work only if the tree is not empty
after calling complete_osd_stuck_requests? From what code we should call
complete_osd_stuck_requests?
As I understood, there are two primary cases:
1 - Requests to OSD failed, but monitors do not return new osdmap (because all
monitors are offline or monitors did not update osdmap yet).
In this case requests are retried by cyclically calling ceph_con_workfn -> con_fault
-> ceph_con_workfn. We can check the request timestamp and, instead of calling
con_fault, complete the request.
2 - Monitors return new osdmap which does not have any OSD for RBD.
In this case all requests to the last ready OSD will be linked on "homeless" OSD
and will not be retried until a new osdmap with an appropriate OSD is received. I think
that we need additional periodic checking of the timestamps of such requests.
Yes, there is already an existing job, handle_timeout. But the responsibility of
this job is to send keepalive requests to slow OSDs. I'm not sure that it is a
good idea to perform additional actions inside this job.
I decided that creating a specific job, handle_osd_request_timeout, is more appropriate.
This job will be run only once with the default value of osd_request_timeout (0).
At the same time, I think that users will not use too small a value for this
parameter. I expect that the typical value will be about 1 minute or greater.
> Also, I don't see where this job is ever cancelled when the osdc is torn
> down. That needs to occur or you'll cause a use-after-free oops...
It is my fault, thanks for the correction.
--
Artur
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-11 20:30 ` Artur Molchanov
@ 2017-02-12 12:34 ` Jeff Layton
2017-02-12 15:26 ` Artur Molchanov
0 siblings, 1 reply; 14+ messages in thread
From: Jeff Layton @ 2017-02-12 12:34 UTC (permalink / raw)
To: Artur Molchanov, Ilya Dryomov; +Cc: ceph-devel
On Sat, 2017-02-11 at 23:30 +0300, Artur Molchanov wrote:
> Hi Jeff,
>
> On Fri, 2017-02-10 at 14:31, Jeff Layton wrote:
> > On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
> > > From: Artur Molchanov <artur.molchanov@synesis.ru>
> > >
> > > Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
> > > If osd_request_timeout equals to 0 (default value) then do nothing with
> > > hung requests (keep default behavior).
> > >
> > > Create RBD map option osd_request_timeout to set timeout in seconds. Set
> > > osd_request_timeout to 0 by default.
> > >
> >
> > Also, what exactly are the requests blocked on when this occurs? Is the
> > ceph_osd_request_target ending up paused? I wonder if we might be better
> > off with something that returns a hard error under the circumstances
> > where you're hanging, rather than depending on timeouts.
>
> I wonder that it is better to complete requests only after timeout expired, just
> because a request can fail due to temporary network issues (e.g. router
> restarted) or restarting machine/services.
>
> > Having a job that has to wake up every second or so isn't ideal. Perhaps
> > you would be better off scheduling the delayed work in the request
> > submission codepath, and only rearm it when the tree isn't empty after
> > calling complete_osd_stuck_requests?
>
> Would you please tell me more about rearming work only if the tree is not empty
> after calling complete_osd_stuck_requests? From what code we should call
> complete_osd_stuck_requests?
>
Sure. I'm saying you would want to call schedule_delayed_work for the
timeout handler from the request submission path when you link a request
into the tree that has a timeout. Maybe in __submit_request?
Then, instead of unconditionally calling schedule_delayed_work at the
end of handle_request_timeout, you'd only call it if there were no
requests still sitting in the osdc trees.
> As I understood, there are two primary cases:
> 1 - Requests to OSD failed, but monitors do not return new osdmap (because all
> monitors are offline or monitors did not update osdmap yet).
> In this case requests are retried by cyclic calling ceph_con_workfn -> con_fault
> -> ceph_con_workfn. We can check request timestamp and does not call con_fault
> but complete it.
>
> 2 - Monitors return new osdmap which does not have any OSD for RBD.
> In this case all requests to the last ready OSD will be linked on "homeless" OSD
> and will not be retried until new osdmap with appropriate OSD received. I think
> that we need additional periodic checking timestamp such requests.
>
> Yes, there is already existing job handle_timeout. But the responsibility of
> this job is to sending keepalive requests to slow OSD. I'm not sure that it is a
> good idea to perform additional actions inside this job.
> I decided that creating specific job handle_osd_request_timeout is more applicable.
>
> This job will be run only once with a default value of osd_request_timeout (0).
Ahh, I missed that -- thanks.
> At the same time, I think that user will not use too small value for this
> parameter. I wonder that typical value will be about 1 minute or greater.
>
> > Also, I don't see where this job is ever cancelled when the osdc is torn
> > down. That needs to occur or you'll cause a use-after-free oops...
>
> It is my fault, thanks for the correction.
>
--
Jeff Layton <jlayton@redhat.com>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-12 12:34 ` Jeff Layton
@ 2017-02-12 15:26 ` Artur Molchanov
0 siblings, 0 replies; 14+ messages in thread
From: Artur Molchanov @ 2017-02-12 15:26 UTC (permalink / raw)
To: Jeff Layton, Ilya Dryomov; +Cc: ceph-devel
On Sun 2017-2-12 at 15:34, Jeff Layton wrote:
> On Sat, 2017-02-11 at 23:30 +0300, Artur Molchanov wrote:
>> Hi Jeff,
>>
>> On Fri, 2017-02-10 at 14:31, Jeff Layton wrote:
>>> On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
>>>> From: Artur Molchanov <artur.molchanov@synesis.ru>
>>>>
>>>> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
>>>> If osd_request_timeout equals to 0 (default value) then do nothing with
>>>> hung requests (keep default behavior).
>>>>
>>>> Create RBD map option osd_request_timeout to set timeout in seconds. Set
>>>> osd_request_timeout to 0 by default.
>>>>
>>>
>>> Also, what exactly are the requests blocked on when this occurs? Is the
>>> ceph_osd_request_target ending up paused? I wonder if we might be better
>>> off with something that returns a hard error under the circumstances
>>> where you're hanging, rather than depending on timeouts.
>>
>> I wonder that it is better to complete requests only after timeout expired, just
>> because a request can fail due to temporary network issues (e.g. router
>> restarted) or restarting machine/services.
>>
>>> Having a job that has to wake up every second or so isn't ideal. Perhaps
>>> you would be better off scheduling the delayed work in the request
>>> submission codepath, and only rearm it when the tree isn't empty after
>>> calling complete_osd_stuck_requests?
>>
>> Would you please tell me more about rearming work only if the tree is not empty
>> after calling complete_osd_stuck_requests? From what code we should call
>> complete_osd_stuck_requests?
>>
>
> Sure. I'm saying you would want to call schedule_delayed_work for the
> timeout handler from the request submission path when you link a request
> into the tree that has a timeout. Maybe in __submit_request?
>
> Then, instead of unconditionally calling schedule_delayed_work at the
> end of handle_request_timeout, you'd only call it if there were no
> requests still sitting in the osdc trees.
Thanks for the clarification.
But, unfortunately, requests which are linked to "homeless" OSD will not retry
until new osdmap with proper OSD received.
>> As I understood, there are two primary cases:
>> 1 - Requests to OSD failed, but monitors do not return new osdmap (because all
>> monitors are offline or monitors did not update osdmap yet).
>> In this case requests are retried by cyclic calling ceph_con_workfn -> con_fault
>> -> ceph_con_workfn. We can check request timestamp and does not call con_fault
>> but complete it.
>>
>> 2 - Monitors return new osdmap which does not have any OSD for RBD.
>> In this case all requests to the last ready OSD will be linked on "homeless" OSD
>> and will not be retried until new osdmap with appropriate OSD received. I think
>> that we need additional periodic checking timestamp such requests.
>>
>> Yes, there is already existing job handle_timeout. But the responsibility of
>> this job is to sending keepalive requests to slow OSD. I'm not sure that it is a
>> good idea to perform additional actions inside this job.
>> I decided that creating specific job handle_osd_request_timeout is more applicable.
>>
>> This job will be run only once with a default value of osd_request_timeout (0).
>
> Ahh, I missed that -- thanks.
>
>> At the same time, I think that user will not use too small value for this
>> parameter. I wonder that typical value will be about 1 minute or greater.
>>
>>> Also, I don't see where this job is ever cancelled when the osdc is torn
>>> down. That needs to occur or you'll cause a use-after-free oops...
>>
>> It is my fault, thanks for the correction.
>>
--
Artur
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-10 12:25 ` Ilya Dryomov
@ 2017-02-13 9:11 ` Ilya Dryomov
2017-02-13 9:54 ` Artur Molchanov
0 siblings, 1 reply; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-13 9:11 UTC (permalink / raw)
To: Jeff Layton; +Cc: Artur Molchanov, Ceph Development
[-- Attachment #1: Type: text/plain, Size: 10072 bytes --]
On Fri, Feb 10, 2017 at 1:25 PM, Ilya Dryomov <idryomov@gmail.com> wrote:
> On Fri, Feb 10, 2017 at 12:31 PM, Jeff Layton <jlayton@redhat.com> wrote:
>> On Thu, 2017-02-09 at 16:04 +0300, Artur Molchanov wrote:
>>> From: Artur Molchanov <artur.molchanov@synesis.ru>
>>>
>>> Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
>>> If osd_request_timeout equals to 0 (default value) then do nothing with
>>> hung requests (keep default behavior).
>>>
>>> Create RBD map option osd_request_timeout to set timeout in seconds. Set
>>> osd_request_timeout to 0 by default.
>>>
>> Also, what exactly are the requests blocked on when this occurs? Is the
>> ceph_osd_request_target ending up paused? I wonder if we might be better
>> off with something that returns a hard error under the circumstances
>> where you're hanging, rather than depending on timeouts.
>>
>>
>>> Signed-off-by: Artur Molchanov <artur.molchanov@synesis.ru>
>>> ---
>>> include/linux/ceph/libceph.h | 8 +++--
>>> include/linux/ceph/osd_client.h | 1 +
>>> net/ceph/ceph_common.c | 12 +++++++
>>> net/ceph/osd_client.c | 72 +++++++++++++++++++++++++++++++++++++++++
>>> 4 files changed, 90 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
>>> index 1816c5e..10d8acb 100644
>>> --- a/include/linux/ceph/libceph.h
>>> +++ b/include/linux/ceph/libceph.h
>>> @@ -48,6 +48,7 @@ struct ceph_options {
>>> unsigned long mount_timeout; /* jiffies */
>>> unsigned long osd_idle_ttl; /* jiffies */
>>> unsigned long osd_keepalive_timeout; /* jiffies */
>>> + unsigned long osd_request_timeout; /* jiffies */
>>>
>>> /*
>>> * any type that can't be simply compared or doesn't need need
>>> @@ -65,9 +66,10 @@ struct ceph_options {
>>> /*
>>> * defaults
>>> */
>>> -#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
>>> -#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
>>> -#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
>>> +#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
>>> +#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
>>> +#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
>>> +#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT msecs_to_jiffies(0 * 1000)
>>>
>>> #define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000)
>>> #define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000)
>>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>>> index 03a6653..e45cba0 100644
>>> --- a/include/linux/ceph/osd_client.h
>>> +++ b/include/linux/ceph/osd_client.h
>>> @@ -279,6 +279,7 @@ struct ceph_osd_client {
>>> atomic_t num_homeless;
>>> struct delayed_work timeout_work;
>>> struct delayed_work osds_timeout_work;
>>> + struct delayed_work osd_request_timeout_work;
>>> #ifdef CONFIG_DEBUG_FS
>>> struct dentry *debugfs_file;
>>> #endif
>>> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
>>> index 464e885..81876c8 100644
>>> --- a/net/ceph/ceph_common.c
>>> +++ b/net/ceph/ceph_common.c
>>> @@ -230,6 +230,7 @@ enum {
>>> Opt_osdkeepalivetimeout,
>>> Opt_mount_timeout,
>>> Opt_osd_idle_ttl,
>>> + Opt_osd_request_timeout,
>>> Opt_last_int,
>>> /* int args above */
>>> Opt_fsid,
>>> @@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
>>> {Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
>>> {Opt_mount_timeout, "mount_timeout=%d"},
>>> {Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
>>> + {Opt_osd_request_timeout, "osd_request_timeout=%d"},
>>> /* int args above */
>>> {Opt_fsid, "fsid=%s"},
>>> {Opt_name, "name=%s"},
>>> @@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
>>> opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
>>> opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
>>> opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
>>> + opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
>>>
>>> /* get mon ip(s) */
>>> /* ip1[:port1][,ip2[:port2]...] */
>>> @@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
>>> }
>>> opt->mount_timeout = msecs_to_jiffies(intval * 1000);
>>> break;
>>> + case Opt_osd_request_timeout:
>>> + /* 0 is "wait forever" (i.e. infinite timeout) */
>>> + if (intval < 0 || intval > INT_MAX / 1000) {
>>> + pr_err("osd_request_timeout out of range\n");
>>> + err = -EINVAL;
>>> + goto out;
>>> + }
>>> + opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
>>> + break;
>>>
>>> case Opt_share:
>>> opt->flags &= ~CEPH_OPT_NOSHARE;
>>> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
>>> index 842f049..2f5a024 100644
>>> --- a/net/ceph/osd_client.c
>>> +++ b/net/ceph/osd_client.c
>>> @@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
>>> osdc->client->options->osd_keepalive_timeout);
>>> }
>>>
>>> +/*
>>> + * Complete request to OSD with error err if it started before cutoff.
>>> + */
>>> +static void complete_osd_stuck_request(struct ceph_osd_request *req,
>>> + unsigned long cutoff,
>>> + int err)
>>> +{
>>> + if (!time_before(req->r_stamp, cutoff))
>>> + return;
>>> +
>>> + pr_warn_ratelimited("osd req on osd%d timeout expired\n",
>>> + req->r_osd->o_osd);
>>> +
>>> + complete_request(req, err);
>>> +}
>>> +
>>> +/*
>>> + * Complete all requests to OSD that has been active for more than timeout.
>>> + */
>>> +static void complete_osd_stuck_requests(struct ceph_osd *osd,
>>> + unsigned long timeout,
>>> + int err)
>>> +{
>>> + struct ceph_osd_request *req, *n;
>>> + const unsigned long cutoff = jiffies - timeout;
>>> +
>>> + rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node) {
>>> + complete_osd_stuck_request(req, cutoff, -EIO);
>>> + }
>>> +}
>>> +
>>> +/*
>>> + * Timeout callback, called every N seconds. When 1 or more OSD
>>> + * requests that has been active for more than N seconds,
>>> + * we complete it with error EIO.
>>> + */
>>> +static void handle_osd_request_timeout(struct work_struct *work)
>>> +{
>>> + struct ceph_osd_client *osdc =
>>> + container_of(work, struct ceph_osd_client,
>>> + osd_request_timeout_work.work);
>>> + struct ceph_osd *osd, *n;
>>> + struct ceph_options *opts = osdc->client->options;
>>> + const unsigned long timeout = opts->osd_request_timeout;
>>> +
>>> + dout("%s osdc %p\n", __func__, osdc);
>>> +
>>> + if (!timeout)
>>> + return;
>>> +
>>> + down_write(&osdc->lock);
>>> +
>>> + rbtree_postorder_for_each_entry_safe(osd, n, &osdc->osds, o_node) {
>>> + complete_osd_stuck_requests(osd, timeout, -EIO);
>>> + }
>>> +
>>> + up_write(&osdc->lock);
>>> +
>>> + if (!atomic_read(&osdc->num_homeless))
>>> + goto out;
>>> +
>>> + down_write(&osdc->lock);
>>> + complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
>>> + up_write(&osdc->lock);
>>> +
>>> +out:
>>> + schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
>>> +}
>>> +
>>> static void handle_osds_timeout(struct work_struct *work)
>>> {
>>> struct ceph_osd_client *osdc =
>>> @@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
>>> ceph_client *client)
>>> osdc->linger_map_checks = RB_ROOT;
>>> INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
>>> INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
>>> + INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);
>>>
>>> err = -ENOMEM;
>>> osdc->osdmap = ceph_osdmap_alloc();
>>> @@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
>>> ceph_client *client)
>>> osdc->client->options->osd_keepalive_timeout);
>>> schedule_delayed_work(&osdc->osds_timeout_work,
>>> round_jiffies_relative(osdc->client->options->osd_idle_ttl));
>>> + schedule_delayed_work(&osdc->osd_request_timeout_work,
>>> + round_jiffies_relative(osdc->client->options->osd_request_timeout));
>>>
>>> return 0;
>>>
>>
>> Having a job that has to wake up every second or so isn't ideal. Perhaps
>> you would be better off scheduling the delayed work in the request
>> submission codepath, and only rearm it when the tree isn't empty after
>> calling complete_osd_stuck_requests?
>>
>> Also, I don't see where this job is ever cancelled when the osdc is torn
>> down. That needs to occur or you'll cause a use-after-free oops...
>
> Yeah, there are other bugs here: rbtree_postorder_for_each_entry_safe()
> can't be used because complete_request() calls erase(), etc. The patch
> I had just piggy-backed on handle_timeout(), which is called every five
> seconds anyway.
>
> Given that there is interest, I think it's worth adding.
Hi Artur,
How about the attached patch? handle_timeout() is going to iterate
over all but the homeless OSD anyway; all it costs us is a couple of
tests, so I don't think a separate work is needed.
abort_request() is a simple wrapper around complete_request(), making
it safe to call at any time -- replace it with complete_request() for
now if you want to try this out.
Thanks,
Ilya
[-- Attachment #2: osd-request-timeout.diff --]
[-- Type: text/plain, Size: 5856 bytes --]
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 1816c5e26581..88cd5dc8e238 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
unsigned long mount_timeout; /* jiffies */
unsigned long osd_idle_ttl; /* jiffies */
unsigned long osd_keepalive_timeout; /* jiffies */
+ unsigned long osd_request_timeout; /* jiffies */
/*
* any type that can't be simply compared or doesn't need need
@@ -68,6 +69,7 @@ struct ceph_options {
#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0 /* no timeout */
#define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000)
#define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 03a6653d329a..4db031dd748d 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -193,6 +193,7 @@ struct ceph_osd_request {
/* internal */
unsigned long r_stamp; /* jiffies, send or check time */
+ unsigned long r_start_stamp; /* jiffies */
int r_attempts;
struct ceph_eversion r_replay_version; /* aka reassert_version */
u32 r_last_force_resend;
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 464e88599b9d..108533859a53 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
Opt_osdkeepalivetimeout,
Opt_mount_timeout,
Opt_osd_idle_ttl,
+ Opt_osd_request_timeout,
Opt_last_int,
/* int args above */
Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
{Opt_mount_timeout, "mount_timeout=%d"},
{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+ {Opt_osd_request_timeout, "osd_request_timeout=%d"},
/* int args above */
{Opt_fsid, "fsid=%s"},
{Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+ opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
/* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
}
opt->mount_timeout = msecs_to_jiffies(intval * 1000);
break;
+ case Opt_osd_request_timeout:
+ /* 0 is "wait forever" (i.e. infinite timeout) */
+ if (intval < 0 || intval > INT_MAX / 1000) {
+ pr_err("osd_request_timeout out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
+ break;
case Opt_share:
opt->flags &= ~CEPH_OPT_NOSHARE;
@@ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
seq_printf(m, "osdkeepalivetimeout=%d,",
jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000);
+ if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT)
+ seq_printf(m, "osd_request_timeout=%d,",
+ jiffies_to_msecs(opt->osd_request_timeout) / 1000);
/* drop redundant comma */
if (m->count != pos)
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b8da4279e745..1d580530c700 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1717,6 +1717,8 @@ static void account_request(struct ceph_osd_request *req)
WARN_ON(req->r_unsafe_callback && (req->r_flags & mask) != mask);
atomic_inc(&req->r_osdc->num_requests);
+
+ req->r_start_stamp = jiffies;
}
static void submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -2504,6 +2506,7 @@ static void handle_timeout(struct work_struct *work)
container_of(work, struct ceph_osd_client, timeout_work.work);
struct ceph_options *opts = osdc->client->options;
unsigned long cutoff = jiffies - opts->osd_keepalive_timeout;
+ unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout;
LIST_HEAD(slow_osds);
struct rb_node *n, *p;
@@ -2519,15 +2522,23 @@ static void handle_timeout(struct work_struct *work)
struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
bool found = false;
- for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) {
+ for (p = rb_first(&osd->o_requests); p; ) {
struct ceph_osd_request *req =
rb_entry(p, struct ceph_osd_request, r_node);
+ p = rb_next(p); /* abort_request() */
+
if (time_before(req->r_stamp, cutoff)) {
dout(" req %p tid %llu on osd%d is laggy\n",
req, req->r_tid, osd->o_osd);
found = true;
}
+ if (opts->osd_request_timeout &&
+ time_before(req->r_start_stamp, expiry_cutoff)) {
+ pr_err_ratelimited("tid %llu on osd%d timeout\n",
+ req->r_tid, osd->o_osd);
+ abort_request(req, -ETIMEDOUT);
+ }
}
for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) {
struct ceph_osd_linger_request *lreq =
@@ -2547,6 +2558,21 @@ static void handle_timeout(struct work_struct *work)
list_move_tail(&osd->o_keepalive_item, &slow_osds);
}
+ if (opts->osd_request_timeout) {
+ for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) {
+ struct ceph_osd_request *req =
+ rb_entry(p, struct ceph_osd_request, r_node);
+
+ p = rb_next(p); /* abort_request() */
+
+ if (time_before(req->r_start_stamp, expiry_cutoff)) {
+ pr_err_ratelimited("tid %llu on osd%d timeout\n",
+ req->r_tid, osdc->homeless_osd.o_osd);
+ abort_request(req, -ETIMEDOUT);
+ }
+ }
+ }
+
if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds))
maybe_request_map(osdc);
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-13 9:11 ` Ilya Dryomov
@ 2017-02-13 9:54 ` Artur Molchanov
2017-02-13 14:15 ` Ilya Dryomov
0 siblings, 1 reply; 14+ messages in thread
From: Artur Molchanov @ 2017-02-13 9:54 UTC (permalink / raw)
To: Ilya Dryomov, Jeff Layton; +Cc: Ceph Development
Hi Ilya,
On 02/13/2017 12:11 PM, Ilya Dryomov wrote:
> Hi Artur,
>
> How about the attached patch? handle_timeout() is going to iterate
> over all but the homeless OSD anyway; all it costs us is a couple of
> tests, so I don't think a separate work is needed.
>
> abort_request() is a simple wrapper around complete_request(), making
> it safe to call at any time -- replace it with complete_request() for
> now if you want to try this out.
Using one job for sending keepalive requests and completing stuck requests
brings us to the need to check that osd_keepalive_timeout is not larger than
osd_request_timeout. So we should not forget to say about it in the documentation.
Is it worth creating a correlation between osd_keepalive_timeout and
osd_request_timeout?
--
Artur
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-13 9:54 ` Artur Molchanov
@ 2017-02-13 14:15 ` Ilya Dryomov
2017-02-13 15:23 ` Artur Molchanov
0 siblings, 1 reply; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-13 14:15 UTC (permalink / raw)
To: Artur Molchanov; +Cc: Jeff Layton, Ceph Development
On Mon, Feb 13, 2017 at 10:54 AM, Artur Molchanov
<artur.molchanov@synesis.ru> wrote:
> Hi Ilya,
>
> On 02/13/2017 12:11 PM, Ilya Dryomov wrote:
>>
>> Hi Artur,
>>
>> How about the attached patch? handle_timeout() is going to iterate
>> over all but the homeless OSD anyway; all it costs us is a couple of
>> tests, so I don't think a separate work is needed.
>>
>> abort_request() is a simple wrapper around complete_request(), making
>> it safe to call at any time -- replace it with complete_request() for
>> now if you want to try this out.
>
>
> Using one job for sending keepalive requests and completing stuck requests
> brings us to the need to check that osd_keepalive_timeout is not larger than
> osd_request_timeout. So we should not forget to say about it in the
> documentation.
I'll make a note to mention that it is osd_keepalive_timeout-precise.
> Is it worth creating a correlation between osd_keepalive_timeout and
> osd_request_timeout?
No, probably not. osd_keepalive_timeout and osd_idle_ttl are similarly
related and we don't have any.
Thanks,
Ilya
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-13 14:15 ` Ilya Dryomov
@ 2017-02-13 15:23 ` Artur Molchanov
2017-02-13 15:32 ` Ilya Dryomov
0 siblings, 1 reply; 14+ messages in thread
From: Artur Molchanov @ 2017-02-13 15:23 UTC (permalink / raw)
To: Ilya Dryomov; +Cc: Jeff Layton, Ceph Development
On 02/13/2017 05:15 PM, Ilya Dryomov wrote:
> On Mon, Feb 13, 2017 at 10:54 AM, Artur Molchanov
> <artur.molchanov@synesis.ru> wrote:
>> Hi Ilya,
>>
>> On 02/13/2017 12:11 PM, Ilya Dryomov wrote:
>>>
>>> Hi Artur,
>>>
>>> How about the attached patch? handle_timeout() is going to iterate
>>> over all but the homeless OSD anyway; all it costs us is a couple of
>>> tests, so I don't think a separate work is needed.
>>>
>>> abort_request() is a simple wrapper around complete_request(), making
>>> it safe to call at any time -- replace it with complete_request() for
>>> now if you want to try this out.
>>
>>
>> Using one job for sending keepalive requests and completing stuck requests
>> brings us to the need to check that osd_keepalive_timeout is not larger then
>> osd_request_timeout. So we should not forget to say about it in the
>> documentation.
>
> I'll make a note to mention that it is osd_keepalive_timeout-precise.
>
>> Is it worth creating a correlation between osd_keepalive_timeout and
>> osd_request_timeout?
>
> No, probably not. osd_keepalive_timeout and osd_idle_ttl are similarly
> related and we don't have any.
Your variant of patch works.
As I said, using job handle_timeout to send keepalive requests and aborting
stuck requests is not the perfect choice, but OK, it works.
What should I do to make this patch merged to upstream?
--
Artur
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] libceph: Complete stuck requests to OSD with EIO
2017-02-13 15:23 ` Artur Molchanov
@ 2017-02-13 15:32 ` Ilya Dryomov
0 siblings, 0 replies; 14+ messages in thread
From: Ilya Dryomov @ 2017-02-13 15:32 UTC (permalink / raw)
To: Artur Molchanov; +Cc: Jeff Layton, Ceph Development
On Mon, Feb 13, 2017 at 4:23 PM, Artur Molchanov
<artur.molchanov@synesis.ru> wrote:
> On 02/13/2017 05:15 PM, Ilya Dryomov wrote:
>>
>> On Mon, Feb 13, 2017 at 10:54 AM, Artur Molchanov
>> <artur.molchanov@synesis.ru> wrote:
>>>
>>> Hi Ilya,
>>>
>>> On 02/13/2017 12:11 PM, Ilya Dryomov wrote:
>>>>
>>>>
>>>> Hi Artur,
>>>>
>>>> How about the attached patch? handle_timeout() is going to iterate
>>>> over all but the homeless OSD anyway; all it costs us is a couple of
>>>> tests, so I don't think a separate work is needed.
>>>>
>>>> abort_request() is a simple wrapper around complete_request(), making
>>>> it safe to call at any time -- replace it with complete_request() for
>>>> now if you want to try this out.
>>>
>>>
>>>
>>> Using one job for sending keepalive requests and completing stuck
>>> requests
>>> brings us to the need to check that osd_keepalive_timeout is not larger
>>> than
>>> osd_request_timeout. So we should not forget to say about it in the
>>> documentation.
>>
>>
>> I'll make a note to mention that it is osd_keepalive_timeout-precise.
>>
>>> Is it worth creating a correlation between osd_keepalive_timeout and
>>> osd_request_timeout?
>>
>>
>> No, probably not. osd_keepalive_timeout and osd_idle_ttl are similarly
>> related and we don't have any.
>
>
> Your variant of patch works.
> As I said, using job handle_timeout to send keepalive requests and aborting
> stuck requests is not the perfect choice, but OK, it works.
>
> What should I do to make this patch merged to upstream?
Nothing -- I'll apply it for 4.11 once we have abort_request().
Thanks,
Ilya
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2017-02-13 15:32 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-02-09 13:04 [PATCH] libceph: Complete stuck requests to OSD with EIO Artur Molchanov
2017-02-09 14:27 ` Ilya Dryomov
2017-02-09 15:51 ` Artur Molchanov
2017-02-09 16:11 ` Wido den Hollander
2017-02-10 11:31 ` Jeff Layton
2017-02-10 12:25 ` Ilya Dryomov
2017-02-13 9:11 ` Ilya Dryomov
2017-02-13 9:54 ` Artur Molchanov
2017-02-13 14:15 ` Ilya Dryomov
2017-02-13 15:23 ` Artur Molchanov
2017-02-13 15:32 ` Ilya Dryomov
2017-02-11 20:30 ` Artur Molchanov
2017-02-12 12:34 ` Jeff Layton
2017-02-12 15:26 ` Artur Molchanov
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.