All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Jeffery <andrew@aj.id.au>
To: joel@jms.id.au, jk@ozlabs.org, eajames@linux.vnet.ibm.com,
	bradleyb@fuzziesquirrel.com, cbostic@linux.vnet.ibm.com
Cc: openbmc@lists.ozlabs.org, "Edward A . James" <eajames@us.ibm.com>,
	Andrew Jeffery <andrew@aj.id.au>
Subject: [PATCH linux dev-4.13 12/16] fsi: sbefifo: Avoid using a timer to drive FIFO transfers
Date: Tue, 20 Feb 2018 14:48:40 +1030	[thread overview]
Message-ID: <20180220041844.13228-13-andrew@aj.id.au> (raw)
In-Reply-To: <20180220041844.13228-1-andrew@aj.id.au>

From: Eddie James <eajames@linux.vnet.ibm.com>

The timer subsystem appears to misbehave when requesting an immediate expiry
via `mod_timer(..., jiffies)`. The SBEFIFO driver uses this pattern for
scheduling work when it either adds a new transfer to an empty queue or the
timer callback finds the transfer queue is not empty. In both cases work should
be scheduled immediately, however the current approach is not optimal:

The comment [0] outlines an issue where OCC sensor polling on the BMC can come
at remarkable cost. Delays of up to and above 10 seconds can be experienced
when reading an OCC attribute.

Tracing of the subsystems involved (FSI, the SBEFIFO, the OCC, the scheduler
and the timer subsystems) showed that the SBEFIFO timer callbacks had a
saw-tooth pattern of execution latency in jiffies relative to their scheduled
expiry time. In the measurements the typical latency of an immediate reschedule
was 15 jiffies, though this could be as few as 1 or greater than 100 in the
pathological case. Surprisingly, these delays coincided in length with system
idle time between the SBEFIFO transfers - it appears timer subsystem is not
properly accounting for the immediacy of the request[1].

Querying sensors on the OCC incurs multiple SBEFIFO transfers, each with its
own (increasing) expiry latency, which leads to the poor responsiveness
observed above.

Stop misusing the timer infrastructure for work queue management and incurring
associated penalties (even if they appear to be a bug in the timer core).
Instead, use a workqueue and use queue_work() and queue_delayed_work() as
required. With this change, reading an OCC sensor typically takes ~0.5 seconds
wall-clock in the un-cached case and rarely exceeds 1 second. In the cached
case the read completes in around 0.01 seconds.

Finally, using a workqueue enables the use of mutexes in place of spinlocks,
making way for future changes down the stack in the FSI core.

[0] https://github.com/openbmc/openbmc/issues/2696#issuecomment-351207046
[1] It seems scheduling a timer with `mod_timer(..., jiffies)` is a
    pathological case: Changing all such call-sites in the SBEFIFO driver to
    use `mod_timer(..., jiffies + 1)` yielded a similar improvement in
    performance to the workqueue strategy.

Signed-off-by: Edward A. James <eajames@us.ibm.com>
[arj: Rework the commit message]
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
---
 drivers/fsi/fsi-sbefifo.c | 54 ++++++++++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c
index 041704806ae4..645ae00cc6ac 100644
--- a/drivers/fsi/fsi-sbefifo.c
+++ b/drivers/fsi/fsi-sbefifo.c
@@ -29,9 +29,9 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/timer.h>
 #include <linux/uaccess.h>
 #include <linux/wait.h>
+#include <linux/workqueue.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/sbefifo.h>
@@ -60,7 +60,7 @@
 #define SBEFIFO_MAX_RESCHDULE	msecs_to_jiffies(5000)
 
 struct sbefifo {
-	struct timer_list poll_timer;
+	struct delayed_work work;
 	struct fsi_device *fsi_dev;
 	struct miscdevice mdev;
 	wait_queue_head_t wait;
@@ -102,6 +102,8 @@ struct sbefifo_client {
 	unsigned long f_flags;
 };
 
+static struct workqueue_struct *sbefifo_wq;
+
 static DEFINE_IDA(sbefifo_ida);
 
 static int sbefifo_inw(struct sbefifo *sbefifo, int reg, u32 *word)
@@ -344,7 +346,7 @@ static void sbefifo_release_client(struct kref *kref)
 			 */
 			set_bit(SBEFIFO_XFR_CANCEL, &xfr->flags);
 			sbefifo_get(sbefifo);
-			if (mod_timer(&client->dev->poll_timer, jiffies))
+			if (!queue_work(sbefifo_wq, &sbefifo->work.work))
 				sbefifo_put(sbefifo);
 		}
 	}
@@ -382,10 +384,11 @@ static struct sbefifo_xfr *sbefifo_next_xfr(struct sbefifo *sbefifo)
 	return NULL;
 }
 
-static void sbefifo_poll_timer(unsigned long data)
+static void sbefifo_worker(struct work_struct *work)
 {
 	static const unsigned long EOT_MASK = 0x000000ff;
-	struct sbefifo *sbefifo = (void *)data;
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct sbefifo *sbefifo = container_of(dwork, struct sbefifo, work);
 	struct sbefifo_buf *rbuf, *wbuf;
 	struct sbefifo_xfr *xfr, *tmp;
 	struct sbefifo_buf drain;
@@ -403,6 +406,7 @@ static void sbefifo_poll_timer(unsigned long data)
 
 	trace_sbefifo_begin_xfer(xfr);
 
+again:
 	rbuf = xfr->rbuf;
 	wbuf = xfr->wbuf;
 
@@ -424,9 +428,8 @@ static void sbefifo_poll_timer(unsigned long data)
 		devn = sbefifo_dev_nwwriteable(sts);
 		if (devn == 0) {
 			/* No open slot for write.  Reschedule. */
-			sbefifo->poll_timer.expires = jiffies +
-				SBEFIFO_RESCHEDULE;
-			add_timer(&sbefifo->poll_timer);
+			queue_delayed_work(sbefifo_wq, &sbefifo->work,
+					   SBEFIFO_RESCHEDULE);
 			goto out_unlock;
 		}
 
@@ -478,9 +481,8 @@ static void sbefifo_poll_timer(unsigned long data)
 			}
 
 			/* No data yet.  Reschedule. */
-			sbefifo->poll_timer.expires = jiffies +
-				SBEFIFO_RESCHEDULE;
-			add_timer(&sbefifo->poll_timer);
+			queue_delayed_work(sbefifo_wq, &sbefifo->work,
+					   SBEFIFO_RESCHEDULE);
 			goto out_unlock;
 		} else {
 			xfr->wait_data_timeout = 0;
@@ -528,10 +530,12 @@ static void sbefifo_poll_timer(unsigned long data)
 		}
 		INIT_LIST_HEAD(&sbefifo->xfrs);
 
-	} else if (eot && sbefifo_next_xfr(sbefifo)) {
-		sbefifo_get(sbefifo);
-		sbefifo->poll_timer.expires = jiffies;
-		add_timer(&sbefifo->poll_timer);
+	} else if (eot) {
+		xfr = sbefifo_next_xfr(sbefifo);
+		if (xfr) {
+			wake_up_interruptible(&sbefifo->wait);
+			goto again;
+		}
 	}
 
 	sbefifo_put(sbefifo);
@@ -652,7 +656,7 @@ static ssize_t sbefifo_read_common(struct sbefifo_client *client,
 		if (!test_bit(SBEFIFO_XFR_COMPLETE, &xfr->flags)) {
 			/* Fill the read buffer back up. */
 			sbefifo_get(sbefifo);
-			if (mod_timer(&client->dev->poll_timer, jiffies))
+			if (!queue_work(sbefifo_wq, &sbefifo->work.work))
 				sbefifo_put(sbefifo);
 		} else {
 			list_del(&xfr->client);
@@ -738,7 +742,7 @@ static ssize_t sbefifo_write_common(struct sbefifo_client *client,
 								 &n))) {
 			set_bit(SBEFIFO_XFR_CANCEL, &xfr->flags);
 			sbefifo_get(sbefifo);
-			if (mod_timer(&sbefifo->poll_timer, jiffies))
+			if (!queue_work(sbefifo_wq, &sbefifo->work.work))
 				sbefifo_put(sbefifo);
 
 			ret = -ERESTARTSYS;
@@ -758,7 +762,8 @@ static ssize_t sbefifo_write_common(struct sbefifo_client *client,
 			    n)) {
 				set_bit(SBEFIFO_XFR_CANCEL, &xfr->flags);
 				sbefifo_get(sbefifo);
-				if (mod_timer(&sbefifo->poll_timer, jiffies))
+				if (!queue_work(sbefifo_wq,
+						&sbefifo->work.work))
 					sbefifo_put(sbefifo);
 				ret = -EFAULT;
 				goto out;
@@ -783,7 +788,7 @@ static ssize_t sbefifo_write_common(struct sbefifo_client *client,
 
 		/* Drain the write buffer. */
 		sbefifo_get(sbefifo);
-		if (mod_timer(&client->dev->poll_timer, jiffies))
+		if (!queue_work(sbefifo_wq, &sbefifo->work.work))
 			sbefifo_put(sbefifo);
 	}
 
@@ -943,8 +948,7 @@ static int sbefifo_probe(struct device *dev)
 		 sbefifo->idx);
 
 	/* This bit of silicon doesn't offer any interrupts... */
-	setup_timer(&sbefifo->poll_timer, sbefifo_poll_timer,
-		    (unsigned long)sbefifo);
+	INIT_DELAYED_WORK(&sbefifo->work, sbefifo_worker);
 
 	sbefifo->mdev.minor = MISC_DYNAMIC_MINOR;
 	sbefifo->mdev.fops = &sbefifo_fops;
@@ -997,7 +1001,7 @@ static int sbefifo_remove(struct device *dev)
 
 	ida_simple_remove(&sbefifo_ida, sbefifo->idx);
 
-	if (del_timer_sync(&sbefifo->poll_timer))
+	if (cancel_delayed_work_sync(&sbefifo->work))
 		sbefifo_put(sbefifo);
 
 	sbefifo_put(sbefifo);
@@ -1025,11 +1029,17 @@ static struct fsi_driver sbefifo_drv = {
 
 static int sbefifo_init(void)
 {
+	sbefifo_wq = create_singlethread_workqueue("sbefifo");
+	if (!sbefifo_wq)
+		return -ENOMEM;
+
 	return fsi_driver_register(&sbefifo_drv);
 }
 
 static void sbefifo_exit(void)
 {
+	destroy_workqueue(sbefifo_wq);
+
 	fsi_driver_unregister(&sbefifo_drv);
 
 	ida_destroy(&sbefifo_ida);
-- 
2.14.1

  parent reply	other threads:[~2018-02-20  4:19 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-20  4:18 [PATCH linux dev-4.13 00/16] Locking fixes for FSI, SBEFIFO, OCC Andrew Jeffery
2018-02-20  4:18 ` [PATCH linux dev-4.13 01/16] dts: fsi: Make OCC description match expected hierarchy Andrew Jeffery
2018-02-22 20:25   ` Eddie James
2018-02-23  0:15     ` Andrew Jeffery
2018-02-20  4:18 ` [PATCH linux dev-4.13 02/16] hwmon (p9_sbe): Initialise device spin lock Andrew Jeffery
2018-02-22 20:26   ` Eddie James
2018-02-20  4:18 ` [PATCH linux dev-4.13 03/16] fsi: sbefifo: Rename sbefifo_release_client() for consistency Andrew Jeffery
2018-02-22 20:28   ` Eddie James
2018-02-20  4:18 ` [PATCH linux dev-4.13 04/16] fsi: sbefifo: Add tracing of transfers Andrew Jeffery
2018-02-22 20:28   ` Eddie James
2018-02-20  4:18 ` [PATCH linux dev-4.13 05/16] fsi: gpio: Trace busy count Andrew Jeffery
2018-02-22 20:29   ` Eddie James
2018-02-20  4:18 ` [PATCH linux dev-4.13 06/16] fsi: occ: Add tracepoints Andrew Jeffery
2018-02-22 20:29   ` Eddie James
2018-02-20  4:18 ` [PATCH linux dev-4.13 07/16] fsi: sbefifo: don't delete canceled xfrs in write Andrew Jeffery
2018-02-20  4:18 ` [PATCH linux dev-4.13 08/16] hwmon (p9_sbe): Rename context variable Andrew Jeffery
2018-02-22 20:30   ` Eddie James
2018-02-20  4:18 ` [PATCH linux dev-4.13 09/16] hwmon (p9_sbe): Rename lock member of struct p9_sbe_occ Andrew Jeffery
2018-02-22 20:31   ` Eddie James
2018-02-20  4:18 ` [PATCH linux dev-4.13 10/16] hwmon (p9_sbe): Convert client_lock from a spinlock to a mutex Andrew Jeffery
2018-02-22 20:31   ` Eddie James
2018-02-20  4:18 ` [PATCH linux dev-4.13 11/16] hwmon (p9_sbe): Add static key to satisfy lockdep Andrew Jeffery
2018-02-22 20:32   ` Eddie James
2018-02-20  4:18 ` Andrew Jeffery [this message]
2018-02-20  4:18 ` [PATCH linux dev-4.13 13/16] fsi: sbefifo: Switch to mutex in work function Andrew Jeffery
2018-02-20  4:18 ` [PATCH linux dev-4.13 14/16] fsi: sbefifo: Switch list_lock from spinlock to mutex Andrew Jeffery
2018-02-22 20:35   ` Eddie James
2018-02-20  4:18 ` [PATCH linux dev-4.13 15/16] fsi: gpio: Remove unused 'id' variable Andrew Jeffery
2018-02-20  4:18 ` [PATCH linux dev-4.13 16/16] fsi: gpio: Use a mutex to protect transfers Andrew Jeffery

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180220041844.13228-13-andrew@aj.id.au \
    --to=andrew@aj.id.au \
    --cc=bradleyb@fuzziesquirrel.com \
    --cc=cbostic@linux.vnet.ibm.com \
    --cc=eajames@linux.vnet.ibm.com \
    --cc=eajames@us.ibm.com \
    --cc=jk@ozlabs.org \
    --cc=joel@jms.id.au \
    --cc=openbmc@lists.ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.