* [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
@ 2012-10-15 15:36 Konstantin Dorfman
  2012-10-21 23:02 ` Per Forlin
  0 siblings, 1 reply; 25+ messages in thread
From: Konstantin Dorfman @ 2012-10-15 15:36 UTC (permalink / raw)
  To: cjb; +Cc: linux-mmc, per.lkml, Konstantin Dorfman

The main assumption of the async request design is that the file
system adds block requests to the block device queue asynchronously
without waiting for completion (see the Rationale section of
https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req).

We found out that in the case of sequential read operations this is
not the case, due to the read-ahead mechanism.
When mmcqd reports the completion of a request, a context switch is
needed to allow the next read-ahead BIOs to be inserted into the block
layer. Since mmcqd tries to fetch another request immediately after
the completion of the previous request, it gets NULL and starts
waiting for the completion of the previous request. This wait gives
the FS the opportunity to insert the next request, but the MMC layer
is already blocked on the previous request's completion and is not
aware of the new request waiting to be fetched.

This patch fixes the above behavior and allows the async request
mechanism to work with sequential read scenarios.
The main idea is to replace the blocking wait for a completion with
an event-driven mechanism, and to add a new event: new_request.
When the block layer notifies the MMC layer of a new request, we check
for the above case, where the MMC layer is waiting for a previous
request to complete and the current request is NULL.
In such a case the new_request event is triggered to wake up the
waiting thread. The MMC layer then fetches the new request and, after
its preparation, goes back to waiting for the completion.
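
In outline, the waiting code becomes the following (condensed from
mmc_wait_for_data_req_done() in the patch below):

	wait_event_interruptible(sync_data->wait,
			(sync_data->is_done_rcv || sync_data->is_new_req));
	if (sync_data->is_done_rcv) {
		/* previous request completed: check errors, maybe retry */
	} else if (sync_data->is_new_req) {
		/*
		 * a new request arrived: return MMC_BLK_NEW_REQUEST so
		 * that mmcqd fetches and prepares it, then waits again
		 */
	}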

Our tests showed that this fix improves sequential read throughput
by 16%.

Signed-off-by: Konstantin Dorfman <kdorfman@codeaurora.org>

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 172a768..4d6431b 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -112,17 +112,6 @@ struct mmc_blk_data {
 
 static DEFINE_MUTEX(open_lock);
 
-enum mmc_blk_status {
-	MMC_BLK_SUCCESS = 0,
-	MMC_BLK_PARTIAL,
-	MMC_BLK_CMD_ERR,
-	MMC_BLK_RETRY,
-	MMC_BLK_ABORT,
-	MMC_BLK_DATA_ERR,
-	MMC_BLK_ECC_ERR,
-	MMC_BLK_NOMEDIUM,
-};
-
 module_param(perdev_minors, int, 0444);
 MODULE_PARM_DESC(perdev_minors, "Minors numbers to allocate per device");
 
@@ -1224,6 +1213,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq,
 	}
 
 	mqrq->mmc_active.mrq = &brq->mrq;
+	mqrq->mmc_active.mrq->sync_data = &mq->sync_data;
 	mqrq->mmc_active.err_check = mmc_blk_err_check;
 
 	mmc_queue_bounce_pre(mqrq);
@@ -1284,9 +1274,12 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			areq = &mq->mqrq_cur->mmc_active;
 		} else
 			areq = NULL;
-		areq = mmc_start_req(card->host, areq, (int *) &status);
-		if (!areq)
+		areq = mmc_start_data_req(card->host, areq, (int *)&status);
+		if (!areq) {
+			if (status == MMC_BLK_NEW_REQUEST)
+				return status;
 			return 0;
+		}
 
 		mq_rq = container_of(areq, struct mmc_queue_req, mmc_active);
 		brq = &mq_rq->brq;
@@ -1295,6 +1288,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 		mmc_queue_bounce_post(mq_rq);
 
 		switch (status) {
+		case MMC_BLK_NEW_REQUEST:
+			BUG_ON(1); /* should never get here */
 		case MMC_BLK_SUCCESS:
 		case MMC_BLK_PARTIAL:
 			/*
@@ -1367,7 +1362,8 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 			 * prepare it again and resend.
 			 */
 			mmc_blk_rw_rq_prep(mq_rq, card, disable_multi, mq);
-			mmc_start_req(card->host, &mq_rq->mmc_active, NULL);
+			mmc_start_data_req(card->host, &mq_rq->mmc_active,
+				(int *)&status);
 		}
 	} while (ret);
 
@@ -1382,7 +1378,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
  start_new_req:
 	if (rqc) {
 		mmc_blk_rw_rq_prep(mq->mqrq_cur, card, 0, mq);
-		mmc_start_req(card->host, &mq->mqrq_cur->mmc_active, NULL);
+		mmc_start_data_req(card->host, &mq->mqrq_cur->mmc_active, NULL);
 	}
 
 	return 0;
@@ -1426,9 +1422,10 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 	}
 
 out:
-	if (!req)
+	if (!req && (ret != MMC_BLK_NEW_REQUEST))
 		/* release host only when there are no more requests */
 		mmc_release_host(card->host);
+
 	return ret;
 }
 
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index e360a97..65cecf2 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -67,7 +67,8 @@ static int mmc_queue_thread(void *d)
 
 		if (req || mq->mqrq_prev->req) {
 			set_current_state(TASK_RUNNING);
-			mq->issue_fn(mq, req);
+			if (mq->issue_fn(mq, req) == MMC_BLK_NEW_REQUEST)
+				continue; /* fetch again */
 		} else {
 			if (kthread_should_stop()) {
 				set_current_state(TASK_RUNNING);
@@ -98,6 +99,7 @@ static int mmc_queue_thread(void *d)
  */
 static void mmc_request_fn(struct request_queue *q)
 {
+	unsigned long flags;
 	struct mmc_queue *mq = q->queuedata;
 	struct request *req;
 
@@ -108,9 +110,25 @@ static void mmc_request_fn(struct request_queue *q)
 		}
 		return;
 	}
-
-	if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
-		wake_up_process(mq->thread);
+	if (!mq->mqrq_cur->req && mq->mqrq_prev->req) {
+		/*
+		 * A new MMC request arrived while the MMC thread may be
+		 * blocked waiting for the previous request to complete,
+		 * with no current request fetched
+		 */
+		mq->sync_data.should_skip_awake = false;
+		/* critical section with mmc_wait_data_req_done() */
+		spin_lock_irqsave(&mq->sync_data.lock, flags);
+		/* do stop flow only when mmc thread is waiting for done */
+		if (mq->sync_data.is_waiting &&
+				!mq->sync_data.is_new_req &&
+				!mq->sync_data.should_skip_awake) {
+			mq->sync_data.is_new_req = true;
+			wake_up_interruptible(&mq->sync_data.wait);
+		}
+		spin_unlock_irqrestore(&mq->sync_data.lock, flags);
+	} else if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
+			wake_up_process(mq->thread);
 }
 
 static struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
@@ -259,6 +277,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	}
 
 	sema_init(&mq->thread_sem, 1);
+	spin_lock_init(&mq->sync_data.lock);
+	mq->sync_data.should_skip_awake = false;
+	mq->sync_data.is_new_req = false;
+	mq->sync_data.is_done_rcv = false;
+	mq->sync_data.is_waiting = false;
+	init_waitqueue_head(&mq->sync_data.wait);
 
 	mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd/%d%s",
 		host->index, subname ? subname : "");
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index d2a1eb4..bcccfd6 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -33,6 +33,7 @@ struct mmc_queue {
 	struct mmc_queue_req	mqrq[2];
 	struct mmc_queue_req	*mqrq_cur;
 	struct mmc_queue_req	*mqrq_prev;
+	struct mmc_sync_data	sync_data;
 };
 
 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 06c42cf..b93ea31 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -316,11 +316,49 @@ out:
 }
 EXPORT_SYMBOL(mmc_start_bkops);
 
+/*
+ * mmc_wait_data_done() - done callback for data request
+ * @mrq: done data request
+ *
+ * Wakes up mmc context, passed as callback to host controller driver
+ */
+static void mmc_wait_data_done(struct mmc_request *mrq)
+{
+	unsigned long flags;
+
+	/* critical section with  blk layer notifications */
+	spin_lock_irqsave(&mrq->sync_data->lock, flags);
+	mrq->sync_data->should_skip_awake = true;
+	mrq->sync_data->is_done_rcv = true;
+	wake_up_interruptible(&mrq->sync_data->wait);
+	spin_unlock_irqrestore(&mrq->sync_data->lock, flags);
+}
+
 static void mmc_wait_done(struct mmc_request *mrq)
 {
 	complete(&mrq->completion);
 }
 
+/*
+ * __mmc_start_data_req() - starts a data request
+ * @host: MMC host to start the request
+ * @mrq: data request to start
+ *
+ * Fills in the done callback that will be used when the request is
+ * done by the card. Starts the data mmc request execution.
+ */
+static int __mmc_start_data_req(struct mmc_host *host, struct mmc_request *mrq)
+{
+	mrq->done = mmc_wait_data_done;
+	if (mmc_card_removed(host->card)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		return -ENOMEDIUM;
+	}
+	mmc_start_request(host, mrq);
+
+	return 0;
+}
+
 static int __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
 {
 	init_completion(&mrq->completion);
@@ -334,6 +372,64 @@ static int __mmc_start_req(struct mmc_host *host, struct mmc_request *mrq)
 	return 0;
 }
 
+/*
+ * mmc_wait_for_data_req_done() - wait for request completed or new
+ *				  request notification arrives
+ * @host: MMC host to prepare the command.
+ * @mrq: MMC request to wait for
+ *
+ * Blocks the MMC context until the host controller acks the end of
+ * data request execution, or until a new request arrives from the
+ * block layer. Handles command retries.
+ *
+ * Returns enum mmc_blk_status after checking errors.
+ */
+static int mmc_wait_for_data_req_done(struct mmc_host *host,
+				      struct mmc_request *mrq)
+{
+	struct mmc_command *cmd;
+	struct mmc_sync_data *sync_data = mrq->sync_data;
+	bool is_done_rcv = false;
+	bool is_new_req = false;
+	int err;
+
+	while (1) {
+		sync_data->is_waiting = true;
+		sync_data->is_new_req = false;
+		wait_event_interruptible(sync_data->wait,
+				(sync_data->is_done_rcv ||
+				 sync_data->is_new_req));
+		sync_data->is_waiting = false;
+		is_done_rcv = sync_data->is_done_rcv;
+		is_new_req = sync_data->is_new_req;
+		if (is_done_rcv) {
+			sync_data->is_done_rcv = false;
+			sync_data->is_new_req = false;
+			cmd = mrq->cmd;
+			if (!cmd->error || !cmd->retries ||
+					mmc_card_removed(host->card)) {
+				err = host->areq->err_check(host->card,
+						host->areq);
+				break; /* return err */
+			} else {
+				pr_info("%s: req failed (CMD%u): %d, retrying...\n",
+						mmc_hostname(host),
+						cmd->opcode, cmd->error);
+				cmd->retries--;
+				cmd->error = 0;
+				host->ops->request(host, mrq);
+				sync_data->is_done_rcv = false;
+				continue; /* wait for done/new event again */
+			}
+		} else if (is_new_req) {
+			sync_data->is_new_req = false;
+			err = MMC_BLK_NEW_REQUEST;
+			break; /* return err */
+		}
+	} /* while */
+	return err;
+}
+
 static void mmc_wait_for_req_done(struct mmc_host *host,
 				  struct mmc_request *mrq)
 {
@@ -396,6 +492,64 @@ static void mmc_post_req(struct mmc_host *host, struct mmc_request *mrq,
 }
 
 /**
+ * mmc_start_data_req - start a non-blocking data request
+ * @host: MMC host to start the command
+ * @areq: async request to start
+ * @error: out parameter; returns 0 for success, otherwise non-zero
+ *
+ * Wait for the ongoing request (previously started) to complete and
+ * return the completed request. If there is no ongoing request, NULL
+ * is returned without waiting. NULL is not an error condition.
+ */
+struct mmc_async_req *mmc_start_data_req(struct mmc_host *host,
+					 struct mmc_async_req *areq, int *error)
+{
+	int err = 0;
+	int start_err = 0;
+	struct mmc_async_req *data = host->areq;
+
+	/* Prepare a new request */
+	if (areq)
+		mmc_pre_req(host, areq->mrq, !host->areq);
+
+	if (host->areq) {
+		err = mmc_wait_for_data_req_done(host, host->areq->mrq);
+		if (err == MMC_BLK_NEW_REQUEST) {
+			if (areq)
+				pr_err("%s: new request while areq = %p",
+						mmc_hostname(host), areq);
+			if (error)
+				*error = err;
+			/*
+			 * The previous request was not completed,
+			 * nothing to return
+			 */
+			return NULL;
+		}
+	}
+
+	if (!err && areq)
+		start_err = __mmc_start_data_req(host, areq->mrq);
+
+	if (host->areq)
+		mmc_post_req(host, host->areq->mrq, 0);
+
+	/* Cancel a prepared request if it was not started. */
+	if ((err || start_err) && areq)
+		mmc_post_req(host, areq->mrq, -EINVAL);
+
+	if (err)
+		host->areq = NULL;
+	else
+		host->areq = areq;
+
+	if (error)
+		*error = err;
+
+	return data;
+}
+EXPORT_SYMBOL(mmc_start_data_req);
+
+/**
  *	mmc_start_req - start a non-blocking request
  *	@host: MMC host to start command
  *	@areq: async request to start
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 943550d..9681d8f 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -186,6 +186,18 @@ struct sdio_func_tuple;
 
 #define SDIO_MAX_FUNCS		7
 
+enum mmc_blk_status {
+	MMC_BLK_SUCCESS = 0,
+	MMC_BLK_PARTIAL,
+	MMC_BLK_CMD_ERR,
+	MMC_BLK_RETRY,
+	MMC_BLK_ABORT,
+	MMC_BLK_DATA_ERR,
+	MMC_BLK_ECC_ERR,
+	MMC_BLK_NOMEDIUM,
+	MMC_BLK_NEW_REQUEST,
+};
+
 /* The number of MMC physical partitions.  These consist of:
  * boot partitions (2), general purpose partitions (4) in MMC v4.4.
  */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 9b9cdaf..fdf4dac 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -120,6 +120,24 @@ struct mmc_data {
 	s32			host_cookie;	/* host private data */
 };
 
+/**
+ * struct mmc_sync_data - synchronization details for the mmc context
+ * @is_done_rcv:	wake-up reason was a completed request
+ * @is_new_req:		wake-up reason was a new request
+ * @should_skip_awake:	done arrived, no need to wake up
+ * @is_waiting:		mmc context is in the waiting state
+ * @wait:		wait queue
+ * @lock:		spinlock to protect this struct
+ */
+struct mmc_sync_data {
+	bool			is_done_rcv;
+	bool			is_new_req;
+	bool			should_skip_awake;
+	bool			is_waiting;
+	wait_queue_head_t	wait;
+	spinlock_t		lock;
+};
+
 struct mmc_request {
 	struct mmc_command	*sbc;		/* SET_BLOCK_COUNT for multiblock */
 	struct mmc_command	*cmd;
@@ -128,6 +146,7 @@ struct mmc_request {
 
 	struct completion	completion;
 	void			(*done)(struct mmc_request *);/* completion function */
+	struct mmc_sync_data    *sync_data;
 };
 
 struct mmc_host;
@@ -138,6 +157,8 @@ extern int mmc_stop_bkops(struct mmc_card *);
 extern int mmc_read_bkops_status(struct mmc_card *);
 extern struct mmc_async_req *mmc_start_req(struct mmc_host *,
 					   struct mmc_async_req *, int *);
+extern struct mmc_async_req *mmc_start_data_req(struct mmc_host *,
+					   struct mmc_async_req *, int *);
 extern int mmc_interrupt_hpi(struct mmc_card *);
 extern void mmc_wait_for_req(struct mmc_host *, struct mmc_request *);
 extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int);
-- 
1.7.6
--
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-15 15:36 [PATCH v1] mmc: fix async request mechanism for sequential read scenarios Konstantin Dorfman
@ 2012-10-21 23:02 ` Per Forlin
  0 siblings, 0 replies; 25+ messages in thread
From: Per Forlin @ 2012-10-21 23:02 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: cjb, linux-mmc

On Mon, Oct 15, 2012 at 5:36 PM, Konstantin Dorfman
<kdorfman@codeaurora.org> wrote:
> The main assumption of the async request design is that the file
> system adds block requests to the block device queue asynchronously
> without waiting for completion (see the Rationale section of
> https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req).
>
> We found out that in the case of sequential read operations this is
> not the case, due to the read-ahead mechanism.
> When mmcqd reports the completion of a request, a context switch is
> needed to allow the next read-ahead BIOs to be inserted into the block
> layer. Since mmcqd tries to fetch another request immediately after
> the completion of the previous request, it gets NULL and starts
> waiting for the completion of the previous request. This wait gives
> the FS the opportunity to insert the next request, but the MMC layer
> is already blocked on the previous request's completion and is not
> aware of the new request waiting to be fetched.
I thought that I could trigger a context switch in order to give
execution time for FS to add the new request to the MMC queue.
I made a simple hack to call yield() in case the request gets NULL. I
thought it may give the FS layer enough time to add a new request to
the MMC queue. This would not delay the MMC transfer since the yield()
is done in parallel with an ongoing transfer. Anyway it was just meant
to be a simple test.

One yield was not enough. Just as a sanity check I added an msleep as
well, and that was enough to let the FS add a new request.
Would it be possible to gain throughput by delaying the fetch of the
new request, to avoid unnecessary NULL requests?

If (ongoing request is read AND size is max read ahead AND new request
is NULL) yield();
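
In mmc_queue_thread() that heuristic could look roughly like this
(illustrative only; MAX_READAHEAD_BYTES is an invented threshold, not
an existing mmc or block layer symbol):

	/* the ongoing request is tracked in mq->mqrq_prev->req */
	if (!req && mq->mqrq_prev->req &&
	    rq_data_dir(mq->mqrq_prev->req) == READ &&
	    blk_rq_bytes(mq->mqrq_prev->req) >= MAX_READAHEAD_BYTES)
		yield(); /* let the FS queue the next read-ahead BIOs */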

BR
Per


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-11-19 21:34                         ` Per Förlin
  2012-11-20 16:26                           ` Konstantin Dorfman
@ 2012-11-26 15:28                           ` Konstantin Dorfman
  1 sibling, 0 replies; 25+ messages in thread
From: Konstantin Dorfman @ 2012-11-26 15:28 UTC (permalink / raw)
  To: Per Förlin; +Cc: Per Forlin, Venkatraman S, cjb, linux-mmc

[-- Attachment #1: Type: text/plain, Size: 3052 bytes --]

Hello,

On 11/19/2012 11:34 PM, Per Förlin wrote:
> On 11/19/2012 03:32 PM, Per Förlin wrote:
>> On 11/19/2012 10:48 AM, Konstantin Dorfman wrote:
>>
> I'm fine with wait_event() (block request transfers) combined with completion (for other transfers), as long as the core.c API is intact. I don't see a reason for a new start_data_req().
>
> mmc_start_req() is only used by the mmc block transfers.
> Making a test case in mmc_test.c is a good way to stress test the feature (e.g. random timing of incoming new requests) and it will show how the API works too.
>
> BR
> Per
>
Right now there are a couple of tests in the mmc_test module that use
the async request mechanism. After applying my fix (v3 "mmc: fix async
request mechanism for sequential read scenarios"), these tests were
broken.

The attached patch fixes those broken tests, and it also shows exactly
what the API change is. It is not in the mmc_start_req() signature; it
is the new context_info field that we need to synchronize with the
NEW_REQUEST notification. mmc_test does not use the notification, but
it is very easy to implement.
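
For example, a stress thread with random arrival times (as you
suggested) could raise the notification roughly as below. This is only
a sketch: mmc_test_new_req_thread() does not exist, and the
context_info flag names are assumed to mirror the v1 mmc_sync_data
fields.

/*
 * Sketch only: randomly signal NEW_REQUEST while a transfer is in
 * flight, mirroring what mmc_request_fn() does in the real flow.
 */
static int mmc_test_new_req_thread(void *data)
{
	struct mmc_context_info *ci = data;
	unsigned long flags;

	while (!kthread_should_stop()) {
		msleep(random32() % 10);	/* random arrival time */
		spin_lock_irqsave(&ci->lock, flags);
		if (ci->is_waiting && !ci->is_new_req) {
			ci->is_new_req = true;
			wake_up_interruptible(&ci->wait);
		}
		spin_unlock_irqrestore(&ci->lock, flags);
	}
	return 0;
}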


>> My main concern is to avoid adding new API to core.c in order to add the NEW_REQ feature. My patch is an example of changes to be done in core.c without adding new functions.
Now you can review the API changes.

>>
>>>
>>> We would like to have a generic capability to handle additional events,
>>> such as HPI/stop flow, in addition to the NEW_REQUEST notification.
>>> Therefore, an event mechanism seems to be a better design than completion.
>>>
>>> I've looked at SDIO code and from what I can understand, right now SDIO
>>> is not using the async request mechanism and works from the 'wait_for_cmd()'
>>> level. This means that such work as exposing MMC core API's is major
>>> change and definitely should be separate design and implementation
>>> effort, while my current patch right now will fix mmc thread behavior
>>> and give better performance for upper layers.
>> You are right. There is no support in the SDIO implementation for the pre/post/async feature. Still, I had it in mind when designing "struct mmc_async". It's possible to re-use the mmc core parts in order to utilize this support in the SDIO case. I'm not saying we should design for SDIO but at least keep the API in a way that it's potentially usable from SDIO too. The API of core.c (start/wait of requests) should make sense without the existence of the MMC block driver (block/queue).
>>
>> One way to test the design is to add a test in mmc_test.c for pending NEW_REQ. In mmc_test.c there is no block.c or queue.c.
>> If the test case is simple to write in mmc_test.c, it means that the API is at a good level.
I can simulate a single NEW_REQUEST notification in mmc_test, but this
will check only correctness; without a real queue of requests we can't
see/measure the throughput/latency gain of this.

Kind reminder about patch v3, waiting for your review.

Thanks

-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation

[-- Attachment #2: 0001-mmc_test-fix-non-blocking-req-test-to-support-mmc-co.patch --]
[-- Type: text/x-diff, Size: 3064 bytes --]

From 240fa5757e9c784679b391ba42ec58e70fe856d9 Mon Sep 17 00:00:00 2001
From: Konstantin Dorfman <kdorfman@codeaurora.org>
Date: Mon, 26 Nov 2012 11:50:35 +0200
Subject: [RFC/PATCH] mmc_test: fix non-blocking req test to support mmc core
 wait_event mechanism

After introducing the new wait_event synchronization mechanism at the
mmc/core/core.c level, the non-blocking request tests in the mmc_test
UT module are broken.

This patch fixes the tests by updating the test environment to work
correctly on top of the new wait_event mechanism.

Signed-off-by: Konstantin Dorfman <kdorfman@codeaurora.org>

diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index 759714e..764471f 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -764,7 +764,8 @@ static int mmc_test_check_broken_result(struct mmc_test_card *test,
 static void mmc_test_nonblock_reset(struct mmc_request *mrq,
 				    struct mmc_command *cmd,
 				    struct mmc_command *stop,
-				    struct mmc_data *data)
+				    struct mmc_data *data,
+				    struct mmc_context_info *context_info)
 {
 	memset(mrq, 0, sizeof(struct mmc_request));
 	memset(cmd, 0, sizeof(struct mmc_command));
@@ -774,6 +775,7 @@ static void mmc_test_nonblock_reset(struct mmc_request *mrq,
 	mrq->cmd = cmd;
 	mrq->data = data;
 	mrq->stop = stop;
+	mrq->context_info = context_info;
 }
 static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
 				      struct scatterlist *sg, unsigned sg_len,
@@ -790,6 +792,8 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
 	struct mmc_command stop2;
 	struct mmc_data data2;
 
+	struct mmc_context_info context_info;
+
 	struct mmc_test_async_req test_areq[2];
 	struct mmc_async_req *done_areq;
 	struct mmc_async_req *cur_areq = &test_areq[0].areq;
@@ -800,14 +804,18 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
 	test_areq[0].test = test;
 	test_areq[1].test = test;
 
-	mmc_test_nonblock_reset(&mrq1, &cmd1, &stop1, &data1);
-	mmc_test_nonblock_reset(&mrq2, &cmd2, &stop2, &data2);
+	memset(&context_info, 0, sizeof(struct mmc_context_info));
+
+	mmc_test_nonblock_reset(&mrq1, &cmd1, &stop1, &data1, &context_info);
+	mmc_test_nonblock_reset(&mrq2, &cmd2, &stop2, &data2, &context_info);
 
 	cur_areq->mrq = &mrq1;
 	cur_areq->err_check = mmc_test_check_result_async;
 	other_areq->mrq = &mrq2;
 	other_areq->err_check = mmc_test_check_result_async;
 
+	spin_lock_init(&context_info.lock);
+	init_waitqueue_head(&context_info.wait);
 	for (i = 0; i < count; i++) {
 		mmc_test_prepare_mrq(test, cur_areq->mrq, sg, sg_len, dev_addr,
 				     blocks, blksz, write);
@@ -819,10 +827,10 @@ static int mmc_test_nonblock_transfer(struct mmc_test_card *test,
 		if (done_areq) {
 			if (done_areq->mrq == &mrq2)
 				mmc_test_nonblock_reset(&mrq2, &cmd2,
-							&stop2, &data2);
+							&stop2, &data2, &context_info);
 			else
 				mmc_test_nonblock_reset(&mrq1, &cmd1,
-							&stop1, &data1);
+							&stop1, &data1, &context_info);
 		}
 		done_areq = cur_areq;
 		cur_areq = other_areq;
-- 
1.7.6



* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-11-20 16:26                           ` Konstantin Dorfman
@ 2012-11-20 18:57                             ` Konstantin Dorfman
  0 siblings, 0 replies; 25+ messages in thread
From: Konstantin Dorfman @ 2012-11-20 18:57 UTC (permalink / raw)
  To: Konstantin Dorfman
  Cc: Per Förlin, Per Forlin, Venkatraman S, cjb, linux-mmc

Correction inside
On Tue, November 20, 2012 6:26 pm, Konstantin Dorfman wrote:
> [...]
>
> May I ask you to see the [PATCH v2] patch? I think there is no change
> in the core.c API (by core.c API do you mean all functions from core.h?).
>
[PATCH v3] mmc: fix async request mechanism for sequential read scenarios

> Also to use new request feature of core.c one need to implement
> notification similar to mmc_request_fn() and I do not see difficulties
> to do it from SDIO.
>
>
>> Making a test case in mmc_test.c is a good way to stress test the
>> feature (e.g. random timing of incoming new requests) and it will show
>> how the API works too.
>>
>> BR
>> Per
>>
>>> My main concern is to avoid adding new API to core.c in order to add
>>> the NEW_REQ feature. My patch is an example of changes to be done in
>>> core.c without adding new functions.
>>>
>>>>
>>>> We would like to have a generic capability to handle additional
>>>> events,
>>>> such as HPI/stop flow, in addition to the NEW_REQUEST notification.
>>>> Therefore, an event mechanism seems to be a better design than
>>>> completion.
>>>>
>>>> I've looked at SDIO code and from what I can understand, right now
>>>> SDIO
>>>> is not using async request mechanism and works from 'wait_for_cmd()`
>>>> level. This means that such work as exposing MMC core API's is major
>>>> change and definitely should be separate design and implementation
>>>> effort, while my current patch right now will fix mmc thread behavior
>>>> and give better performance for upper layers.
>>> You are right. There is no support in the SDIO implementation for
>>> pre/post/async feature. Still I had it in mind design the "struct
>>> mmc_async". It's possible to re-use the mmc core parts in order to
>>> utilize this support in the SDIO case. I'm not saying we should design
>>> for SDIO but at least keep the API in a way that it's potential usable
>>> from SDIO too. The API of core.c (start/wait of requests) should make
>>> sense without the existence of MMC block driver (block/queue).
>>>
>>> One way to test the design is to add a test in mmc_test.c for penging
>>> NEW_REQ. In mmc_test.c there is not block.c or queue.c.
>>> If the test case if simple to write in mmc_test.c it's means that the
>>> API is on a good level.
> I have plans to implement new test in mmc_test.c, but current patch was
> tested using special test I/O scheduler, that is used instead of cfq I/O
> scheduler: "[PATCH v5 1/3] block: Add test-iosched scheduler"
>
> This gives us great power to create any ut scenarios.
>
> --
> Konstantin Dorfman,
> QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
> Inc. is a member of Code Aurora Forum,
> hosted by The Linux Foundation
> --
> To unsubscribe from this list: send the line "unsubscribe linux-mmc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>


-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation



* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-11-19 21:34                         ` Per Förlin
@ 2012-11-20 16:26                           ` Konstantin Dorfman
  2012-11-20 18:57                             ` Konstantin Dorfman
  2012-11-26 15:28                           ` Konstantin Dorfman
  1 sibling, 1 reply; 25+ messages in thread
From: Konstantin Dorfman @ 2012-11-20 16:26 UTC (permalink / raw)
  To: Per Förlin; +Cc: Per Forlin, Venkatraman S, cjb, linux-mmc

Hello,

On 11/19/2012 11:34 PM, Per Förlin wrote:

>>>>>>        if (host->areq) {
>>>>>> +             if (!areq)
>>>>>> +                     mmc_prefetch_init(host->areq,
>>>>>> +                                       &host->areq->mrq->completion);
>>>>>>                mmc_wait_for_req_done(host, host->areq->mrq);
>>>>>> +             if (!areq) {
>>>>>> +                     mmc_prefetch_start(host->areq, false);
>>>>>> +                     if (mmc_prefetch_pending(host->areq))
>>>>>> +                             return NULL;
>>> In this case, mmc thread may be unblocked because done() arrived for
>>> current request and not because of a new request notification. In such a case
>>> we would like the done request to be handled before fetching the new
>>> request.
>> I agree. We wake up and, if there is no pending new request, execution should proceed normally by handling the completed request.
>>
>>> In my code is_done_rcv flag used along with is_new_req flag in
>>> order to differentiate the reason for mmc thread awake.
>> In my patch the return value of "mmc_prefetch_pending()" specifies whether there is a pending request.
>> If both the current request is done and mmc_prefetch_pending is done at the same time, I see there is a problem with my patch. There needs to be a flag to indicate whether the current request is done.
There is a race between the done() and NEW_REQUEST events; also, when
we get both of them, the order is to complete the current request and
then to fetch the new one.
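
With the v1 sync_data flags the wait loop enforces exactly that order,
roughly:

	/* both flags may be set when the events race; handle done first */
	if (sync_data->is_done_rcv) {
		sync_data->is_done_rcv = false;
		err = host->areq->err_check(host->card, host->areq);
	} else if (sync_data->is_new_req) {
		sync_data->is_new_req = false;
		err = MMC_BLK_NEW_REQUEST; /* fetch the new request next */
	}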


>>>>> I understand your motivation and idea for re-structuring the patch. It is
>>>>> still not clear for me how exactly mmc thread will be awaken on new
>>>>> request notification in your version, but I have another problem:
>>>>>
>>>> mmc_request_fn() is called and it calls complete(mq->prefetch_completion) which wakes up the current thread.
>>>> My patch is just an example. The intention is to make the patch cleaner. But I may have missed some of the HPI aspects.
>>>
>>> Is it the lack of function wrappers that you are using in your example?
>> It's fine to skip the function wrappers if they make no sense.
>> What I want to avoid is to create new functions for data_req_start and data_req_wait.
>> I would rather add this to the existing functions in core.c and keep changes in block.c and the flow minimal.
>>
>>> As I understand your example, you mean to implement generic logic on
>>> core/core.c level by using wrapper functions and leave final
>>> implementation for MMC to card/queue.c and for SDIO layer to
>>> card/..sdio.. (I'm not too familiar with sdio protocol implementation).
>>> Well, it makes a lot of sense.
>>>
>> I still have "plans" to add pre/post/async support in the SDIO framework too but I have not got to it.
>> It would be nice to add your NEW_REQ feature along with the other mmc-async features, to make it usable from SDIO.
>>
>>
>>> But the devil is in details - there is a lot of work in
>>> mmc_wait_for_data_req_done(), done() callback and also error handling
>>> changes for card/block.c
>> Things in core.c could be re-used in the SDIO case. In block.c there is only FS-specific implementation, not relevant for SDIO.
>>
>>>
>>> Do you think that the wait_event() API does not suit the same
>>> semantics as the completion API?
>> The waiting mechanisms may be wait_event or completion.
>> I'm not in favor of using both. Better to replace completion with wait_event if you prefer.
>>
> I'm fine with wait_event() (block request transfers) combined with completion (for other transfers), as long as the core.c API is intact. I don't see a reason for a new start_data_req().
>
> mmc_start_req() is only used by the mmc block transfers.
The main idea is to change the start_req() waiting point (the
mmc_wait_for_data_req_done() function) in such a way that external
events (in the MMC case, coming from the block layer) may wake the MMC
context from waiting for the current request. This is done by changing
the original mmc_start_req() function without changing its signature.

May I ask you to see the [PATCH v2] patch? I think there is no change
in the core.c API (by core.c API do you mean all functions from core.h?).

Also, to use the new request feature of core.c one needs to implement
a notification similar to mmc_request_fn(), and I do not see
difficulties in doing that from SDIO.
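
A hypothetical SDIO-side hook could mirror mmc_request_fn(); a sketch
only (the helper name and call site are assumptions, not existing
API):

/*
 * Sketch: an SDIO driver with a new request ready would wake the mmc
 * context the same way the block layer does in mmc_request_fn().
 */
static void sdio_notify_new_req(struct mmc_context_info *ci)
{
	unsigned long flags;

	spin_lock_irqsave(&ci->lock, flags);
	if (ci->is_waiting && !ci->is_new_req) {
		ci->is_new_req = true;
		wake_up_interruptible(&ci->wait);
	}
	spin_unlock_irqrestore(&ci->lock, flags);
}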


> Making a test case in mmc_test.c is a good way to stress test the feature (e.g. random timing of incoming new requests) and it will show how the API works too.
>
> BR
> Per
>
>> My main concern is to avoid adding new API to core.c in order to add the NEW_REQ feature. My patch is an example of changes to be done in core.c without adding new functions.
>>
>>>
>>> We would like to have a generic capability to handle additional events,
>>> such as HPI/stop flow, in addition to the NEW_REQUEST notification.
>>> Therefore, an event mechanism seems to be a better design than completion.
>>>
>>> I've looked at SDIO code and from what I can understand, right now SDIO
>>> is not using the async request mechanism and works from the 'wait_for_cmd()'
>>> level. This means that such work as exposing MMC core API's is major
>>> change and definitely should be separate design and implementation
>>> effort, while my current patch right now will fix mmc thread behavior
>>> and give better performance for upper layers.
>> You are right. There is no support in the SDIO implementation for the pre/post/async feature. Still, I had it in mind when designing "struct mmc_async". It's possible to re-use the mmc core parts in order to utilize this support in the SDIO case. I'm not saying we should design for SDIO but at least keep the API in a way that it's potentially usable from SDIO too. The API of core.c (start/wait of requests) should make sense without the existence of the MMC block driver (block/queue).
>>
>> One way to test the design is to add a test in mmc_test.c for pending NEW_REQ. In mmc_test.c there is no block.c or queue.c.
>> If the test case is simple to write in mmc_test.c, it means that the API is at a good level.
I have plans to implement a new test in mmc_test.c, but the current
patch was tested using a special test I/O scheduler, used instead of
the cfq I/O scheduler: "[PATCH v5 1/3] block: Add test-iosched
scheduler".

This gives us great power to create any unit-test scenarios.

-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-11-19 14:32                       ` Per Förlin
@ 2012-11-19 21:34                         ` Per Förlin
  2012-11-20 16:26                           ` Konstantin Dorfman
  2012-11-26 15:28                           ` Konstantin Dorfman
  0 siblings, 2 replies; 25+ messages in thread
From: Per Förlin @ 2012-11-19 21:34 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Per Forlin, Venkatraman S, cjb, linux-mmc

On 11/19/2012 03:32 PM, Per Förlin wrote:
> On 11/19/2012 10:48 AM, Konstantin Dorfman wrote:
>> Hello Per,
>>
>> Thank you for giving me an example; below I try to explain some
>> logic issues in it and ask you some questions about your vision of
>> the patch.
>>
>> On 11/15/2012 06:38 PM, Per Förlin wrote:
>>> On 11/14/2012 04:15 PM, Konstantin Dorfman wrote:
>>>> Hello Per,
>>>>
>>>> On 11/13/2012 11:10 PM, Per Forlin wrote:
>>>>> On Tue, Oct 30, 2012 at 1:19 PM, Konstantin Dorfman
>>>>> <kdorfman@codeaurora.org> wrote:
>>>>>> Hello,
>>>>>>
>>>>>> On 10/29/2012 11:40 PM, Per Forlin wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> I would like to move the focus of my concerns from root cause analysis
>>>>>>> to the actual patch.
>>>>>>> My first reflection is that the patch is relatively complex and some
>>>>>>> of the code looks duplicated.
>>>>>>> Would it be possible to simplify it and re-use the current execution flow?
>>>>>>>
>>>>>>> Is the following flow feasible?
>>>>>>>
>>>>>>> in mmc_start_req():
>>>>>>> --------------
>>>>>>> if (rqc == NULL && not_resend)
>>>>>>>   wait_for_both_mmc_and_arrival_of_new_req
>>>>>>> else
>>>>>>>   wait_only_for_mmc
>>>>>>>
>>>>>>> if (arrival_of_new_req) {
>>>>>>>    set flag to indicate fetch-new_req
>>>>>>>   return NULL;
>>>>>>> }
>>>>>>> -----------------
>>>>>>>
>>>>>>> in queue.c:
>>>>>>> if (fetch-new_req)
>>>>>>>   don't overwrite previous req.
>>>>>>>
>>>>>>> BR
>>>>>>> Per
>>>>>>
>>>>>> You are talking about using both waiting mechanisms, the old one (wait
>>>>>> on completion) and the new one, wait_event_interruptible()? But how
>>>>>> will the done() callback, called in host controller irq context,
>>>>>> differentiate which one is relevant for the request?
>>>>>>
>>>>>> I think it is better to use a single wait point for the mmc thread.
>>>>>
>>>>> I have sketched a patch to better explain my point. It's not tested; it
>>>>> barely compiles :)
>>>>> The patch tries to introduce your feature and still keep the same code
>>>>> path. And it exposes an API that could be used by SDIO as well.
>>>>> The intention of my sketch patch is only to explain what I tried to
>>>>> visualize in the pseudo code previously in this thread.
>>>>> The outcome of your final patch should be documented here, I think:
>>>>> Documentation/mmc/mmc-async-req.txt
>>>> This document is ready; I am attaching it to this mail and it will be
>>>> included in the next version of the patch (or a RESEND).
>>>>>
>>>>> Here follows my patch sketch:
>>>>> ........................................................
>>>>> diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
>>>>> index e360a97..08036a1 100644
>>>>> --- a/drivers/mmc/card/queue.c
>>>>> +++ b/drivers/mmc/card/queue.c
>>>>> @@ -66,6 +66,8 @@ static int mmc_queue_thread(void *d)
>>>>>               spin_unlock_irq(q->queue_lock);
>>>>>
>>>>>               if (req || mq->mqrq_prev->req) {
>>>>> +                     if (!req)
>>>>> +                             mmc_prefetch_start(&mq->mqrq_prev->mmc_active, true);
>>>>>                       set_current_state(TASK_RUNNING);
>>>>>                       mq->issue_fn(mq, req);
>>>>>               } else {
>>>>> @@ -79,11 +81,13 @@ static int mmc_queue_thread(void *d)
>>>>>               }
>>>>>
>>>>>               /* Current request becomes previous request and vice versa. */
>>>>> -             mq->mqrq_prev->brq.mrq.data = NULL;
>>>>> -             mq->mqrq_prev->req = NULL;
>>>>> -             tmp = mq->mqrq_prev;
>>>>> -             mq->mqrq_prev = mq->mqrq_cur;
>>>>> -             mq->mqrq_cur = tmp;
>>>>> +             if (!mmc_prefetch_pending(&mq->mqrq_prev->mmc_active)) {
>>>>> +                     mq->mqrq_prev->brq.mrq.data = NULL;
>>>>> +                     mq->mqrq_prev->req = NULL;
>>>>> +                     tmp = mq->mqrq_prev;
>>>>> +                     mq->mqrq_prev = mq->mqrq_cur;
>>>>> +                     mq->mqrq_cur = tmp;
>>>>> +             }
>>>>>       } while (1);
>>>>>       up(&mq->thread_sem);
>>>>>
>>>>> @@ -109,10 +113,44 @@ static void mmc_request_fn(struct request_queue *q)
>>>>>               return;
>>>>>       }
>>>>>
>>>>> +     if (mq->prefetch_enable) {
>>>>> +             spin_lock(&mq->prefetch_lock);
>>>>> +             if (mq->prefetch_completion)
>>>>> +                     complete(mq->prefetch_completion);
>> Since the mq->prefetch_completion init happens only in core.c after
>> mmc_start_req(NULL), we can miss all new request notifications that
>> arrive between fetching the NULL request and that point.
> It's a mistake in the patch.
> I meant to check mq->prefetch_pending to know whether we need to wait in the first place.
> That's why mq->prefetch_pending is assigned even if mq->prefetch_completion is not set yet.
> The patch needs to be updated to take this into account. One could let mmc_prefetch_init() return an indication of whether a NEW_REQ is already pending; if so, skip the wait.
> 
> 
>>>>> +             mq->prefetch_pending = true;
>>>>> +             spin_unlock(&mq->prefetch_lock);
>>>>> +     }
>>>>> +
>>>>>       if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
>>>>>               wake_up_process(mq->thread);
>>>>>  }
>>>>>
>>>>> +static void mmc_req_init(struct mmc_async_req *areq, struct completion *compl)
>>>>> +{
>>>>> +     struct mmc_queue *mq =
>>>>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>>>>> +
>>>>> +     spin_lock(&mq->prefetch_lock);
>>>>> +     mq->prefetch_completion = compl;
>>>>> +     if (mq->prefetch_pending)
>>>>> +             complete(mq->prefetch_completion);
>>>>> +     spin_unlock(&mq->prefetch_lock);
>>>>> +}
>>>>> +
>>>>> +static void mmc_req_start(struct mmc_async_req *areq, bool enable)
>>>>> +{
>>>>> +     struct mmc_queue *mq =
>>>>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>>>>> +     mq->prefetch_enable = enable;
>>>>> +}
>>>>> +
>>>>> +static bool mmc_req_pending(struct mmc_async_req *areq)
>>>>> +{
>>>>> +     struct mmc_queue *mq =
>>>>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>>>>> +     return mq->prefetch_pending;
>>>>> +}
>>>>> +
>>>>>  static struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
>>>>>  {
>>>>>       struct scatterlist *sg;
>>>>> @@ -166,6 +204,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct
>>>>> mmc_card *card,
>>>>>       int ret;
>>>>>       struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
>>>>>       struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
>>>>> +     spin_lock_init(&mq->prefetch_lock);
>>>>> +     mq->prefetch.wait_req_init = mmc_req_init;
>>>>> +     mq->prefetch.wait_req_start = mmc_req_start;
>>>>> +     mq->prefetch.wait_req_pending = mmc_req_pending;
>>>>> +     mqrq_cur->mmc_active.prefetch = &mq->prefetch;
>>>>> +     mqrq_prev->mmc_active.prefetch = &mq->prefetch;
>>>>>
>>>>>       if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
>>>>>               limit = *mmc_dev(host)->dma_mask;
>>>>> diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
>>>>> index d2a1eb4..5afd467 100644
>>>>> --- a/drivers/mmc/card/queue.h
>>>>> +++ b/drivers/mmc/card/queue.h
>>>>> @@ -33,6 +33,12 @@ struct mmc_queue {
>>>>>       struct mmc_queue_req    mqrq[2];
>>>>>       struct mmc_queue_req    *mqrq_cur;
>>>>>       struct mmc_queue_req    *mqrq_prev;
>>>>> +
>>>>> +     struct mmc_async_prefetch prefetch;
>>>>> +     spinlock_t              prefetch_lock;
>>>>> +     struct completion       *prefetch_completion;
>>>>> +     bool                    prefetch_enable;
>>>>> +     bool                    prefetch_pending;
>>>>>  };
>>>>>
>>>>>  extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
>>>>> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
>>>>> index 9503cab..06fc036 100644
>>>>> --- a/drivers/mmc/core/core.c
>>>>> +++ b/drivers/mmc/core/core.c
>>>>> @@ -352,7 +352,15 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
>>>>>               mmc_pre_req(host, areq->mrq, !host->areq);
>>>>>
>>>>>       if (host->areq) {
>>>>> +             if (!areq)
>>>>> +                     mmc_prefetch_init(host->areq,
>>>>> +                                       &host->areq->mrq->completion);
>>>>>               mmc_wait_for_req_done(host, host->areq->mrq);
>>>>> +             if (!areq) {
>>>>> +                     mmc_prefetch_start(host->areq, false);
>>>>> +                     if (mmc_prefetch_pending(host->areq))
>>>>> +                             return NULL;
>> In this case, the mmc thread may be unblocked because done() arrived for
>> the current request and not because of a new request notification. In
>> such a case we would like the completed request to be handled before
>> fetching the new request.
> I agree. If we wake up and there is no pending new request, execution should proceed normally by handling the completed request.
> 
>> In my code the is_done_rcv flag is used along with the is_new_req flag in
>> order to differentiate the reason the mmc thread was awakened.
> In my patch the return value of "mmc_prefetch_pending()" specifies whether there is a pending request and what should happen.
> If the current request completes and mmc_prefetch_pending() becomes true at the same time, I see there is a problem with my patch: there needs to be a flag to indicate whether the current request is done.
> 
>>
>>>>> +             }
>>>>>               err = host->areq->err_check(host->card, host->areq);
>>>>>       }
>>>>>
>>>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>>>> index 65c64ee..ce5d03f 100644
>>>>> --- a/include/linux/mmc/host.h
>>>>> +++ b/include/linux/mmc/host.h
>>>>> @@ -15,6 +15,7 @@
>>>>>  #include <linux/sched.h>
>>>>>  #include <linux/device.h>
>>>>>  #include <linux/fault-inject.h>
>>>>> +#include <linux/completion.h>
>>>>>
>>>>>  #include <linux/mmc/core.h>
>>>>>  #include <linux/mmc/pm.h>
>>>>> @@ -140,6 +141,13 @@ struct mmc_host_ops {
>>>>>
>>>>>  struct mmc_card;
>>>>>  struct device;
>>>>> +struct mmc_async_req;
>>>>> +
>>>>> +struct mmc_async_prefetch {
>>>>> +     void (*wait_req_init)(struct mmc_async_req *, struct completion *);
>>>>> +     void (*wait_req_start)(struct mmc_async_req *, bool);
>>>>> +     bool (*wait_req_pending)(struct mmc_async_req *);
>>>>> +};
>>>>>
>>>>>  struct mmc_async_req {
>>>>>       /* active mmc request */
>>>>> @@ -149,8 +157,33 @@ struct mmc_async_req {
>>>>>        * Returns 0 if success otherwise non zero.
>>>>>        */
>>>>>       int (*err_check) (struct mmc_card *, struct mmc_async_req *);
>>>>> +     struct mmc_async_prefetch *prefetch;
>>>>>  };
>>>>>
>>>>> +/* set completion variable, complete == NULL to reset completion */
>>>>> +static inline void mmc_prefetch_init(struct mmc_async_req *areq,
>>>>> +                                  struct completion *complete)
>>>>> +{
>>>>> +     if (areq->prefetch && areq->prefetch->wait_req_init)
>>>>> +             areq->prefetch->wait_req_init(areq, complete);
>>>>> +}
>>>>> +
>>>>> +/* enable or disable prefetch feature */
>>>>> +static inline void mmc_prefetch_start(struct mmc_async_req *areq, bool enable)
>>>>> +{
>>>>> +     if (areq->prefetch && areq->prefetch->wait_req_start)
>>>>> +             areq->prefetch->wait_req_start(areq, enable);
>>>>> +}
>>>>> +
>>>>> +/* return true if new request is pending otherwise false */
>>>>> +static inline bool mmc_prefetch_pending(struct mmc_async_req *areq)
>>>>> +{
>>>>> +     if (areq->prefetch && areq->prefetch->wait_req_pending)
>>>>> +             return areq->prefetch->wait_req_pending(areq);
>>>>> +     else
>>>>> +             return false;
>>>>> +}
>>>>> +
>>>>>  /**
>>>>>   * struct mmc_slot - MMC slot functions
>>>>>   *
>>>>> ....................................................................
>>>>>
>>>>>
>>>>> BR
>>>>> Per
>>>> I understand your motivation and the idea of restructuring the patch. It
>>>> is still not clear to me how exactly the mmc thread will be awakened on
>>>> a new request notification in your version, but I have another problem:
>>>>
>>> mmc_request_fn() is called and it calls complete(mq->prefetch_completion) which wakes up the current thread.
>>> My patch is just an example. The intention is to make the patch cleaner. But I may have missed some of the HPI aspects.
>>
>> Is it the lack of function wrappers that you are using in your example?
> It's fine to skip the function wrappers if they make no sense.
> What I want to avoid is to create new functions for data_req_start and data_req_wait.
> I would rather add this to the existing functions in core.c and keep the changes in block.c and the flow minimal.
> 
>> As I understand your example, you mean to implement the generic logic at
>> the core/core.c level by using wrapper functions and to leave the final
>> implementation for MMC to card/queue.c and for the SDIO layer to
>> card/..sdio.. (I'm not too familiar with the sdio protocol implementation).
>> Well, it makes a lot of sense.
>>
> I still have "plans" to add pre/post/async support in the SDIO framework too, but I have not gotten to it.
> It would be nice to add your NEW_REQ feature along with the other mmc-async features, to make it usable from SDIO.
> 
> 
>> But the devil is in the details: there is a lot of work in
>> mmc_wait_for_data_req_done(), in the done() callback, and also in the
>> error handling changes for card/block.c.
> Things in core.c could be re-used in the SDIO case. In block.c there is only FS-specific implementation that is not relevant for SDIO.
> 
>>
>> Do you think that the wait_event() API does not suit the same semantics
>> as the completion API?
> The waiting mechanism may be wait_event or completion.
> I'm not in favor of using both; better to replace the completion with wait_event if you prefer.
> 
I'm fine with wait_event() (block request transfers) combined with completion (for other transfers), as long as the core.c API is intact. I don't see a reason for a new start_data_req().

mmc_start_req() is only used by the mmc block transfers.
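
To make this concrete, a rough sketch (untested, error handling omitted;
the context fields wait/is_done_rcv/is_new_req are placeholders from this
discussion, not existing code) of mmc_start_req() keeping its signature
while only the block path waits on an event:

        /* Sketch: only the waiting primitive changes, not the core.c API. */
        if (host->areq) {
                if (!areq) {
                        /* block transfers: wake on "done" or "new request" */
                        wait_event(host->context.wait,
                                   host->context.is_done_rcv ||
                                   host->context.is_new_req);
                        if (!host->context.is_done_rcv &&
                            host->context.is_new_req) {
                                *error = MMC_BLK_NEW_REQUEST;
                                return NULL; /* caller fetches the new request */
                        }
                } else {
                        /* other users keep the completion-based wait */
                        mmc_wait_for_req_done(host, host->areq->mrq);
                }
                err = host->areq->err_check(host->card, host->areq);
        }
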
Making a test case in mmc_test.c is a good way to stress test the feature (e.g. random timing of incoming new requests) and it will show how the API works too.

BR
Per

> My main concern is to avoid adding new API to core.c in order to add the NEW_REQ feature. My patch is an example of changes to be done in core.c without adding new functions.
> 
>>
>> We would like to have a generic capability to handle additional events,
>> such as HPI/stop flow, in addition to the NEW_REQUEST notification.
>> Therefore, an event mechanism seems to be a better design than completion.
>>
>> I've looked at the SDIO code and, from what I can understand, right now
>> SDIO is not using the async request mechanism and works at the
>> 'wait_for_cmd()' level. This means that work such as exposing MMC core
>> APIs is a major change and should definitely be a separate design and
>> implementation effort, while my current patch will fix the mmc thread
>> behavior right now and give better performance for upper layers.
> You are right. There is no support in the SDIO implementation for the pre/post/async feature. Still, I had it in mind when designing "struct mmc_async". It's possible to re-use the mmc core parts in order to utilize this support in the SDIO case. I'm not saying we should design for SDIO, but at least keep the API in a way that makes it potentially usable from SDIO too. The API of core.c (start/wait of requests) should make sense without the existence of the MMC block driver (block/queue).
> 
> One way to test the design is to add a test in mmc_test.c for a pending NEW_REQ. In mmc_test.c there is no block.c or queue.c.
> If the test case is simple to write in mmc_test.c, it means that the API is on a good level.
> 
> 
> BR
> Per
> 
>>
>>
>>>
>>> If the API is not implemented the mmc core shall simply ignore it.
>>>
>>>> We want to expand this event-based mechanism (unblocking the mmc thread
>>>> from waiting for the running request) by adding another reason/event:
>>>> the URGENT_REQUEST event. When the block layer has an urgent request to
>>>> execute, it notifies the mmc layer (similar to the mmc_request()
>>>> notification) and the mmc layer handles this urgent request notification
>>>> by correctly stopping the running request, re-inserting all not yet
>>>> performed parts back into the I/O scheduler, and fetching & starting the
>>>> urgent request.
>>>> The reasoning and general idea are documented together with the "New
>>>> event" and will be submitted soon. The patch itself will be submitted
>>>> in a week or so.
>>> I have not considered the use of HPI in my proposal. If the NEW_REQ is first in line in the mmc queue it will be fetched as the NEW_REQ.
>>> Then the current request must be interrupted and returned to the mmc-queue or io-scheduler.
>>>
>>> I don't see a direct contradiction between the two designs.
>>>
>>> The main point is to make the NEW_REQ API simpler and clearer.
>>> My example is just an example.
>>>
>>>>
>>>> As for our discussion: to handle both events, the mmc layer should be
>>>> capable of being awakened not only when there is no
>>>> mmc_prefetch_pending() (in your patch's terms), but also in the perfect
>>>> case of async requests: one running on the bus and a second prepared
>>>> and waiting for the first.
>>>>
>>>> I think that in case the SDIO protocol driver needs such functionality
>>>> as NEW_REQUEST, one needs to implement notification to the mmc layer
>>>> similar to the mmc_request() code in my patch.
>>> SDIO needs to implement the new NEW_REQ API, but the API needs to be clean.
>>>
>>> BR
>>> Per
>>>
>> Best regards,
>> --
>> Konstantin Dorfman,
>> QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
>> Inc. is a member of Code Aurora Forum,
>> hosted by The Linux Foundation
>>
> 



* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-11-19  9:48                     ` Konstantin Dorfman
@ 2012-11-19 14:32                       ` Per Förlin
  2012-11-19 21:34                         ` Per Förlin
  0 siblings, 1 reply; 25+ messages in thread
From: Per Förlin @ 2012-11-19 14:32 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Per Forlin, Venkatraman S, cjb, linux-mmc

On 11/19/2012 10:48 AM, Konstantin Dorfman wrote:
> Hello Per,
> 
> Thank you for giving me an example. Below I'm trying to explain some
> logic issues with it and to ask you some questions about your vision of
> the patch.
> 
> On 11/15/2012 06:38 PM, Per Förlin wrote:
>> On 11/14/2012 04:15 PM, Konstantin Dorfman wrote:
>>> Hello Per,
>>>
>>> On 11/13/2012 11:10 PM, Per Forlin wrote:
>>>> On Tue, Oct 30, 2012 at 1:19 PM, Konstantin Dorfman
>>>> <kdorfman@codeaurora.org> wrote:
>>>>> Hello,
>>>>>
>>>>> On 10/29/2012 11:40 PM, Per Forlin wrote:
>>>>>> Hi,
>>>>>>
>>>>>> I would like to move the focus of my concerns from root cause analysis
>>>>>> to the actual patch.
>>>>>> My first reflection is that the patch is relatively complex and some
>>>>>> of the code looks duplicated.
>>>>>> Would it be possible to simplify it and re-use the current execution flow?
>>>>>>
>>>>>> Is the following flow feasible?
>>>>>>
>>>>>> in mmc_start_req():
>>>>>> --------------
>>>>>> if (rqc == NULL && not_resend)
>>>>>>   wait_for_both_mmc_and_arrival_of_new_req
>>>>>> else
>>>>>>   wait_only_for_mmc
>>>>>>
>>>>>> if (arrival_of_new_req) {
>>>>>>    set flag to indicate fetch-new_req
>>>>>>   return NULL;
>>>>>> }
>>>>>> -----------------
>>>>>>
>>>>>> in queue.c:
>>>>>> if (fetch-new_req)
>>>>>>   don't overwrite previous req.
>>>>>>
>>>>>> BR
>>>>>> Per
>>>>>
>>>>> You are talking about using both waiting mechanisms, the old one (wait
>>>>> on completion) and the new one, wait_event_interruptible()? But how
>>>>> will the done() callback, called in host controller irq context,
>>>>> differentiate which one is relevant for the request?
>>>>>
>>>>> I think it is better to use a single wait point for the mmc thread.
>>>>
>>>> I have sketched a patch to better explain my point. It's not tested; it
>>>> barely compiles :)
>>>> The patch tries to introduce your feature and still keep the same code
>>>> path. And it exposes an API that could be used by SDIO as well.
>>>> The intention of my sketch patch is only to explain what I tried to
>>>> visualize in the pseudo code previously in this thread.
>>>> The outcome of your final patch should be documented here, I think:
>>>> Documentation/mmc/mmc-async-req.txt
>>> This document is ready; I am attaching it to this mail and it will be
>>> included in the next version of the patch (or a RESEND).
>>>>
>>>> Here follows my patch sketch:
>>>> ........................................................
>>>> diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
>>>> index e360a97..08036a1 100644
>>>> --- a/drivers/mmc/card/queue.c
>>>> +++ b/drivers/mmc/card/queue.c
>>>> @@ -66,6 +66,8 @@ static int mmc_queue_thread(void *d)
>>>>               spin_unlock_irq(q->queue_lock);
>>>>
>>>>               if (req || mq->mqrq_prev->req) {
>>>> +                     if (!req)
>>>> +                             mmc_prefetch_start(&mq->mqrq_prev->mmc_active, true);
>>>>                       set_current_state(TASK_RUNNING);
>>>>                       mq->issue_fn(mq, req);
>>>>               } else {
>>>> @@ -79,11 +81,13 @@ static int mmc_queue_thread(void *d)
>>>>               }
>>>>
>>>>               /* Current request becomes previous request and vice versa. */
>>>> -             mq->mqrq_prev->brq.mrq.data = NULL;
>>>> -             mq->mqrq_prev->req = NULL;
>>>> -             tmp = mq->mqrq_prev;
>>>> -             mq->mqrq_prev = mq->mqrq_cur;
>>>> -             mq->mqrq_cur = tmp;
>>>> +             if (!mmc_prefetch_pending(&mq->mqrq_prev->mmc_active)) {
>>>> +                     mq->mqrq_prev->brq.mrq.data = NULL;
>>>> +                     mq->mqrq_prev->req = NULL;
>>>> +                     tmp = mq->mqrq_prev;
>>>> +                     mq->mqrq_prev = mq->mqrq_cur;
>>>> +                     mq->mqrq_cur = tmp;
>>>> +             }
>>>>       } while (1);
>>>>       up(&mq->thread_sem);
>>>>
>>>> @@ -109,10 +113,44 @@ static void mmc_request_fn(struct request_queue *q)
>>>>               return;
>>>>       }
>>>>
>>>> +     if (mq->prefetch_enable) {
>>>> +             spin_lock(&mq->prefetch_lock);
>>>> +             if (mq->prefetch_completion)
>>>> +                     complete(mq->prefetch_completion);
> Since the mq->prefetch_completion init happens only in core.c after
> mmc_start_req(NULL), we can miss all new request notifications that
> arrive between fetching the NULL request and that point.
It's a mistake in the patch.
I meant to check mq->prefetch_pending to know whether we need to wait in the first place.
That's why mq->prefetch_pending is assigned even if mq->prefetch_completion is not set yet.
The patch needs to be updated to take this into account. One could let mmc_prefetch_init() return an indication of whether a NEW_REQ is already pending; if so, skip the wait.
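
Something along these lines (untested, derived from the sketch above) is
what I have in mind:

static bool mmc_req_init(struct mmc_async_req *areq, struct completion *compl)
{
        struct mmc_queue *mq =
                container_of(areq->prefetch, struct mmc_queue, prefetch);
        bool pending;

        /* Report whether a NEW_REQ already arrived so the caller can
         * skip the wait entirely in that case. */
        spin_lock(&mq->prefetch_lock);
        mq->prefetch_completion = compl;
        pending = mq->prefetch_pending;
        spin_unlock(&mq->prefetch_lock);

        return pending; /* true: skip waiting and fetch the new request */
}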


>>>> +             mq->prefetch_pending = true;
>>>> +             spin_unlock(&mq->prefetch_lock);
>>>> +     }
>>>> +
>>>>       if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
>>>>               wake_up_process(mq->thread);
>>>>  }
>>>>
>>>> +static void mmc_req_init(struct mmc_async_req *areq, struct completion *compl)
>>>> +{
>>>> +     struct mmc_queue *mq =
>>>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>>>> +
>>>> +     spin_lock(&mq->prefetch_lock);
>>>> +     mq->prefetch_completion = compl;
>>>> +     if (mq->prefetch_pending)
>>>> +             complete(mq->prefetch_completion);
>>>> +     spin_unlock(&mq->prefetch_lock);
>>>> +}
>>>> +
>>>> +static void mmc_req_start(struct mmc_async_req *areq, bool enable)
>>>> +{
>>>> +     struct mmc_queue *mq =
>>>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>>>> +     mq->prefetch_enable = enable;
>>>> +}
>>>> +
>>>> +static bool mmc_req_pending(struct mmc_async_req *areq)
>>>> +{
>>>> +     struct mmc_queue *mq =
>>>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>>>> +     return mq->prefetch_pending;
>>>> +}
>>>> +
>>>>  static struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
>>>>  {
>>>>       struct scatterlist *sg;
>>>> @@ -166,6 +204,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct
>>>> mmc_card *card,
>>>>       int ret;
>>>>       struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
>>>>       struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
>>>> +     spin_lock_init(&mq->prefetch_lock);
>>>> +     mq->prefetch.wait_req_init = mmc_req_init;
>>>> +     mq->prefetch.wait_req_start = mmc_req_start;
>>>> +     mq->prefetch.wait_req_pending = mmc_req_pending;
>>>> +     mqrq_cur->mmc_active.prefetch = &mq->prefetch;
>>>> +     mqrq_prev->mmc_active.prefetch = &mq->prefetch;
>>>>
>>>>       if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
>>>>               limit = *mmc_dev(host)->dma_mask;
>>>> diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
>>>> index d2a1eb4..5afd467 100644
>>>> --- a/drivers/mmc/card/queue.h
>>>> +++ b/drivers/mmc/card/queue.h
>>>> @@ -33,6 +33,12 @@ struct mmc_queue {
>>>>       struct mmc_queue_req    mqrq[2];
>>>>       struct mmc_queue_req    *mqrq_cur;
>>>>       struct mmc_queue_req    *mqrq_prev;
>>>> +
>>>> +     struct mmc_async_prefetch prefetch;
>>>> +     spinlock_t              prefetch_lock;
>>>> +     struct completion       *prefetch_completion;
>>>> +     bool                    prefetch_enable;
>>>> +     bool                    prefetch_pending;
>>>>  };
>>>>
>>>>  extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
>>>> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
>>>> index 9503cab..06fc036 100644
>>>> --- a/drivers/mmc/core/core.c
>>>> +++ b/drivers/mmc/core/core.c
>>>> @@ -352,7 +352,15 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
>>>>               mmc_pre_req(host, areq->mrq, !host->areq);
>>>>
>>>>       if (host->areq) {
>>>> +             if (!areq)
>>>> +                     mmc_prefetch_init(host->areq,
>>>> +                                       &host->areq->mrq->completion);
>>>>               mmc_wait_for_req_done(host, host->areq->mrq);
>>>> +             if (!areq) {
>>>> +                     mmc_prefetch_start(host->areq, false);
>>>> +                     if (mmc_prefetch_pending(host->areq))
>>>> +                             return NULL;
> In this case, the mmc thread may be unblocked because done() arrived for
> the current request and not because of a new request notification. In
> such a case we would like the completed request to be handled before
> fetching the new request.
I agree. If we wake up and there is no pending new request, execution should proceed normally by handling the completed request.

> In my code the is_done_rcv flag is used along with the is_new_req flag in
> order to differentiate the reason the mmc thread was awakened.
In my patch the return value of "mmc_prefetch_pending()" specifies whether there is a pending request and what should happen.
If the current request completes and mmc_prefetch_pending() becomes true at the same time, I see there is a problem with my patch: there needs to be a flag to indicate whether the current request is done.
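
A possible fix, sketched (untested; "done_rcv" is a made-up flag that
done() would set, it does not exist in the sketch patch):

        /* Prefer handling a completed current request over bailing out
         * for a new one when both happen at the same time. */
        if (!areq) {
                mmc_prefetch_start(host->areq, false);
                if (!host->areq->done_rcv &&
                    mmc_prefetch_pending(host->areq))
                        return NULL; /* only the new request is pending */
        }
        err = host->areq->err_check(host->card, host->areq);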

> 
>>>> +             }
>>>>               err = host->areq->err_check(host->card, host->areq);
>>>>       }
>>>>
>>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>>> index 65c64ee..ce5d03f 100644
>>>> --- a/include/linux/mmc/host.h
>>>> +++ b/include/linux/mmc/host.h
>>>> @@ -15,6 +15,7 @@
>>>>  #include <linux/sched.h>
>>>>  #include <linux/device.h>
>>>>  #include <linux/fault-inject.h>
>>>> +#include <linux/completion.h>
>>>>
>>>>  #include <linux/mmc/core.h>
>>>>  #include <linux/mmc/pm.h>
>>>> @@ -140,6 +141,13 @@ struct mmc_host_ops {
>>>>
>>>>  struct mmc_card;
>>>>  struct device;
>>>> +struct mmc_async_req;
>>>> +
>>>> +struct mmc_async_prefetch {
>>>> +     void (*wait_req_init)(struct mmc_async_req *, struct completion *);
>>>> +     void (*wait_req_start)(struct mmc_async_req *, bool);
>>>> +     bool (*wait_req_pending)(struct mmc_async_req *);
>>>> +};
>>>>
>>>>  struct mmc_async_req {
>>>>       /* active mmc request */
>>>> @@ -149,8 +157,33 @@ struct mmc_async_req {
>>>>        * Returns 0 if success otherwise non zero.
>>>>        */
>>>>       int (*err_check) (struct mmc_card *, struct mmc_async_req *);
>>>> +     struct mmc_async_prefetch *prefetch;
>>>>  };
>>>>
>>>> +/* set completion variable, complete == NULL to reset completion */
>>>> +static inline void mmc_prefetch_init(struct mmc_async_req *areq,
>>>> +                                  struct completion *complete)
>>>> +{
>>>> +     if (areq->prefetch && areq->prefetch->wait_req_init)
>>>> +             areq->prefetch->wait_req_init(areq, complete);
>>>> +}
>>>> +
>>>> +/* enable or disable prefetch feature */
>>>> +static inline void mmc_prefetch_start(struct mmc_async_req *areq, bool enable)
>>>> +{
>>>> +     if (areq->prefetch && areq->prefetch->wait_req_start)
>>>> +             areq->prefetch->wait_req_start(areq, enable);
>>>> +}
>>>> +
>>>> +/* return true if new request is pending otherwise false */
>>>> +static inline bool mmc_prefetch_pending(struct mmc_async_req *areq)
>>>> +{
>>>> +     if (areq->prefetch && areq->prefetch->wait_req_pending)
>>>> +             return areq->prefetch->wait_req_pending(areq);
>>>> +     else
>>>> +             return false;
>>>> +}
>>>> +
>>>>  /**
>>>>   * struct mmc_slot - MMC slot functions
>>>>   *
>>>> ....................................................................
>>>>
>>>>
>>>> BR
>>>> Per
>>> I understand your motivation and the idea of restructuring the patch. It
>>> is still not clear to me how exactly the mmc thread will be awakened on
>>> a new request notification in your version, but I have another problem:
>>>
>> mmc_request_fn() is called and it calls complete(mq->prefetch_completion) which wakes up the current thread.
>> My patch is just an example. The intention is to make the patch cleaner. But I may have missed some of the HPI aspects.
> 
> Is it the lack of function wrappers that you are using in your example?
It's fine to skip the function wrappers if they make no sense.
What I want to avoid is to create new functions for data_req_start and data_req_wait.
I would rather add this to the existing functions in core.c and keep the changes in block.c and the flow minimal.

> As I understand your example, you mean to implement the generic logic at
> the core/core.c level by using wrapper functions and to leave the final
> implementation for MMC to card/queue.c and for the SDIO layer to
> card/..sdio.. (I'm not too familiar with the sdio protocol implementation).
> Well, it makes a lot of sense.
> 
I still have "plans" to add pre/post/async support in the SDIO framework too, but I have not gotten to it.
It would be nice to add your NEW_REQ feature along with the other mmc-async features, to make it usable from SDIO.


> But the devil is in the details: there is a lot of work in
> mmc_wait_for_data_req_done(), in the done() callback, and also in the
> error handling changes for card/block.c.
Things in core.c could be re-used in the SDIO case. In block.c there is only FS-specific implementation that is not relevant for SDIO.

> 
> Do you think that the wait_event() API does not suit the same semantics
> as the completion API?
The waiting mechanism may be wait_event or completion.
I'm not in favor of using both; better to replace the completion with wait_event if you prefer.

My main concern is to avoid adding new API to core.c in order to add the NEW_REQ feature. My patch is an example of changes to be done in core.c without adding new functions.

> 
> We would like to have a generic capability to handle additional events,
> such as HPI/stop flow, in addition to the NEW_REQUEST notification.
> Therefore, an event mechanism seems to be a better design than completion.
> 
> I've looked at the SDIO code and, from what I can understand, right now
> SDIO is not using the async request mechanism and works at the
> 'wait_for_cmd()' level. This means that work such as exposing MMC core
> APIs is a major change and should definitely be a separate design and
> implementation effort, while my current patch will fix the mmc thread
> behavior right now and give better performance for upper layers.
You are right. There is no support in the SDIO implementation for the pre/post/async feature. Still, I had it in mind when designing "struct mmc_async". It's possible to re-use the mmc core parts in order to utilize this support in the SDIO case. I'm not saying we should design for SDIO, but at least keep the API in a way that makes it potentially usable from SDIO too. The API of core.c (start/wait of requests) should make sense without the existence of the MMC block driver (block/queue).

One way to test the design is to add a test in mmc_test.c for a pending NEW_REQ. In mmc_test.c there is no block.c or queue.c.
If the test case is simple to write in mmc_test.c, it means that the API is on a good level.


BR
Per

> 
> 
>>
>> If the API is not implemented the mmc core shall simply ignore it.
>>
>>> We want to expand this event-based mechanism (unblocking the mmc thread
>>> from waiting for the running request) by adding another reason/event:
>>> the URGENT_REQUEST event. When the block layer has an urgent request to
>>> execute, it notifies the mmc layer (similar to the mmc_request()
>>> notification) and the mmc layer handles this urgent request notification
>>> by correctly stopping the running request, re-inserting all not yet
>>> performed parts back into the I/O scheduler, and fetching & starting the
>>> urgent request.
>>> The reasoning and general idea are documented together with the "New
>>> event" and will be submitted soon. The patch itself will be submitted
>>> in a week or so.
>> I have not considered the use of HPI in my proposal. If the NEW_REQ is first in line in the mmc queue it will be fetched as the NEW_REQ.
>> Then the current request must be interrupted and returned to the mmc-queue or io-scheduler.
>>
>> I don't see a direct contradiction between the two designs.
>>
>> The main point is to make the NEW_REQ API simpler and clearer.
>> My example is just an example.
>>
>>>
>>> As for our discussion: to handle both events, the mmc layer should be
>>> capable of being awakened not only when there is no
>>> mmc_prefetch_pending() (in your patch's terms), but also in the perfect
>>> case of async requests: one running on the bus and a second prepared
>>> and waiting for the first.
>>>
>>> I think that in case the SDIO protocol driver needs such functionality
>>> as NEW_REQUEST, one needs to implement notification to the mmc layer
>>> similar to the mmc_request() code in my patch.
>> SDIO needs to implement the new NEW_REQ API, but the API needs to be clean.
>>
>> BR
>> Per
>>
> Best regards,
> --
> Konstantin Dorfman,
> QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
> Inc. is a member of Code Aurora Forum,
> hosted by The Linux Foundation
> 



* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-11-15 16:38                   ` Per Förlin
@ 2012-11-19  9:48                     ` Konstantin Dorfman
  2012-11-19 14:32                       ` Per Förlin
  0 siblings, 1 reply; 25+ messages in thread
From: Konstantin Dorfman @ 2012-11-19  9:48 UTC (permalink / raw)
  To: Per Förlin; +Cc: Per Forlin, Venkatraman S, cjb, linux-mmc

Hello Per,

Thank you for giving me an example. Below I'm trying to explain some
logic issues with it and to ask you some questions about your vision of
the patch.

On 11/15/2012 06:38 PM, Per Förlin wrote:
> On 11/14/2012 04:15 PM, Konstantin Dorfman wrote:
>> Hello Per,
>>
>> On 11/13/2012 11:10 PM, Per Forlin wrote:
>>> On Tue, Oct 30, 2012 at 1:19 PM, Konstantin Dorfman
>>> <kdorfman@codeaurora.org> wrote:
>>>> Hello,
>>>>
>>>> On 10/29/2012 11:40 PM, Per Forlin wrote:
>>>>> Hi,
>>>>>
>>>>> I would like to move the focus of my concerns from root cause analysis
>>>>> to the actual patch.
>>>>> My first reflection is that the patch is relatively complex and some
>>>>> of the code looks duplicated.
>>>>> Would it be possible to simplify it and re-use the current execution flow?
>>>>>
>>>>> Is the following flow feasible?
>>>>>
>>>>> in mmc_start_req():
>>>>> --------------
>>>>> if (rqc == NULL && not_resend)
>>>>>   wait_for_both_mmc_and_arrival_of_new_req
>>>>> else
>>>>>   wait_only_for_mmc
>>>>>
>>>>> if (arrival_of_new_req) {
>>>>>    set flag to indicate fetch-new_req
>>>>>   return NULL;
>>>>> }
>>>>> -----------------
>>>>>
>>>>> in queue.c:
>>>>> if (fetch-new_req)
>>>>>   don't overwrite previous req.
>>>>>
>>>>> BR
>>>>> Per
>>>>
>>>> You are talking about using both waiting mechanisms, the old one (wait
>>>> on completion) and the new one, wait_event_interruptible()? But how
>>>> will the done() callback, called in host controller irq context,
>>>> differentiate which one is relevant for the request?
>>>>
>>>> I think it is better to use a single wait point for the mmc thread.
>>>
>>> I have sketched a patch to better explain my point. It's not tested; it
>>> barely compiles :)
>>> The patch tries to introduce your feature and still keep the same code
>>> path. And it exposes an API that could be used by SDIO as well.
>>> The intention of my sketch patch is only to explain what I tried to
>>> visualize in the pseudo code previously in this thread.
>>> The outcome of your final patch should be documented here, I think:
>>> Documentation/mmc/mmc-async-req.txt
>> This document is ready; I am attaching it to this mail and it will be
>> included in the next version of the patch (or a RESEND).
>>>
>>> Here follows my patch sketch:
>>> ........................................................
>>> diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
>>> index e360a97..08036a1 100644
>>> --- a/drivers/mmc/card/queue.c
>>> +++ b/drivers/mmc/card/queue.c
>>> @@ -66,6 +66,8 @@ static int mmc_queue_thread(void *d)
>>>               spin_unlock_irq(q->queue_lock);
>>>
>>>               if (req || mq->mqrq_prev->req) {
>>> +                     if (!req)
>>> +                             mmc_prefetch_start(&mq->mqrq_prev->mmc_active, true);
>>>                       set_current_state(TASK_RUNNING);
>>>                       mq->issue_fn(mq, req);
>>>               } else {
>>> @@ -79,11 +81,13 @@ static int mmc_queue_thread(void *d)
>>>               }
>>>
>>>               /* Current request becomes previous request and vice versa. */
>>> -             mq->mqrq_prev->brq.mrq.data = NULL;
>>> -             mq->mqrq_prev->req = NULL;
>>> -             tmp = mq->mqrq_prev;
>>> -             mq->mqrq_prev = mq->mqrq_cur;
>>> -             mq->mqrq_cur = tmp;
>>> +             if (!mmc_prefetch_pending(&mq->mqrq_prev->mmc_active)) {
>>> +                     mq->mqrq_prev->brq.mrq.data = NULL;
>>> +                     mq->mqrq_prev->req = NULL;
>>> +                     tmp = mq->mqrq_prev;
>>> +                     mq->mqrq_prev = mq->mqrq_cur;
>>> +                     mq->mqrq_cur = tmp;
>>> +             }
>>>       } while (1);
>>>       up(&mq->thread_sem);
>>>
>>> @@ -109,10 +113,44 @@ static void mmc_request_fn(struct request_queue *q)
>>>               return;
>>>       }
>>>
>>> +     if (mq->prefetch_enable) {
>>> +             spin_lock(&mq->prefetch_lock);
>>> +             if (mq->prefetch_completion)
>>> +                     complete(mq->prefetch_completion);
Since the mq->prefetch_completion init happens only in core.c after
mmc_start_req(NULL), we can miss all new request notifications that
arrive between fetching the NULL request and that point.
>>> +             mq->prefetch_pending = true;
>>> +             spin_unlock(&mq->prefetch_lock);
>>> +     }
>>> +
>>>       if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
>>>               wake_up_process(mq->thread);
>>>  }
>>>
>>> +static void mmc_req_init(struct mmc_async_req *areq, struct completion *compl)
>>> +{
>>> +     struct mmc_queue *mq =
>>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>>> +
>>> +     spin_lock(&mq->prefetch_lock);
>>> +     mq->prefetch_completion = compl;
>>> +     if (mq->prefetch_pending)
>>> +             complete(mq->prefetch_completion);
>>> +     spin_unlock(&mq->prefetch_lock);
>>> +}
>>> +
>>> +static void mmc_req_start(struct mmc_async_req *areq, bool enable)
>>> +{
>>> +     struct mmc_queue *mq =
>>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>>> +     mq->prefetch_enable = enable;
>>> +}
>>> +
>>> +static bool mmc_req_pending(struct mmc_async_req *areq)
>>> +{
>>> +     struct mmc_queue *mq =
>>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>>> +     return mq->prefetch_pending;
>>> +}
>>> +
>>>  static struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
>>>  {
>>>       struct scatterlist *sg;
>>> @@ -166,6 +204,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct
>>> mmc_card *card,
>>>       int ret;
>>>       struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
>>>       struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
>>> +     spin_lock_init(&mq->prefetch_lock);
>>> +     mq->prefetch.wait_req_init = mmc_req_init;
>>> +     mq->prefetch.wait_req_start = mmc_req_start;
>>> +     mq->prefetch.wait_req_pending = mmc_req_pending;
>>> +     mqrq_cur->mmc_active.prefetch = &mq->prefetch;
>>> +     mqrq_prev->mmc_active.prefetch = &mq->prefetch;
>>>
>>>       if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
>>>               limit = *mmc_dev(host)->dma_mask;
>>> diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
>>> index d2a1eb4..5afd467 100644
>>> --- a/drivers/mmc/card/queue.h
>>> +++ b/drivers/mmc/card/queue.h
>>> @@ -33,6 +33,12 @@ struct mmc_queue {
>>>       struct mmc_queue_req    mqrq[2];
>>>       struct mmc_queue_req    *mqrq_cur;
>>>       struct mmc_queue_req    *mqrq_prev;
>>> +
>>> +     struct mmc_async_prefetch prefetch;
>>> +     spinlock_t              prefetch_lock;
>>> +     struct completion       *prefetch_completion;
>>> +     bool                    prefetch_enable;
>>> +     bool                    prefetch_pending;
>>>  };
>>>
>>>  extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
>>> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
>>> index 9503cab..06fc036 100644
>>> --- a/drivers/mmc/core/core.c
>>> +++ b/drivers/mmc/core/core.c
>>> @@ -352,7 +352,15 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
>>>               mmc_pre_req(host, areq->mrq, !host->areq);
>>>
>>>       if (host->areq) {
>>> +             if (!areq)
>>> +                     mmc_prefetch_init(host->areq,
>>> +                                       &host->areq->mrq->completion);
>>>               mmc_wait_for_req_done(host, host->areq->mrq);
>>> +             if (!areq) {
>>> +                     mmc_prefetch_start(host->areq, false);
>>> +                     if (mmc_prefetch_pending(host->areq))
>>> +                             return NULL;
In this case, the mmc thread may be unblocked because done() arrived for
the current request and not because of a new request notification. In
such a case we would like the completed request to be handled before
fetching the new request. In my code the is_done_rcv flag is used along
with the is_new_req flag in order to differentiate the reason the mmc
thread was awakened.
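
Condensed, the wait in my patch looks roughly like this (simplified;
locking and error handling omitted):

        /* One wait queue, two flags: done() sets is_done_rcv from irq
         * context, the block layer notification sets is_new_req. */
        while (1) {
                wait_event_interruptible(sync->wait,
                                         sync->is_done_rcv ||
                                         sync->is_new_req);
                if (sync->is_done_rcv) {
                        sync->is_done_rcv = false;
                        break; /* handle the completed request first */
                } else if (sync->is_new_req) {
                        return MMC_BLK_NEW_REQUEST; /* fetch the new one */
                }
        }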

>>> +             }
>>>               err = host->areq->err_check(host->card, host->areq);
>>>       }
>>>
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>> index 65c64ee..ce5d03f 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -15,6 +15,7 @@
>>>  #include <linux/sched.h>
>>>  #include <linux/device.h>
>>>  #include <linux/fault-inject.h>
>>> +#include <linux/completion.h>
>>>
>>>  #include <linux/mmc/core.h>
>>>  #include <linux/mmc/pm.h>
>>> @@ -140,6 +141,13 @@ struct mmc_host_ops {
>>>
>>>  struct mmc_card;
>>>  struct device;
>>> +struct mmc_async_req;
>>> +
>>> +struct mmc_async_prefetch {
>>> +     void (*wait_req_init)(struct mmc_async_req *, struct completion *);
>>> +     void (*wait_req_start)(struct mmc_async_req *, bool);
>>> +     bool (*wait_req_pending)(struct mmc_async_req *);
>>> +};
>>>
>>>  struct mmc_async_req {
>>>       /* active mmc request */
>>> @@ -149,8 +157,33 @@ struct mmc_async_req {
>>>        * Returns 0 if success otherwise non zero.
>>>        */
>>>       int (*err_check) (struct mmc_card *, struct mmc_async_req *);
>>> +     struct mmc_async_prefetch *prefetch;
>>>  };
>>>
>>> +/* set completion variable, complete == NULL to reset completion */
>>> +static inline void mmc_prefetch_init(struct mmc_async_req *areq,
>>> +                                  struct completion *complete)
>>> +{
>>> +     if (areq->prefetch && areq->prefetch->wait_req_init)
>>> +             areq->prefetch->wait_req_init(areq, complete);
>>> +}
>>> +
>>> +/* enable or disable prefetch feature */
>>> +static inline void mmc_prefetch_start(struct mmc_async_req *areq, bool enable)
>>> +{
>>> +     if (areq->prefetch && areq->prefetch->wait_req_start)
>>> +             areq->prefetch->wait_req_start(areq, enable);
>>> +}
>>> +
>>> +/* return true if new request is pending otherwise false */
>>> +static inline bool mmc_prefetch_pending(struct mmc_async_req *areq)
>>> +{
>>> +     if (areq->prefetch && areq->prefetch->wait_req_pending)
>>> +             return areq->prefetch->wait_req_pending(areq);
>>> +     else
>>> +             return false;
>>> +}
>>> +
>>>  /**
>>>   * struct mmc_slot - MMC slot functions
>>>   *
>>> ....................................................................
>>>
>>>
>>> BR
>>> Per
>> I understand your motivation and the idea of restructuring the patch. It
>> is still not clear to me how exactly the mmc thread will be awakened on
>> a new request notification in your version, but I have another problem:
>>
> mmc_request_fn() is called and it calls complete(mq->prefetch_completion) which wakes up the current thread.
> My patch is just an example. The intention is to make the patch cleaner. But I may have missed some of the HPI aspects.

Is it the lack of function wrappers that you are using in your example?
As I understand your example, you mean to implement the generic logic at
the core/core.c level by using wrapper functions and to leave the final
implementation for MMC to card/queue.c and for the SDIO layer to
card/..sdio.. (I'm not too familiar with the sdio protocol implementation).
Well, it makes a lot of sense.

But the devil is in the details: there is a lot of work in
mmc_wait_for_data_req_done(), in the done() callback, and also in the
error handling changes for card/block.c.

Do you think that the wait_event() API does not suit the same semantics
as the completion API?

We would like to have a generic capability to handle additional events,
such as HPI/stop flow, in addition to the NEW_REQUEST notification.
Therefore, an event mechanism seems to be a better design than completion.
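
To illustrate what I mean by an event mechanism, a hypothetical sketch
(none of these names exist; they are made up for this mail) where the
wakeup reason is an event mask instead of a single completion:

enum mmc_thread_event {
        MMC_EVENT_REQ_DONE    = 1 << 0, /* running request completed */
        MMC_EVENT_NEW_REQUEST = 1 << 1, /* block layer queued more work */
        MMC_EVENT_URGENT_REQ  = 1 << 2, /* HPI/stop flow, future patch */
};

        wait_event_interruptible(ctx->wait, ctx->events);
        if (ctx->events & MMC_EVENT_REQ_DONE)
                handle_completed_request();      /* placeholder */
        else if (ctx->events & MMC_EVENT_URGENT_REQ)
                interrupt_and_requeue_current(); /* placeholder */
        else if (ctx->events & MMC_EVENT_NEW_REQUEST)
                fetch_new_request();             /* placeholder */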

I've looked at the SDIO code and, from what I can understand, right now
SDIO is not using the async request mechanism and works at the
'wait_for_cmd()' level. This means that work such as exposing MMC core
APIs is a major change and should definitely be a separate design and
implementation effort, while my current patch will fix the mmc thread
behavior right now and give better performance for upper layers.


> 
> If the API is not implemented the mmc core shall simply ignore it.
> 
>> We want to expand this event-based mechanism (unblocking the mmc thread
>> from waiting for the running request) by adding another reason/event:
>> the URGENT_REQUEST event. When the block layer has an urgent request to
>> execute, it notifies the mmc layer (similar to the mmc_request()
>> notification) and the mmc layer handles this urgent request notification
>> by correctly stopping the running request, re-inserting all not yet
>> performed parts back into the I/O scheduler, and fetching & starting the
>> urgent request.
>> The reasoning and general idea are documented together with the "New
>> event" and will be submitted soon. The patch itself will be submitted
>> in a week or so.
> I have not considered the use of HPI in my proposal. If the NEW_REQ is first in line in the mmc queue it will be fetched as the NEW_REQ.
> Then the current request must be interrupted and returned to the mmc-queue or io-scheduler.
> 
> I don't see a direct contradiction between the two designs.
> 
> The main point is to make the NEW_REQ API simpler and clearer.
> My example is just an example.
> 
>>
>> As for our discussion: to handle both events, the mmc layer should be
>> capable of being awakened not only when there is no
>> mmc_prefetch_pending() (in your patch's terms), but also in the perfect
>> case of async requests: one running on the bus and a second prepared
>> and waiting for the first.
>>
>> I think that in case the SDIO protocol driver needs such functionality
>> as NEW_REQUEST, one needs to implement notification to the mmc layer
>> similar to the mmc_request() code in my patch.
> SDIO needs to implement the new NEW_REQ API, but the API needs to be clean.
> 
> BR
> Per
> 
Best regards,
-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-11-14 15:15                 ` Konstantin Dorfman
@ 2012-11-15 16:38                   ` Per Förlin
  2012-11-19  9:48                     ` Konstantin Dorfman
  0 siblings, 1 reply; 25+ messages in thread
From: Per Förlin @ 2012-11-15 16:38 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Per Forlin, Venkatraman S, cjb, linux-mmc

On 11/14/2012 04:15 PM, Konstantin Dorfman wrote:
> Hello Per,
> 
> On 11/13/2012 11:10 PM, Per Forlin wrote:
>> On Tue, Oct 30, 2012 at 1:19 PM, Konstantin Dorfman
>> <kdorfman@codeaurora.org> wrote:
>>> Hello,
>>>
>>> On 10/29/2012 11:40 PM, Per Forlin wrote:
>>>> Hi,
>>>>
>>>> I would like to move the focus of my concerns from root cause analysis
>>>> to the actual patch.
>>>> My first reflection is that the patch is relatively complex and some
>>>> of the code looks duplicated.
>>>> Would it be possible to simplify it and re-use the current execution flow?
>>>>
>>>> Is the following flow feasible?
>>>>
>>>> in mmc_start_req():
>>>> --------------
>>>> if (rqc == NULL && not_resend)
>>>>   wait_for_both_mmc_and_arrival_of_new_req
>>>> else
>>>>   wait_only_for_mmc
>>>>
>>>> if (arrival_of_new_req) {
>>>>    set flag to indicate fetch-new_req
>>>>   return NULL;
>>>> }
>>>> -----------------
>>>>
>>>> in queue.c:
>>>> if (fetch-new_req)
>>>>   don't overwrite previous req.
>>>>
>>>> BR
>>>> Per
>>>
>>> You are talking about using both waiting mechanisms, the old one (wait
>>> on completion) and the new one, wait_event_interruptible()? But how
>>> will the done() callback, called in host controller irq context,
>>> differentiate which one is relevant for the request?
>>>
>>> I think it is better to use a single wait point for the mmc thread.
>>
>> I have sketched a patch to better explain my point. It's not tested; it
>> barely compiles :)
>> The patch tries to introduce your feature and still keep the same code
>> path. And it exposes an API that could be used by SDIO as well.
>> The intention of my sketch patch is only to explain what I tried to
>> visualize in the pseudo code previously in this thread.
>> The outcome of your final patch should be documented here, I think:
>> Documentation/mmc/mmc-async-req.txt
> This document is ready; I am attaching it to this mail and it will be
> included in the next version of the patch (or a RESEND).
>>
>> Here follows my patch sketch:
>> ........................................................
>> diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
>> index e360a97..08036a1 100644
>> --- a/drivers/mmc/card/queue.c
>> +++ b/drivers/mmc/card/queue.c
>> @@ -66,6 +66,8 @@ static int mmc_queue_thread(void *d)
>>               spin_unlock_irq(q->queue_lock);
>>
>>               if (req || mq->mqrq_prev->req) {
>> +                     if (!req)
>> +                             mmc_prefetch_start(&mq->mqrq_prev->mmc_active, true);
>>                       set_current_state(TASK_RUNNING);
>>                       mq->issue_fn(mq, req);
>>               } else {
>> @@ -79,11 +81,13 @@ static int mmc_queue_thread(void *d)
>>               }
>>
>>               /* Current request becomes previous request and vice versa. */
>> -             mq->mqrq_prev->brq.mrq.data = NULL;
>> -             mq->mqrq_prev->req = NULL;
>> -             tmp = mq->mqrq_prev;
>> -             mq->mqrq_prev = mq->mqrq_cur;
>> -             mq->mqrq_cur = tmp;
>> +             if (!mmc_prefetch_pending(&mq->mqrq_prev->mmc_active)) {
>> +                     mq->mqrq_prev->brq.mrq.data = NULL;
>> +                     mq->mqrq_prev->req = NULL;
>> +                     tmp = mq->mqrq_prev;
>> +                     mq->mqrq_prev = mq->mqrq_cur;
>> +                     mq->mqrq_cur = tmp;
>> +             }
>>       } while (1);
>>       up(&mq->thread_sem);
>>
>> @@ -109,10 +113,44 @@ static void mmc_request_fn(struct request_queue *q)
>>               return;
>>       }
>>
>> +     if (mq->prefetch_enable) {
>> +             spin_lock(&mq->prefetch_lock);
>> +             if (mq->prefetch_completion)
>> +                     complete(mq->prefetch_completion);
>> +             mq->prefetch_pending = true;
>> +             spin_unlock(&mq->prefetch_lock);
>> +     }
>> +
>>       if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
>>               wake_up_process(mq->thread);
>>  }
>>
>> +static void mmc_req_init(struct mmc_async_req *areq, struct completion *compl)
>> +{
>> +     struct mmc_queue *mq =
>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>> +
>> +     spin_lock(&mq->prefetch_lock);
>> +     mq->prefetch_completion = compl;
>> +     if (mq->prefetch_pending)
>> +             complete(mq->prefetch_completion);
>> +     spin_unlock(&mq->prefetch_lock);
>> +}
>> +
>> +static void mmc_req_start(struct mmc_async_req *areq, bool enable)
>> +{
>> +     struct mmc_queue *mq =
>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>> +     mq->prefetch_enable = enable;
>> +}
>> +
>> +static bool mmc_req_pending(struct mmc_async_req *areq)
>> +{
>> +     struct mmc_queue *mq =
>> +             container_of(areq->prefetch, struct mmc_queue, prefetch);
>> +     return mq->prefetch_pending;
>> +}
>> +
>>  static struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
>>  {
>>       struct scatterlist *sg;
>> @@ -166,6 +204,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct
>> mmc_card *card,
>>       int ret;
>>       struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
>>       struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
>> +     spin_lock_init(&mq->prefetch_lock);
>> +     mq->prefetch.wait_req_init = mmc_req_init;
>> +     mq->prefetch.wait_req_start = mmc_req_start;
>> +     mq->prefetch.wait_req_pending = mmc_req_pending;
>> +     mqrq_cur->mmc_active.prefetch = &mq->prefetch;
>> +     mqrq_prev->mmc_active.prefetch = &mq->prefetch;
>>
>>       if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
>>               limit = *mmc_dev(host)->dma_mask;
>> diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
>> index d2a1eb4..5afd467 100644
>> --- a/drivers/mmc/card/queue.h
>> +++ b/drivers/mmc/card/queue.h
>> @@ -33,6 +33,12 @@ struct mmc_queue {
>>       struct mmc_queue_req    mqrq[2];
>>       struct mmc_queue_req    *mqrq_cur;
>>       struct mmc_queue_req    *mqrq_prev;
>> +
>> +     struct mmc_async_prefetch prefetch;
>> +     spinlock_t              prefetch_lock;
>> +     struct completion       *prefetch_completion;
>> +     bool                    prefetch_enable;
>> +     bool                    prefetch_pending;
>>  };
>>
>>  extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
>> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
>> index 9503cab..06fc036 100644
>> --- a/drivers/mmc/core/core.c
>> +++ b/drivers/mmc/core/core.c
>> @@ -352,7 +352,15 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
>>               mmc_pre_req(host, areq->mrq, !host->areq);
>>
>>       if (host->areq) {
>> +             if (!areq)
>> +                     mmc_prefetch_init(host->areq,
>> +                                       &host->areq->mrq->completion);
>>               mmc_wait_for_req_done(host, host->areq->mrq);
>> +             if (!areq) {
>> +                     mmc_prefetch_start(host->areq, false);
>> +                     if (mmc_prefetch_pending(host->areq))
>> +                             return NULL;
>> +             }
>>               err = host->areq->err_check(host->card, host->areq);
>>       }
>>
>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>> index 65c64ee..ce5d03f 100644
>> --- a/include/linux/mmc/host.h
>> +++ b/include/linux/mmc/host.h
>> @@ -15,6 +15,7 @@
>>  #include <linux/sched.h>
>>  #include <linux/device.h>
>>  #include <linux/fault-inject.h>
>> +#include <linux/completion.h>
>>
>>  #include <linux/mmc/core.h>
>>  #include <linux/mmc/pm.h>
>> @@ -140,6 +141,13 @@ struct mmc_host_ops {
>>
>>  struct mmc_card;
>>  struct device;
>> +struct mmc_async_req;
>> +
>> +struct mmc_async_prefetch {
>> +     void (*wait_req_init)(struct mmc_async_req *, struct completion *);
>> +     void (*wait_req_start)(struct mmc_async_req *, bool);
>> +     bool (*wait_req_pending)(struct mmc_async_req *);
>> +};
>>
>>  struct mmc_async_req {
>>       /* active mmc request */
>> @@ -149,8 +157,33 @@ struct mmc_async_req {
>>        * Returns 0 if success otherwise non zero.
>>        */
>>       int (*err_check) (struct mmc_card *, struct mmc_async_req *);
>> +     struct mmc_async_prefetch *prefetch;
>>  };
>>
>> +/* set completion variable, complete == NULL to reset completion */
>> +static inline void mmc_prefetch_init(struct mmc_async_req *areq,
>> +                                  struct completion *complete)
>> +{
>> +     if (areq->prefetch && areq->prefetch->wait_req_init)
>> +             areq->prefetch->wait_req_init(areq, complete);
>> +}
>> +
>> +/* enable or disable prefetch feature */
>> +static inline void mmc_prefetch_start(struct mmc_async_req *areq, bool enable)
>> +{
>> +     if (areq->prefetch && areq->prefetch->wait_req_start)
>> +             areq->prefetch->wait_req_start(areq, enable);
>> +}
>> +
>> +/* return true if new request is pending otherwise false */
>> +static inline bool mmc_prefetch_pending(struct mmc_async_req *areq)
>> +{
>> +     if (areq->prefetch && areq->prefetch->wait_req_pending)
>> +             return areq->prefetch->wait_req_pending(areq);
>> +     else
>> +             return false;
>> +}
>> +
>>  /**
>>   * struct mmc_slot - MMC slot functions
>>   *
>> ....................................................................
>>
>>
>> BR
>> Per
> I understand your motivation and the idea of restructuring the patch. It is
> still not clear to me how exactly the mmc thread will be woken on a new
> request notification in your version, but I have another problem:
> 
mmc_request_fn() is called and it calls complete(mq->prefetch_completion) which wakes up the current thread.
My patch is just an example. The intention is to make the patch cleaner. But I may have missed some of the HPI aspects.

If the API is not implemented the mmc core shall simply ignore it.

> We want to expand this event-based mechanism (unblocking the mmc thread
> from waiting for the running request) by adding another reason/event: the
> URGENT_REQUEST event. When the block layer has an urgent request to
> execute, it notifies the mmc layer (similarly to the mmc_request()
> notification) and the mmc layer handles it by correctly stopping the
> running request, re-inserting back into the I/O scheduler all
> not-yet-performed parts, and fetching & starting the urgent request.
> The reasoning and general idea are documented together with "New event"
> and will be submitted soon. The patch itself will be submitted in a week
> or so.
I have not considered the use of HPI in my proposal. If the NEW_REQ is first in line in the mmc queue it will be fetched as the NEW_REQ.
Then the current request must be interrupted and returned to the mmc queue or the I/O scheduler.

I don't see a direct contradiction between the two designs.

The main point is to make the NEW_REQ API simpler and clearer.
My sketch is just an example.

> 
> As for our discussion - to handle both events the mmc layer should be
> capable of being woken not only in the case of no mmc_prefetch_pending()
> (in your patch's terms), but also in the perfect async request case:
> one request running on the bus and a second prepared and waiting for the first.
> 
> I think that in case the SDIO protocol driver needs such functionality as
> NEW_REQUEST, one needs to implement a notification to the mmc layer similar
> to the mmc_request() code in my patch.
SDIO needs to implement the new NEW_REQ API, but the API needs to be clean.

BR
Per



* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-11-13 21:10               ` Per Forlin
@ 2012-11-14 15:15                 ` Konstantin Dorfman
  2012-11-15 16:38                   ` Per Förlin
  0 siblings, 1 reply; 25+ messages in thread
From: Konstantin Dorfman @ 2012-11-14 15:15 UTC (permalink / raw)
  To: Per Forlin; +Cc: Venkatraman S, cjb, linux-mmc, per.forlin

[-- Attachment #1: Type: text/plain, Size: 9521 bytes --]

Hello Per,

On 11/13/2012 11:10 PM, Per Forlin wrote:
> On Tue, Oct 30, 2012 at 1:19 PM, Konstantin Dorfman
> <kdorfman@codeaurora.org> wrote:
>> Hello,
>>
>> On 10/29/2012 11:40 PM, Per Forlin wrote:
>>> Hi,
>>>
>>> I would like to move the focus of my concerns from root cause analysis
>>> to the actual patch,
>>> My first reflection is that the patch is relatively complex and some
>>> of the code looks duplicated.
>>> Would it be possible to simplify it and re-use  the current execution flow.
>>>
>>> Is the following flow feasible?
>>>
>>> in mmc_start_req():
>>> --------------
>>> if (rqc == NULL && not_resend)
>>>   wait_for_both_mmc_and_arrival_of_new_req
>>> else
>>>   wait_only_for_mmc
>>>
>>> if (arrival_of_new_req) {
>>>    set flag to indicate fetch-new_req
>>>   return NULL;
>>> }
>>> -----------------
>>>
>>> in queue.c:
>>> if (fetch-new_req)
>>>   don't overwrite previous req.
>>>
>>> BR
>>> Per
>>
>> You are talking about using both waiting mechanisms, the old one (wait on
>> completion) and the new one - wait_event_interruptible()? But how will the
>> done() callback, called in host controller irq context, differentiate
>> which one is relevant for the request?
>>
>> I think it is better to use a single wait point for the mmc thread.
> 
> I have sketched a patch to better explain my point. It's not tested; it
> barely compiles :)
> The patch tries to introduce your feature and still keep the same code
> path. And it exposes an API that could be used by SDIO as well.
> The intention of my sketch patch is only to explain what I tried to
> visualize in the pseudo code previously in this thread.
> The outcome of your final patch should be documented here, I think:
> Documentation/mmc/mmc-async-req.txt
This document is ready; I am attaching it to this mail and it will be
included in the next version of the patch (or a RESEND).
> 
> Here follows my patch sketch:
> ........................................................
> diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
> index e360a97..08036a1 100644
> --- a/drivers/mmc/card/queue.c
> +++ b/drivers/mmc/card/queue.c
> @@ -66,6 +66,8 @@ static int mmc_queue_thread(void *d)
>  		spin_unlock_irq(q->queue_lock);
> 
>  		if (req || mq->mqrq_prev->req) {
> +			if (!req)
> +				mmc_prefetch_start(&mq->mqrq_prev->mmc_active, true);
>  			set_current_state(TASK_RUNNING);
>  			mq->issue_fn(mq, req);
>  		} else {
> @@ -79,11 +81,13 @@ static int mmc_queue_thread(void *d)
>  		}
> 
>  		/* Current request becomes previous request and vice versa. */
> -		mq->mqrq_prev->brq.mrq.data = NULL;
> -		mq->mqrq_prev->req = NULL;
> -		tmp = mq->mqrq_prev;
> -		mq->mqrq_prev = mq->mqrq_cur;
> -		mq->mqrq_cur = tmp;
> +		if (!mmc_prefetch_pending(&mq->mqrq_prev->mmc_active)) {
> +			mq->mqrq_prev->brq.mrq.data = NULL;
> +			mq->mqrq_prev->req = NULL;
> +			tmp = mq->mqrq_prev;
> +			mq->mqrq_prev = mq->mqrq_cur;
> +			mq->mqrq_cur = tmp;
> +		}
>  	} while (1);
>  	up(&mq->thread_sem);
> 
> @@ -109,10 +113,44 @@ static void mmc_request_fn(struct request_queue *q)
>  		return;
>  	}
> 
> +	if (mq->prefetch_enable) {
> +		spin_lock(&mq->prefetch_lock);
> +		if (mq->prefetch_completion)
> +			complete(mq->prefetch_completion);
> +		mq->prefetch_pending = true;
> +		spin_unlock(&mq->prefetch_lock);
> +	}
> +
>  	if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
>  		wake_up_process(mq->thread);
>  }
> 
> +static void mmc_req_init(struct mmc_async_req *areq, struct completion *compl)
> +{
> +	struct mmc_queue *mq =
> +		container_of(areq->prefetch, struct mmc_queue, prefetch);
> +
> +	spin_lock(&mq->prefetch_lock);
> +	mq->prefetch_completion = compl;
> +	if (mq->prefetch_pending)
> +		complete(mq->prefetch_completion);
> +	spin_unlock(&mq->prefetch_lock);
> +}
> +
> +static void mmc_req_start(struct mmc_async_req *areq, bool enable)
> +{
> +	struct mmc_queue *mq =
> +		container_of(areq->prefetch, struct mmc_queue, prefetch);
> +	mq->prefetch_enable = enable;
> +}
> +
> +static bool mmc_req_pending(struct mmc_async_req *areq)
> +{
> +	struct mmc_queue *mq =
> +		container_of(areq->prefetch, struct mmc_queue, prefetch);
> +	return mq->prefetch_pending;
> +}
> +
>  static struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
>  {
>  	struct scatterlist *sg;
> @@ -166,6 +204,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
>  	int ret;
>  	struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
>  	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
> +	spin_lock_init(&mq->prefetch_lock);
> +	mq->prefetch.wait_req_init = mmc_req_init;
> +	mq->prefetch.wait_req_start = mmc_req_start;
> +	mq->prefetch.wait_req_pending = mmc_req_pending;
> +	mqrq_cur->mmc_active.prefetch = &mq->prefetch;
> +	mqrq_prev->mmc_active.prefetch = &mq->prefetch;
> 
>  	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
>  		limit = *mmc_dev(host)->dma_mask;
> diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
> index d2a1eb4..5afd467 100644
> --- a/drivers/mmc/card/queue.h
> +++ b/drivers/mmc/card/queue.h
> @@ -33,6 +33,12 @@ struct mmc_queue {
>  	struct mmc_queue_req	mqrq[2];
>  	struct mmc_queue_req	*mqrq_cur;
>  	struct mmc_queue_req	*mqrq_prev;
> +
> +	struct mmc_async_prefetch prefetch;
> +	spinlock_t		prefetch_lock;
> +	struct completion	*prefetch_completion;
> +	bool			prefetch_enable;
> +	bool			prefetch_pending;
>  };
> 
>  extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
> diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
> index 9503cab..06fc036 100644
> --- a/drivers/mmc/core/core.c
> +++ b/drivers/mmc/core/core.c
> @@ -352,7 +352,15 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
>  		mmc_pre_req(host, areq->mrq, !host->areq);
> 
>  	if (host->areq) {
> +		if (!areq)
> +			mmc_prefetch_init(host->areq,
> +					  &host->areq->mrq->completion);
>  		mmc_wait_for_req_done(host, host->areq->mrq);
> +		if (!areq) {
> +			mmc_prefetch_start(host->areq, false);
> +			if (mmc_prefetch_pending(host->areq))
> +				return NULL;
> +		}
>  		err = host->areq->err_check(host->card, host->areq);
>  	}
> 
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index 65c64ee..ce5d03f 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -15,6 +15,7 @@
>  #include <linux/sched.h>
>  #include <linux/device.h>
>  #include <linux/fault-inject.h>
> +#include <linux/completion.h>
> 
>  #include <linux/mmc/core.h>
>  #include <linux/mmc/pm.h>
> @@ -140,6 +141,13 @@ struct mmc_host_ops {
> 
>  struct mmc_card;
>  struct device;
> +struct mmc_async_req;
> +
> +struct mmc_async_prefetch {
> +	void (*wait_req_init)(struct mmc_async_req *, struct completion *);
> +	void (*wait_req_start)(struct mmc_async_req *, bool);
> +	bool (*wait_req_pending)(struct mmc_async_req *);
> +};
> 
>  struct mmc_async_req {
>  	/* active mmc request */
> @@ -149,8 +157,33 @@ struct mmc_async_req {
>  	 * Returns 0 if success otherwise non zero.
>  	 */
>  	int (*err_check) (struct mmc_card *, struct mmc_async_req *);
> +	struct mmc_async_prefetch *prefetch;
>  };
> 
> +/* set completion variable, complete == NULL to reset completion */
> +static inline void mmc_prefetch_init(struct mmc_async_req *areq,
> +				     struct completion *complete)
> +{
> +	if (areq->prefetch && areq->prefetch->wait_req_init)
> +		areq->prefetch->wait_req_init(areq, complete);
> +}
> +
> +/* enable or disable prefetch feature */
> +static inline void mmc_prefetch_start(struct mmc_async_req *areq, bool enable)
> +{
> +	if (areq->prefetch && areq->prefetch->wait_req_start)
> +		areq->prefetch->wait_req_start(areq, enable);
> +}
> +
> +/* return true if new request is pending otherwise false */
> +static inline bool mmc_prefetch_pending(struct mmc_async_req *areq)
> +{
> +	if (areq->prefetch && areq->prefetch->wait_req_pending)
> +		return areq->prefetch->wait_req_pending(areq);
> +	else
> +		return false;
> +}
> +
>  /**
>   * struct mmc_slot - MMC slot functions
>   *
> ....................................................................
> 
> 
> BR
> Per
I understand your motivation and the idea of restructuring the patch. It is
still not clear to me how exactly the mmc thread will be woken on a new
request notification in your version, but I have another problem:

We want to expand this event-based mechanism (unblocking the mmc thread
from waiting for the running request) by adding another reason/event: the
URGENT_REQUEST event. When the block layer has an urgent request to
execute, it notifies the mmc layer (similarly to the mmc_request()
notification) and the mmc layer handles it by correctly stopping the
running request, re-inserting back into the I/O scheduler all
not-yet-performed parts, and fetching & starting the urgent request.
The reasoning and general idea are documented together with "New event"
and will be submitted soon. The patch itself will be submitted in a week
or so.

As for our discussion - to handle both events the mmc layer should be
capable of being woken not only in the case of no mmc_prefetch_pending()
(in your patch's terms), but also in the perfect async request case:
one request running on the bus and a second prepared and waiting for the first.
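
Schematically, the single wait point then needs to block on a condition
that any of these events can satisfy. A rough sketch (the ctx fields are
illustrative only, not a final API):

	/* mmc thread: one wait point, several possible wake-up reasons */
	wait_event_interruptible(ctx->wait,
				 ctx->is_done_rcv ||	/* host completed the request */
				 ctx->is_new_req ||	/* NEW_REQUEST notification */
				 ctx->is_urgent);	/* URGENT_REQUEST notification */

Each notifier (the host's done() callback, mmc_request() and the urgent
notification) sets its flag and calls wake_up_interruptible(&ctx->wait);
the woken thread then checks which flag fired and acts accordingly.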

I think that in case the SDIO protocol driver needs such functionality as
NEW_REQUEST, one needs to implement a notification to the mmc layer similar
to the mmc_request() code in my patch.

Does this make sense?


-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation

[-- Attachment #2: mmc-wakeup.txt --]
[-- Type: text/plain, Size: 4893 bytes --]

Introduction
============
The file system adds block requests to the block device queue
asynchronously, without waiting for completion. The MMC framework takes
these block requests synchronously, one by one, and pushes them to the
host driver. When the host driver has completed a request, the framework
pulls the next block request from the block request queue. For each
block request the MMC framework converts the generic request to an mmc
request. The host driver takes the mmc request and does the necessary
preparations to process it. For a non-DMA host driver the necessary pre-
and post-preparations are insignificant, but for DMA the cache
maintenance is significant. Adding non-blocking request handling to the
MMC framework (between the block layer and the host driver) makes it
possible to prepare caches for one request while another request is
being processed by the MMC controller.

[Referenced from the Rationale section of
https://wiki.linaro.org/WorkingGroups/Kernel/Specs/
StoragePerfMMC-async-req]


Software description
====================
In the case of sequential read operations the async request mechanism is
not functional, due to the read-ahead mechanism: when "mmcqd" (the MMC
thread) reports completion of a request, there should be a context
switch to allow the insertion of the next read-ahead BIOs into the block
layer. Since "mmcqd" tries to fetch another request immediately after
the completion of the previous request, it gets NULL and begins waiting
for the completion of the request already running. This wait on
completion gives the FS (file system) the opportunity to insert the next
request, but the MMC layer is already blocked waiting for the running
request to complete and is not aware of the new request waiting to be
fetched. As a result, the opportunity to start cache preparation for the
new request while the previous one is being serviced is missed. The MMC
layer also cannot check whether the new request is urgent, so the
previous request is not interrupted immediately.

A solution to the above would be a mechanism that wakes up the MMC layer
immediately when a new block request becomes available for fetching.

Design
======

The main idea is to replace the blocking wait for a completion with an
event-driven mechanism and add a set of events that will unblock the MMC
layer context from waiting on host driver completion (a minimal sketch of
the resulting single wait point follows the event list).
The new events to be added are:
1. NEW_REQUEST:
	When the block layer notifies the MMC layer of a new request
	(the mmc_request() callback) while the MMC layer is waiting on
	the previous request's completion and the current request is
	NULL, the NEW_REQUEST event is triggered to wake up the waiting
	thread. The MMC layer then fetches the new request and, after
	its preparation, goes back to waiting on the completion of the
	previous request.

2. URGENT_REQUEST:
	In order to decrease the latency of prioritized requests (such
	as READ requests) we might want to stop the transmission of a
	"low priority" request (such as a WRITE) in order to handle the
	"high priority" one. The urgency of the request is decided by
	the block layer I/O scheduler. When the block layer notifies
	the MMC layer of an urgent request and the MMC layer is
	blocked, it will be woken up. The decision on whether to stop
	an ongoing transfer is taken according to several parameters,
	one of them being the number of bytes transferred through the
	host controller so far. The stopped request (and the next
	prepared request, if it exists) is re-inserted back into the
	I/O scheduler to be handled after the completion of the urgent
	request.
	When the decision taken is not to stop the ongoing transfer,
	the MMC context will wait for its normal completion; the
	already prepared next request (if it exists) will then be
	re-inserted back into the block layer and the urgent request
	fetched.
	URGENT_REQUEST handling is possible only when the host
	controller driver supports the stop_request() API; otherwise
	the mmc_urgent_request() notification callback will not be
	registered with the block layer.
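
The following minimal sketch shows the shape of the mechanism. All names
below, including the back-pointer from the request to the context, are
illustrative assumptions and not the final code:

	struct mmc_context_info {
		bool			is_done_rcv;	/* host completed the request */
		bool			is_new_req;	/* NEW_REQUEST event pending */
		bool			is_urgent;	/* URGENT_REQUEST event pending */
		wait_queue_head_t	wait;
	};

	/* host driver completion path (irq context) */
	static void mmc_wait_data_done(struct mmc_request *mrq)
	{
		/* context_info is an assumed back-pointer, set at issue time */
		struct mmc_context_info *context = mrq->context_info;

		context->is_done_rcv = true;
		wake_up_interruptible(&context->wait);
	}

	/* block layer notification path (mmc_request() callback) */
	static void mmc_notify_new_request(struct mmc_context_info *context)
	{
		context->is_new_req = true;
		wake_up_interruptible(&context->wait);
	}

	/* MMC layer context: single wait point replacing
	 * wait_for_completion(&mrq->completion) */
	static void mmc_wait_for_event(struct mmc_context_info *context)
	{
		wait_event_interruptible(context->wait,
					 context->is_done_rcv ||
					 context->is_new_req ||
					 context->is_urgent);
	}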

Performance
===========
With the new event mechanism the sequential READ throughput is improved
by 16%. With the URGENT_REQUEST event (and the ROW scheduler) the READ
latency is decreased by 41%.

Known issues
============
As a performance metric for the NEW_REQUEST and URGENT_REQUEST flows we
use the average and worst-case latency between:
1) the block layer dispatching the request
2) the MMC layer fetching the request
This latency should be minimized to achieve the best overall performance.
Right now the latency varies with bus speed, the number of CPU cores and
other factors, depending on the specific hardware and software
configuration.

To do
=====
For the URGENT_REQUEST implementation in the MMC layer, the heuristic
for deciding whether to stop an ongoing transaction could be made more
accurate by basing it not on the byte count transferred by the host
controller to the card, but on the time the MMC card needs to complete
the request.


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-30 12:19             ` Konstantin Dorfman
  2012-10-30 19:57               ` Per Forlin
@ 2012-11-13 21:10               ` Per Forlin
  2012-11-14 15:15                 ` Konstantin Dorfman
  1 sibling, 1 reply; 25+ messages in thread
From: Per Forlin @ 2012-11-13 21:10 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Venkatraman S, cjb, linux-mmc, per.forlin

On Tue, Oct 30, 2012 at 1:19 PM, Konstantin Dorfman
<kdorfman@codeaurora.org> wrote:
> Hello,
>
> On 10/29/2012 11:40 PM, Per Forlin wrote:
>> Hi,
>>
>> I would like to move the focus of my concerns from root cause analysis
>> to the actual patch,
>> My first reflection is that the patch is relatively complex and some
>> of the code looks duplicated.
>> Would it be possible to simplify it and re-use  the current execution flow.
>>
>> Is the following flow feasible?
>>
>> in mmc_start_req():
>> --------------
>> if (rqc == NULL && not_resend)
>>   wait_for_both_mmc_and_arrival_of_new_req
>> else
>>   wait_only_for_mmc
>>
>> if (arrival_of_new_req) {
>>    set flag to indicate fetch-new_req
>>   return NULL;
>> }
>> -----------------
>>
>> in queue.c:
>> if (fetch-new_req)
>>   don't overwrite previous req.
>>
>> BR
>> Per
>
> You are talking about using both waiting mechanisms, the old one (wait on
> completion) and the new one - wait_event_interruptible()? But how will the
> done() callback, called in host controller irq context, differentiate
> which one is relevant for the request?
>
> I think it is better to use a single wait point for the mmc thread.

I have sketched a patch to better explain my point. It's not tested; it
barely compiles :)
The patch tries to introduce your feature and still keep the same code
path. And it exposes an API that could be used by SDIO as well.
The intention of my sketch patch is only to explain what I tried to
visualize in the pseudo code previously in this thread.
The outcome of your final patch should be documented here, I think:
Documentation/mmc/mmc-async-req.txt

Here follows my patch sketch:
........................................................
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index e360a97..08036a1 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -66,6 +66,8 @@ static int mmc_queue_thread(void *d)
 		spin_unlock_irq(q->queue_lock);

 		if (req || mq->mqrq_prev->req) {
+			if (!req)
+				mmc_prefetch_start(&mq->mqrq_prev->mmc_active, true);
 			set_current_state(TASK_RUNNING);
 			mq->issue_fn(mq, req);
 		} else {
@@ -79,11 +81,13 @@ static int mmc_queue_thread(void *d)
 		}

 		/* Current request becomes previous request and vice versa. */
-		mq->mqrq_prev->brq.mrq.data = NULL;
-		mq->mqrq_prev->req = NULL;
-		tmp = mq->mqrq_prev;
-		mq->mqrq_prev = mq->mqrq_cur;
-		mq->mqrq_cur = tmp;
+		if (!mmc_prefetch_pending(&mq->mqrq_prev->mmc_active)) {
+			mq->mqrq_prev->brq.mrq.data = NULL;
+			mq->mqrq_prev->req = NULL;
+			tmp = mq->mqrq_prev;
+			mq->mqrq_prev = mq->mqrq_cur;
+			mq->mqrq_cur = tmp;
+		}
 	} while (1);
 	up(&mq->thread_sem);

@@ -109,10 +113,44 @@ static void mmc_request_fn(struct request_queue *q)
 		return;
 	}

+	if (mq->prefetch_enable) {
+		spin_lock(&mq->prefetch_lock);
+		if (mq->prefetch_completion)
+			complete(mq->prefetch_completion);
+		mq->prefetch_pending = true;
+		spin_unlock(&mq->prefetch_lock);
+	}
+
 	if (!mq->mqrq_cur->req && !mq->mqrq_prev->req)
 		wake_up_process(mq->thread);
 }

+static void mmc_req_init(struct mmc_async_req *areq, struct completion *compl)
+{
+	struct mmc_queue *mq =
+		container_of(areq->prefetch, struct mmc_queue, prefetch);
+
+	spin_lock(&mq->prefetch_lock);
+	mq->prefetch_completion = compl;
+	if (mq->prefetch_pending)
+		complete(mq->prefetch_completion);
+	spin_unlock(&mq->prefetch_lock);
+}
+
+static void mmc_req_start(struct mmc_async_req *areq, bool enable)
+{
+	struct mmc_queue *mq =
+		container_of(areq->prefetch, struct mmc_queue, prefetch);
+	mq->prefetch_enable = enable;
+}
+
+static bool mmc_req_pending(struct mmc_async_req *areq)
+{
+	struct mmc_queue *mq =
+		container_of(areq->prefetch, struct mmc_queue, prefetch);
+	return mq->prefetch_pending;
+}
+
 static struct scatterlist *mmc_alloc_sg(int sg_len, int *err)
 {
 	struct scatterlist *sg;
@@ -166,6 +204,12 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
 	int ret;
 	struct mmc_queue_req *mqrq_cur = &mq->mqrq[0];
 	struct mmc_queue_req *mqrq_prev = &mq->mqrq[1];
+	spin_lock_init(&mq->prefetch_lock);
+	mq->prefetch.wait_req_init = mmc_req_init;
+	mq->prefetch.wait_req_start = mmc_req_start;
+	mq->prefetch.wait_req_pending = mmc_req_pending;
+	mqrq_cur->mmc_active.prefetch = &mq->prefetch;
+	mqrq_prev->mmc_active.prefetch = &mq->prefetch;

 	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
 		limit = *mmc_dev(host)->dma_mask;
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index d2a1eb4..5afd467 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -33,6 +33,12 @@ struct mmc_queue {
 	struct mmc_queue_req	mqrq[2];
 	struct mmc_queue_req	*mqrq_cur;
 	struct mmc_queue_req	*mqrq_prev;
+
+	struct mmc_async_prefetch prefetch;
+	spinlock_t		prefetch_lock;
+	struct completion	*prefetch_completion;
+	bool			prefetch_enable;
+	bool			prefetch_pending;
 };

 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 9503cab..06fc036 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -352,7 +352,15 @@ struct mmc_async_req *mmc_start_req(struct mmc_host *host,
 		mmc_pre_req(host, areq->mrq, !host->areq);

 	if (host->areq) {
+		if (!areq)
+			mmc_prefetch_init(host->areq,
+					  &host->areq->mrq->completion);
 		mmc_wait_for_req_done(host, host->areq->mrq);
+		if (!areq) {
+			mmc_prefetch_start(host->areq, false);
+			if (mmc_prefetch_pending(host->areq))
+				return NULL;
+		}
 		err = host->areq->err_check(host->card, host->areq);
 	}

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 65c64ee..ce5d03f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/device.h>
 #include <linux/fault-inject.h>
+#include <linux/completion.h>

 #include <linux/mmc/core.h>
 #include <linux/mmc/pm.h>
@@ -140,6 +141,13 @@ struct mmc_host_ops {

 struct mmc_card;
 struct device;
+struct mmc_async_req;
+
+struct mmc_async_prefetch {
+	void (*wait_req_init)(struct mmc_async_req *, struct completion *);
+	void (*wait_req_start)(struct mmc_async_req *, bool);
+	bool (*wait_req_pending)(struct mmc_async_req *);
+};

 struct mmc_async_req {
 	/* active mmc request */
@@ -149,8 +157,33 @@ struct mmc_async_req {
 	 * Returns 0 if success otherwise non zero.
 	 */
 	int (*err_check) (struct mmc_card *, struct mmc_async_req *);
+	struct mmc_async_prefetch *prefetch;
 };

+/* set completion variable, complete == NULL to reset completion */
+static inline void mmc_prefetch_init(struct mmc_async_req *areq,
+				     struct completion *complete)
+{
+	if (areq->prefetch && areq->prefetch->wait_req_init)
+		areq->prefetch->wait_req_init(areq, complete);
+}
+
+/* enable or disable prefetch feature */
+static inline void mmc_prefetch_start(struct mmc_async_req *areq, bool enable)
+{
+	if (areq->prefetch && areq->prefetch->wait_req_start)
+		areq->prefetch->wait_req_start(areq, enable);
+}
+
+/* return true if new request is pending otherwise false */
+static inline bool mmc_prefetch_pending(struct mmc_async_req *areq)
+{
+	if (areq->prefetch && areq->prefetch->wait_req_pending)
+		return areq->prefetch->wait_req_pending(areq);
+	else
+		return false;
+}
+
 /**
  * struct mmc_slot - MMC slot functions
  *
....................................................................


BR
Per


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-30 12:19             ` Konstantin Dorfman
@ 2012-10-30 19:57               ` Per Forlin
  2012-11-13 21:10               ` Per Forlin
  1 sibling, 0 replies; 25+ messages in thread
From: Per Forlin @ 2012-10-30 19:57 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Venkatraman S, cjb, linux-mmc

Hi,

On Tue, Oct 30, 2012 at 1:19 PM, Konstantin Dorfman
<kdorfman@codeaurora.org> wrote:
> Hello,
>
> On 10/29/2012 11:40 PM, Per Forlin wrote:
>> Hi,
>>
>> I would like to move the focus of my concerns from root cause analysis
>> to the actual patch,
>> My first reflection is that the patch is relatively complex and some
>> of the code looks duplicated.
>> Would it be possible to simplify it and re-use  the current execution flow.
>>
>> Is the following flow feasible?
>>
>> in mmc_start_req():
>> --------------
>> if (rqc == NULL && not_resend)
>>   wait_for_both_mmc_and_arrival_of_new_req
>> else
>>   wait_only_for_mmc
>>
>> if (arrival_of_new_req) {
>>    set flag to indicate fetch-new_req
>>   return NULL;
>> }
>> -----------------
>>
>> in queue.c:
>> if (fetch-new_req)
>>   don't overwrite previous req.
>>
>> BR
>> Per
>
> You are talking about using both waiting mechanisms, the old one (wait on
> completion) and the new one - wait_event_interruptible()? But how will the
> done() callback, called in host controller irq context, differentiate
> which one is relevant for the request?
>
> I think it is better to use a single wait point for the mmc thread.
>
> Also in the future we plan to add a second reason to wake the mmc thread
> from waiting: the urgent_request - a notification about a next-to-fetch
> read request that should be executed out-of-order, using the new eMMC HPI
> feature (to stop the currently running request).
>
> I will publish this patch soon.
>
> Your idea with the fetch_new_req flag is good; I'll try to simplify the
> current patch and let's talk again.
>
I have not thought this through entirely, but it would be nice to add
a new func() in areq to add support for this new mechanism. If the
func() is NULL, the normal flow will be executed. This could be used in
the SDIO case too. At least it should be possible to use only the core
API and have it still make sense without an mmc block driver on top.

How to implement this wait function?
There should be one single wait point, I agree.
One could share the mmc completion, perhaps through the new func().
This means both the mmc-queue and the mmc-host complete the same
completion.
When waking up from the completion one needs to stop the new
func-wait-point and check whether a new request is available. If yes,
fetch the new request and next time don't re-init the completion (that
would overwrite the mmc-host complete value).
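
In rough pseudo-C (all helper names here are placeholders, just to show
the idea):

	/* mmcqd side: one completion shared by the host's done() path
	 * and by mmc_request_fn() */
	while (!transfer_done(mrq)) {
		wait_for_completion(&mrq->completion);
		if (new_request_pending(mq)) {
			fetch_and_prepare_new_request(mq);
			/*
			 * Don't re-init mrq->completion here: the host's
			 * complete() for the ongoing transfer must not be
			 * overwritten or lost.
			 */
		}
	}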

Let's talk again when you have a new patch set.

BR
Per

> Thanks,
> --
> Konstantin Dorfman,
> QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
> Inc. is a member of Code Aurora Forum,
> hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-30  7:45             ` Per Forlin
@ 2012-10-30 12:23               ` Konstantin Dorfman
  0 siblings, 0 replies; 25+ messages in thread
From: Konstantin Dorfman @ 2012-10-30 12:23 UTC (permalink / raw)
  To: Per Forlin; +Cc: Venkatraman S, cjb, linux-mmc

On 10/30/2012 09:45 AM, Per Forlin wrote:
> minor clarification,
> 
> On Mon, Oct 29, 2012 at 10:40 PM, Per Forlin <per.lkml@gmail.com> wrote:

>>
>> Is the following flow feasible?
>>
>> in mmc_start_req():
>> --------------
>> if (rqc == NULL && not_resend)
> && request_is_ongoing (in the case of a resend the request is never ongoing)
> 
>>   wait_for_both_mmc_and_arrival_of_new_req
> We should wake up if any of the two events occur.
> 
>> else
>>   wait_only_for_mmc
>>
>> if (arrival_of_new_req) {
>>    set flag to indicate fetch-new_req
>>   return NULL;
>> }
>> -----------------
>>
>> in queue.c:
>> if (fetch-new_req)
>>   don't overwrite previous req.
>>
> 
> This is somewhat a subset of your patch. Maybe I'm missing parts
> of the complexity.
> I haven't figured out why a new mmc_start_data_req() is needed. A new
> mechanism for waiting is required.
You're right, there was no reason to add a new function; we can use
mmc_start_req() and just change the waiting mechanism. I will fix this
to minimize the changes.

-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-29 21:40           ` Per Forlin
  2012-10-30  7:45             ` Per Forlin
@ 2012-10-30 12:19             ` Konstantin Dorfman
  2012-10-30 19:57               ` Per Forlin
  2012-11-13 21:10               ` Per Forlin
  1 sibling, 2 replies; 25+ messages in thread
From: Konstantin Dorfman @ 2012-10-30 12:19 UTC (permalink / raw)
  To: Per Forlin; +Cc: Venkatraman S, cjb, linux-mmc

Hello,

On 10/29/2012 11:40 PM, Per Forlin wrote:
> Hi,
> 
> I would like to move the focus of my concerns from root cause analysis
> to the actual patch,
> My first reflection is that the patch is relatively complex and some
> of the code looks duplicated.
> Would it be possible to simplify it and re-use  the current execution flow.
> 
> Is the following flow feasible?
> 
> in mmc_start_req():
> --------------
> if (rqc == NULL && not_resend)
>   wait_for_both_mmc_and_arrival_of_new_req
> else
>   wait_only_for_mmc
> 
> if (arrival_of_new_req) {
>    set flag to indicate fetch-new_req
>   return NULL;
> }
> -----------------
> 
> in queue.c:
> if (fetch-new_req)
>   don't overwrite previous req.
> 
> BR
> Per

You are talking about using both waiting mechanisms, the old one (wait on
completion) and the new one - wait_event_interruptible()? But how will the
done() callback, called in host controller irq context, differentiate
which one is relevant for the request?

I think it is better to use a single wait point for the mmc thread.

Also in the future we plan to add a second reason to wake the mmc thread
from waiting: the urgent_request - a notification about a next-to-fetch
read request that should be executed out-of-order, using the new eMMC HPI
feature (to stop the currently running request).

I will publish this patch soon.

Your idea with the fetch_new_req flag is good; I'll try to simplify the
current patch and let's talk again.

Thanks,
-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-29 21:40           ` Per Forlin
@ 2012-10-30  7:45             ` Per Forlin
  2012-10-30 12:23               ` Konstantin Dorfman
  2012-10-30 12:19             ` Konstantin Dorfman
  1 sibling, 1 reply; 25+ messages in thread
From: Per Forlin @ 2012-10-30  7:45 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Venkatraman S, cjb, linux-mmc

minor clarification,

On Mon, Oct 29, 2012 at 10:40 PM, Per Forlin <per.lkml@gmail.com> wrote:
> Hi,
>
> I would like to move the focus of my concerns from root cause analysis
> to the actual patch,
> My first reflection is that the patch is relatively complex and some
> of the code looks duplicated.
> Would it be possible to simplify it and re-use  the current execution flow.
>
> Is the following flow feasible?
>
> in mmc_start_req():
> --------------
> if (rqc == NULL && not_resend)
&& request_is_ongoing (in the case of a resend the request is never ongoing)

>   wait_for_both_mmc_and_arrival_of_new_req
We should wake up if any of the two events occur.

> else
>   wait_only_for_mmc
>
> if (arrival_of_new_req) {
>    set flag to indicate fetch-new_req
>   return NULL;
> }
> -----------------
>
> in queue.c:
> if (fetch-new_req)
>   don't overwrite previous req.
>

This is somewhat a subset of your patch. Maybe I'm missing parts
of the complexity.
I haven't figured out why a new mmc_start_data_req() is needed. A new
mechanism for waiting is required.

BR
Per

> BR
> Per
>
>
>
> On Sun, Oct 28, 2012 at 2:12 PM, Konstantin Dorfman
> <kdorfman@codeaurora.org> wrote:
>> Hello,
>>
>> On 10/26/2012 02:07 PM, Venkatraman S wrote:
>>
>>>
>>> Actually there could be a lot of reasons why the block layer or CFQ would
>>> not have inserted the request into the queue, i.e. you can see a lot of
>>> exit paths where blk_peek_request() returns NULL even though there could
>>> be a request pending in one of the CFQ request queues.
>>>
>>> Essentially you need to check what makes blk_fetch_request()
>>> (cfq_dispatch_requests()) return NULL when there is an element in the
>>> queue, if at all.
>>>
>>
>> It is not important why the block layer causes blk_fetch_request() (or
>> blk_peek_request()) to return NULL to the MMC layer; what is really
>> important is that the MMC layer should always be able to fetch a newly
>> arrived request ASAP after the block layer notification (mmc_request()),
>> and this is what my fix implements.
>>
>> And the fix does not change block layer behavior.
>>
>> Thanks
>>
>> --
>> Konstantin Dorfman,
>> QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
>> Inc. is a member of Code Aurora Forum,
>> hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-28 13:12         ` Konstantin Dorfman
@ 2012-10-29 21:40           ` Per Forlin
  2012-10-30  7:45             ` Per Forlin
  2012-10-30 12:19             ` Konstantin Dorfman
  0 siblings, 2 replies; 25+ messages in thread
From: Per Forlin @ 2012-10-29 21:40 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Venkatraman S, cjb, linux-mmc

Hi,

I would like to move the focus of my concerns from root cause analysis
to the actual patch,
My first reflection is that the patch is relatively complex and some
of the code looks duplicated.
Would it be possible to simplify it and re-use the current execution flow?

Is the following flow feasible?

in mmc_start_req():
--------------
if (rqc == NULL && not_resend)
  wait_for_both_mmc_and_arrival_of_new_req
else
  wait_only_for_mmc

if (arrival_of_new_req) {
   set flag to indicate fetch-new_req
  return NULL;
}
-----------------

in queue.c:
if (fetch-new_req)
  don't overwrite previous req.

BR
Per



On Sun, Oct 28, 2012 at 2:12 PM, Konstantin Dorfman
<kdorfman@codeaurora.org> wrote:
> Hello,
>
> On 10/26/2012 02:07 PM, Venkatraman S wrote:
>
>>
>> Actually there could be a lot of reasons why the block layer or CFQ would
>> not have inserted the request into the queue, i.e. you can see a lot of
>> exit paths where blk_peek_request() returns NULL even though there could
>> be a request pending in one of the CFQ request queues.
>>
>> Essentially you need to check what makes blk_fetch_request()
>> (cfq_dispatch_requests()) return NULL when there is an element in the
>> queue, if at all.
>>
>
> It is not important why the block layer causes blk_fetch_request() (or
> blk_peek_request()) to return NULL to the MMC layer; what is really
> important is that the MMC layer should always be able to fetch a newly
> arrived request ASAP after the block layer notification (mmc_request()),
> and this is what my fix implements.
>
> And the fix does not change block layer behavior.
>
> Thanks
>
> --
> Konstantin Dorfman,
> QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
> Inc. is a member of Code Aurora Forum,
> hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-26 12:07       ` Venkatraman S
@ 2012-10-28 13:12         ` Konstantin Dorfman
  2012-10-29 21:40           ` Per Forlin
  0 siblings, 1 reply; 25+ messages in thread
From: Konstantin Dorfman @ 2012-10-28 13:12 UTC (permalink / raw)
  To: Venkatraman S; +Cc: Per Förlin, Per Forlin, cjb, linux-mmc

Hello,

On 10/26/2012 02:07 PM, Venkatraman S wrote:

> 
> Actually there could be a lot of reasons why the block layer or CFQ would
> not have inserted the request into the queue, i.e. you can see a lot of
> exit paths where blk_peek_request() returns NULL even though there could
> be a request pending in one of the CFQ request queues.
>
> Essentially you need to check what makes blk_fetch_request()
> (cfq_dispatch_requests()) return NULL when there is an element in the
> queue, if at all.
> 

It is not important why the block layer causes blk_fetch_request() (or
blk_peek_request()) to return NULL to the MMC layer; what is really
important is that the MMC layer should always be able to fetch a newly
arrived request ASAP after the block layer notification (mmc_request()),
and this is what my fix implements.

And the fix does not change block layer behavior.
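
(For reference, the notification in question is simply the request_fn that
mmc_init_queue() registers with the block layer - sketched from the
existing code, abridged:

	/* drivers/mmc/card/queue.c: the block layer invokes this callback
	 * whenever a new request is added to the queue */
	mq->queue = blk_init_queue(mmc_request_fn, lock);

so nothing changes on the block layer side; the fix only changes what the
mmc side does when that callback fires.)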

Thanks

-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-26 11:55     ` Venkatraman S
@ 2012-10-28 12:52       ` Konstantin Dorfman
  0 siblings, 0 replies; 25+ messages in thread
From: Konstantin Dorfman @ 2012-10-28 12:52 UTC (permalink / raw)
  To: Venkatraman S; +Cc: Per Förlin, Per Forlin, cjb, linux-mmc

Hello Venkatraman,
Any ideas from you are very welcome,

On 10/26/2012 01:55 PM, Venkatraman S wrote:

> 
> Being nitpicky, I think it contradicts with the commit log that you have
> for the patch..
> <Quote>
> When the block layer notifies the MMC layer on a new request, we check
> for the above case where MMC layer is waiting on a previous request
> completion and the current request is NULL.
> </Quote>
> 

Can you be more specific? What is the contradiction? Can you suggest a
better patch description?

What I'm trying to say here:

the patch modifies the block layer notification (the mmc_request()
callback) for a new incoming request in such a way that, when the MMC
layer is blocked waiting for the completion of the previous request (in
core/core.c), it unblocks the MMC layer and rolls back in the
mmc_queue_thread code up to the blk_fetch() point. This permits the MMC
layer to start processing the newly notified request immediately (and
not after the completion of the currently running request).
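
Schematically (a sketch, not the exact patch code):

	/* mmc_queue_thread() loop, sketched */
	do {
		req = blk_fetch_request(q);
		ret = mq->issue_fn(mq, req);
		if (ret == MMC_BLK_NEW_REQUEST)
			continue;	/* the wait was cut short by a new
					 * request: fetch it immediately */
		/* otherwise: swap cur/prev requests, sleep if idle, etc. */
	} while (1);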

Does it make sense?


-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-25 15:02     ` Per Förlin
  2012-10-26 12:07       ` Venkatraman S
@ 2012-10-28 12:43       ` Konstantin Dorfman
  1 sibling, 0 replies; 25+ messages in thread
From: Konstantin Dorfman @ 2012-10-28 12:43 UTC (permalink / raw)
  To: Per Förlin; +Cc: Per Forlin, cjb, linux-mmc, Venkatraman S, Maya Erez

On 10/25/2012 05:02 PM, Per Förlin wrote:
> On 10/25/2012 03:28 PM, Konstantin Dorfman wrote:
>> On 10/24/2012 07:07 PM, Per Förlin wrote:
>>> On 10/24/2012 11:41 AM, Konstantin Dorfman wrote:
>>>> Hello Per,
>>>>
>>>> On Mon, October 22, 2012 1:02 am, Per Forlin wrote:
>>>>>> When mmcqt reports on completion of a request there should be
>>>>>> a context switch to allow the insertion of the next read ahead BIOs
>>>>>> to the block layer. Since the mmcqd tries to fetch another request
>>>>>> immediately after the completion of the previous request it gets NULL
>>>>>> and starts waiting for the completion of the previous request.
>>>>>> This wait on completion gives the FS the opportunity to insert the next
>>>>>> request but the MMC layer is already blocked on the previous request
>>>>>> completion and is not aware of the new request waiting to be fetched.
>>>>> I thought that I could trigger a context switch in order to give
>>>>> execution time for FS to add the new request to the MMC queue.
>>>>> I made a simple hack to call yield() in case the request gets NULL. I
>>>>> thought it may give the FS layer enough time to add a new request to
>>>>> the MMC queue. This would not delay the MMC transfer since the yield()
>>>>> is done in parallel with an ongoing transfer. Anyway it was just meant
>>>>> to be a simple test.
>>>>>
>>>>> One yield was not enough. Just for sanity check I added a msleep as
>>>>> well and that was enough to let FS add a new request,
>>>>> Would it be possible to gain throughput by delaying the fetch of new
>>>>> request? To avoid unnecessary NULL requests
>>>>>
>>>>> If (ongoing request is read AND size is max read ahead AND new request
>>>>> is NULL) yield();
>>>>>
>>>>> BR
>>>>> Per
>>>> We did the same experiment and it will not give maximum possible
>>>> performance. There is no guarantee that the context switch which was
>>>> manually caused by the MMC layer comes just in time: when it was early
>>>> then next fetch still results in NULL, when it was later, then we miss
>>>> possibility to fetch/prepare new request.
>>>>
>>>> Any delay in fetch of the new request that comes after the new request has
>>>> arrived hits throughput and latency.
>>>>
>>>> The solution we are talking about here will fix not only situation with FS
>>>> read ahead mechanism, but also it will remove penalty of the MMC context
>>>> waiting on completion while any new request arrives.
>>>>
>>>> Thanks,
>>>>
>>> It seems strange that the block layer cannot keep up with relatively slow flash media devices. There must be a limitation on the number of outstanding requests towards MMC.
>>> I need to make up my mind if it's the best way to address this issue in the MMC framework or block layer. I have started to look into the block layer code but it will take some time to dig out the relevant parts.
>>>
>>> BR
>>> Per
>>>
>> The root cause of the issue is the incompleteness of the current design
>> with respect to the well-known producer-consumer problem solution (the
>> producer is the block layer, the consumer is the mmc layer).
>> Classic definitions state that the buffer is of fixed size; in our case we
>> have a queue, so the producer is always capable of putting a new request
>> into the queue.
>> The consumer context is blocked when both buffers (curr and prev) are busy
>> (the first has started its execution on the bus, the second is fetched and
>> waiting for the first).
> This happens, but I thought that the block layer would continue to add requests to the MMC queue while the consumer is busy.
> When the consumer fetches a request from the queue again there should be several requests available in the queue, but there is only one.

The MMC layer tries to fetch the next request immediately after starting
the previous one, but the block layer has had no context in which to
submit a new request. After fetching a NULL request the MMC layer becomes
blocked and has no chance to repeat the fetch until the current request
completes.

> 
>> The producer context is considered blocked when the FS (or other bio
>> sources) has no requests to put into the queue.
> Does the block layer ever wait for an outstanding request to finish? Could this be another reason why the producer doesn't add new requests to the MMC queue?
> 
In the sequential read case, only after the request completion has been
returned to the FS/read-ahead layer will it generate the next read
request. It may depend on the READ_AHEAD : mmc_req_size ratio; it may
also depend on other layers (above the FS) and on user-space access
patterns. You can't always expect "good" access patterns to reach the
MMC layer.

> I never meant yield or sleep to be a permanent fix. I was only curious about how it would affect the performance, in order to gain better knowledge of the root cause.
> My impression is that even if the SD card is very slow you will see the same effect. The behavior of the block layer in this case is not related to the speed of the flash memory.
> On a slow card the MMC queue runs empty just like it does for a fast eMMC.
> According to you the block layer should have a better chance to feed the MMC queue if the card is slow (more time for the block layer to prepare the next requests).
> 
I understand that yield/sleep is not a solution. It is not important
what the reasons for the block layer's behaviour are; what is really
important is that the MMC layer should always be able to fetch a newly
arrived request ASAP after the block layer notification (mmc_request()),
and this is what my fix implements. And the fix does not change block
layer behavior.

Thanks,
-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-25 15:02     ` Per Förlin
@ 2012-10-26 12:07       ` Venkatraman S
  2012-10-28 13:12         ` Konstantin Dorfman
  2012-10-28 12:43       ` Konstantin Dorfman
  1 sibling, 1 reply; 25+ messages in thread
From: Venkatraman S @ 2012-10-26 12:07 UTC (permalink / raw)
  To: Per Förlin; +Cc: Konstantin Dorfman, Per Forlin, cjb, linux-mmc

On Thu, Oct 25, 2012 at 8:32 PM, Per Förlin <per.forlin@stericsson.com> wrote:
> On 10/25/2012 03:28 PM, Konstantin Dorfman wrote:
>> On 10/24/2012 07:07 PM, Per Förlin wrote:
>>> On 10/24/2012 11:41 AM, Konstantin Dorfman wrote:
>>>> Hello Per,
>>>>
>>>> On Mon, October 22, 2012 1:02 am, Per Forlin wrote:
>>>>>> When mmcqt reports on completion of a request there should be
>>>>>> a context switch to allow the insertion of the next read ahead BIOs
>>>>>> to the block layer. Since the mmcqd tries to fetch another request
>>>>>> immediately after the completion of the previous request it gets NULL
>>>>>> and starts waiting for the completion of the previous request.
>>>>>> This wait on completion gives the FS the opportunity to insert the next
>>>>>> request but the MMC layer is already blocked on the previous request
>>>>>> completion and is not aware of the new request waiting to be fetched.
>>>>> I thought that I could trigger a context switch in order to give
>>>>> execution time for FS to add the new request to the MMC queue.
>>>>> I made a simple hack to call yield() in case the request gets NULL. I
>>>>> thought it may give the FS layer enough time to add a new request to
>>>>> the MMC queue. This would not delay the MMC transfer since the yield()
>>>>> is done in parallel with an ongoing transfer. Anyway it was just meant
>>>>> to be a simple test.
>>>>>
>>>>> One yield was not enough. Just for sanity check I added a msleep as
>>>>> well and that was enough to let FS add a new request,
>>>>> Would it be possible to gain throughput by delaying the fetch of new
>>>> request? To avoid unnecessary NULL requests
>>>>>
>>>>> If (ongoing request is read AND size is max read ahead AND new request
>>>>> is NULL) yield();
>>>>>
>>>>> BR
>>>>> Per
>>>> We did the same experiment and it will not give maximum possible
>>>> performance. There is no guarantee that the context switch which was
>>>> manually caused by the MMC layer comes just in time: when it was early
>>>> then next fetch still results in NULL, when it was later, then we miss
>>>> possibility to fetch/prepare new request.
>>>>
>>>> Any delay in fetch of the new request that comes after the new request has
>>>> arrived hits throughput and latency.
>>>>
>>>> The solution we are talking about here will fix not only situation with FS
>>>> read ahead mechanism, but also it will remove penalty of the MMC context
>>>> waiting on completion while any new request arrives.
>>>>
>>>> Thanks,
>>>>
>> It seems strange that the block layer cannot keep up with relatively slow flash media devices. There must be a limitation on the number of outstanding requests towards MMC.
>>> I need to make up my mind if it's the best way to address this issue in the MMC framework or block layer. I have started to look into the block layer code but it will take some time to dig out the relevant parts.
>>>
>>> BR
>>> Per
>>>
>> The root cause of the issue is the incompleteness of the current design
>> with respect to the well-known producer-consumer problem solution (the
>> producer is the block layer, the consumer is the mmc layer).
>> Classic definitions state that the buffer is of fixed size; in our case we
>> have a queue, so the producer is always capable of putting a new request
>> into the queue.
>> The consumer context is blocked when both buffers (curr and prev) are busy
>> (the first has started its execution on the bus, the second is fetched and
>> waiting for the first).
> This happens, but I thought that the block layer would continue to add requests to the MMC queue while the consumer is busy.
> When the consumer fetches a request from the queue again there should be several requests available in the queue, but there is only one.
>
>> The producer context is considered blocked when the FS (or other bio
>> sources) has no requests to put into the queue.
> Does the block layer ever wait for an outstanding request to finish? Could this be another reason why the producer doesn't add new requests to the MMC queue?
>

Actually there could be a lot of reasons why the block layer or CFQ would
not have inserted the request into the queue, i.e. you can see a lot of
exit paths where blk_peek_request() returns NULL even though there could
be a request pending in one of the CFQ request queues.

Essentially you need to check what makes blk_fetch_request()
(cfq_dispatch_requests()) return NULL when there is an element in the
queue, if at all.

>> To maximize performance, 2 notifications should be used:
>> 1. The producer notifies the consumer about a new item to process.
>> 2. The consumer notifies the producer about free space.
>>
>> In our case the 2nd notification is not needed since, as I said before,
>> there is always free space in the queue.
>> There is no such notification as the 1st, i.e. the block layer has no way
>> to notify the mmc layer that a new request has arrived.
>>
>> What you are suggesting is to resolve a specific case, when the FS
>> READ_AHEAD mechanism behavior causes delays in producing new requests.
>> Probably you can resolve this specific case, but do you have a guarantee
>> that this is the only case that causes delays between new request events?
>> Flash memory devices these days are constantly improving on all levels:
>> NAND, firmware, bus speed and host controller capabilities; this makes
>> any yield/sleep/timeout solution only a temporary hack.
> I never meant yield or sleep to be a permanent fix. I was only curious about how it would affect the performance, in order to gain better knowledge of the root cause.
> My impression is that even if the SD card is very slow you will see the same effect. The behavior of the block layer in this case is not related to the speed of the flash memory.
> On a slow card the MMC queue runs empty just like it does for a fast eMMC.
> According to you the block layer should have a better chance to feed the MMC queue if the card is slow (more time for the block layer to prepare the next requests).
>
> BR
> Per


* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-25 13:28   ` Konstantin Dorfman
  2012-10-25 15:02     ` Per Förlin
@ 2012-10-26 11:55     ` Venkatraman S
  2012-10-28 12:52       ` Konstantin Dorfman
  1 sibling, 1 reply; 25+ messages in thread
From: Venkatraman S @ 2012-10-26 11:55 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Per Förlin, Per Forlin, cjb, linux-mmc

On Thu, Oct 25, 2012 at 6:58 PM, Konstantin Dorfman
<kdorfman@codeaurora.org> wrote:
> On 10/24/2012 07:07 PM, Per Förlin wrote:
>> On 10/24/2012 11:41 AM, Konstantin Dorfman wrote:
>>> Hello Per,
>>>
>>> On Mon, October 22, 2012 1:02 am, Per Forlin wrote:
>>>>> When mmcqt reports on completion of a request there should be
>>>>> a context switch to allow the insertion of the next read ahead BIOs
>>>>> to the block layer. Since the mmcqd tries to fetch another request
>>>>> immediately after the completion of the previous request it gets NULL
>>>>> and starts waiting for the completion of the previous request.
>>>>> This wait on completion gives the FS the opportunity to insert the next
>>>>> request but the MMC layer is already blocked on the previous request
>>>>> completion and is not aware of the new request waiting to be fetched.
>>>> I thought that I could trigger a context switch in order to give
>>>> execution time for FS to add the new request to the MMC queue.
>>>> I made a simple hack to call yield() in case the request gets NULL. I
>>>> thought it may give the FS layer enough time to add a new request to
>>>> the MMC queue. This would not delay the MMC transfer since the yield()
>>>> is done in parallel with an ongoing transfer. Anyway it was just meant
>>>> to be a simple test.
>>>>
>>>> One yield was not enough. Just for sanity check I added a msleep as
>>>> well and that was enough to let FS add a new request,
>>>> Would it be possible to gain throughput by delaying the fetch of new
>>>> request? To avoid unnecessary NULL requests
>>>>
>>>> If (ongoing request is read AND size is max read ahead AND new request
>>>> is NULL) yield();
>>>>
>>>> BR
>>>> Per
>>> We did the same experiment and it will not give maximum possible
>>> performance. There is no guarantee that the context switch which was
>>> manually caused by the MMC layer comes just in time: when it was early
>>> then next fetch still results in NULL, when it was later, then we miss
>>> possibility to fetch/prepare new request.
>>>
>>> Any delay in fetch of the new request that comes after the new request has
>>> arrived hits throughput and latency.
>>>
>>> The solution we are talking about here will fix not only situation with FS
>>> read ahead mechanism, but also it will remove penalty of the MMC context
>>> waiting on completion while any new request arrives.
>>>
>>> Thanks,
>>>
>> It seems strange that the block layer cannot keep up with relatively slow flash media devices. There must be a limitation on the number of outstanding requests towards MMC.
>> I need to make up my mind if it's the best way to address this issue in the MMC framework or block layer. I have started to look into the block layer code but it will take some time to dig out the relevant parts.
>>
>> BR
>> Per
>>
> The root cause of the issue is the incompleteness of the current design
> with respect to the well-known producer-consumer problem solution (the
> producer is the block layer, the consumer is the mmc layer).
> Classic definitions state that the buffer is of fixed size; in our case we
> have a queue, so the producer is always capable of putting a new request
> into the queue.
> The consumer context is blocked when both buffers (curr and prev) are busy
> (the first has started its execution on the bus, the second is fetched and
> waiting for the first).
> Producer context considered to be blocked when FS (or others bio
> sources) has no requests to put into queue.
> To maximize performance there are 2 notifications should be used:
> 1. Producer notifies Consumer about new item to proceed.
> 2. Consumer notifies Producer about free place.
>
> In our case 2nd notification not need since as I said before - it is
> always free space in the queue.
> There is no such notification as 1st, i.e. block layer has no way to
> notify mmc layer about new request arrived.

Being nitpicky, I think this contradicts the commit log that you have
for the patch.
<Quote>
When the block layer notifies the MMC layer on a new request, we check
for the above case where MMC layer is waiting on a previous request
completion and the current request is NULL.
</Quote>

>
> What you are suggesting resolves a specific case, where the behavior of
> the FS READ_AHEAD mechanism causes delays in producing new requests.
> You can probably resolve this specific case, but do you have a guarantee
> that it is the only case that causes delays between new-request events?
> Flash memory devices are constantly improving on all levels (NAND,
> firmware, bus speed and host controller capabilities), which makes any
> yield/sleep/timeout solution only a temporary hack.
>

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-25 13:28   ` Konstantin Dorfman
@ 2012-10-25 15:02     ` Per Förlin
  2012-10-26 12:07       ` Venkatraman S
  2012-10-28 12:43       ` Konstantin Dorfman
  2012-10-26 11:55     ` Venkatraman S
  1 sibling, 2 replies; 25+ messages in thread
From: Per Förlin @ 2012-10-25 15:02 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Per Forlin, cjb, linux-mmc

On 10/25/2012 03:28 PM, Konstantin Dorfman wrote:
> On 10/24/2012 07:07 PM, Per Förlin wrote:
>> On 10/24/2012 11:41 AM, Konstantin Dorfman wrote:
>>> Hello Per,
>>>
>>> On Mon, October 22, 2012 1:02 am, Per Forlin wrote:
>>>>> When mmcqt reports on completion of a request there should be
>>>>> a context switch to allow the insertion of the next read ahead BIOs
>>>>> to the block layer. Since the mmcqd tries to fetch another request
>>>>> immediately after the completion of the previous request it gets NULL
>>>>> and starts waiting for the completion of the previous request.
>>>>> This wait on completion gives the FS the opportunity to insert the next
>>>>> request but the MMC layer is already blocked on the previous request
>>>>> completion and is not aware of the new request waiting to be fetched.
>>>> I thought that I could trigger a context switch in order to give
>>>> execution time for FS to add the new request to the MMC queue.
>>>> I made a simple hack to call yield() in case the request gets NULL. I
>>>> thought it may give the FS layer enough time to add a new request to
>>>> the MMC queue. This would not delay the MMC transfer since the yield()
>>>> is done in parallel with an ongoing transfer. Anyway it was just meant
>>>> to be a simple test.
>>>>
>>>> One yield was not enough. Just as a sanity check I added a msleep as
>>>> well, and that was enough to let the FS add a new request.
>>>> Would it be possible to gain throughput by delaying the fetch of a new
>>>> request, to avoid unnecessary NULL requests?
>>>>
>>>> If (ongoing request is read AND size is max read ahead AND new request
>>>> is NULL) yield();
>>>>
>>>> BR
>>>> Per
>>> We did the same experiment, and it does not give the maximum possible
>>> performance. There is no guarantee that a context switch manually
>>> caused by the MMC layer comes just in time: if it comes too early, the
>>> next fetch still returns NULL; if it comes too late, we miss the
>>> chance to fetch and prepare the new request.
>>>
>>> Any delay in fetching a new request after it has arrived hurts both
>>> throughput and latency.
>>>
>>> The solution we are talking about here will not only fix the situation
>>> with the FS read-ahead mechanism, it will also remove the penalty of
>>> the MMC context waiting on a completion while a new request arrives.
>>>
>>> Thanks,
>>>
>> It seems strange that the block layer cannot keep up with relatively slow flash media devices. There must be a limitation on the number of outstanding requests towards MMC.
>> I need to make up my mind whether it is best to address this issue in the MMC framework or in the block layer. I have started to look into the block layer code, but it will take some time to dig out the relevant parts.
>>
>> BR
>> Per
>>
> The root cause of the issue is that the current design is an incomplete
> solution to the well-known producer-consumer problem (the producer is
> the block layer, the consumer is the mmc layer).
> The classic definition assumes a fixed-size buffer; in our case we have
> a queue, so the producer is always able to put a new request into it.
> The consumer context blocks when both buffers (curr and prev) are busy
> (the first has started its execution on the bus, the second has been
> fetched and is waiting for the first).
This happens, but I thought that the block layer would continue to add requests to the MMC queue while the consumer is busy.
When the consumer fetches a request from the queue again there should be several requests available, but there is only one. A sketch of the consumer loop below shows where this happens.
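
For context, this is roughly what the consumer side looks like (a paraphrase of mmc_queue_thread() in drivers/mmc/card/queue.c of this era; kthread setup, task-state handling and error paths are elided, so take it as a sketch rather than the actual code):

	while (1) {
		spin_lock_irq(q->queue_lock);
		req = blk_fetch_request(q);	/* NULL when the queue is empty */
		mq->mqrq_cur->req = req;
		spin_unlock_irq(q->queue_lock);

		/*
		 * With req == NULL while the previous transfer is still in
		 * flight, issue_fn() ends up blocking on the completion of
		 * that previous request: the wait discussed in this thread.
		 */
		if (req || mq->mqrq_prev->req)
			mq->issue_fn(mq, req);
		else
			schedule();	/* nothing queued, nothing in flight */
	}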

> The producer context is considered blocked when the FS (or other bio
> sources) has no requests to put into the queue.
Does the block layer ever wait for an outstanding request to finish? Could this be another reason why the producer doesn't add new requests to the MMC queue?

> To maximize performance, 2 notifications should be used:
> 1. The producer notifies the consumer about a new item to process.
> 2. The consumer notifies the producer about free space.
> 
> In our case the 2nd notification is not needed since, as I said before,
> there is always free space in the queue.
> The 1st notification does not exist at all, i.e. the block layer has no
> way to notify the mmc layer that a new request has arrived.
> 
> What you are suggesting resolves a specific case, where the behavior of
> the FS READ_AHEAD mechanism causes delays in producing new requests.
> You can probably resolve this specific case, but do you have a guarantee
> that it is the only case that causes delays between new-request events?
> Flash memory devices are constantly improving on all levels (NAND,
> firmware, bus speed and host controller capabilities), which makes any
> yield/sleep/timeout solution only a temporary hack.
I never meant yield or sleep to be a permanent fix. I was only curious how it would affect performance, in order to gain better knowledge of the root cause.
My impression is that even if the SD card is very slow you will see the same effect. The behavior of the block layer in this case is not related to the speed of the flash memory.
On a slow card the MMC queue runs empty just like it does for a fast eMMC.
By your reasoning, the block layer should have an even better chance to feed the MMC queue if the card is slow (more time for the block layer to prepare the next requests).

BR
Per


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-24 17:07 ` Per Förlin
@ 2012-10-25 13:28   ` Konstantin Dorfman
  2012-10-25 15:02     ` Per Förlin
  2012-10-26 11:55     ` Venkatraman S
  0 siblings, 2 replies; 25+ messages in thread
From: Konstantin Dorfman @ 2012-10-25 13:28 UTC (permalink / raw)
  To: Per Förlin; +Cc: Per Forlin, cjb, linux-mmc

On 10/24/2012 07:07 PM, Per Förlin wrote:
> On 10/24/2012 11:41 AM, Konstantin Dorfman wrote:
>> Hello Per,
>>
>> On Mon, October 22, 2012 1:02 am, Per Forlin wrote:
>>>> When mmcqt reports on completion of a request there should be
>>>> a context switch to allow the insertion of the next read ahead BIOs
>>>> to the block layer. Since the mmcqd tries to fetch another request
>>>> immediately after the completion of the previous request it gets NULL
>>>> and starts waiting for the completion of the previous request.
>>>> This wait on completion gives the FS the opportunity to insert the next
>>>> request but the MMC layer is already blocked on the previous request
>>>> completion and is not aware of the new request waiting to be fetched.
>>> I thought that I could trigger a context switch in order to give
>>> execution time for FS to add the new request to the MMC queue.
>>> I made a simple hack to call yield() in case the request gets NULL. I
>>> thought it may give the FS layer enough time to add a new request to
>>> the MMC queue. This would not delay the MMC transfer since the yield()
>>> is done in parallel with an ongoing transfer. Anyway it was just meant
>>> to be a simple test.
>>>
>>> One yield was not enough. Just as a sanity check I added a msleep as
>>> well, and that was enough to let the FS add a new request.
>>> Would it be possible to gain throughput by delaying the fetch of a new
>>> request, to avoid unnecessary NULL requests?
>>>
>>> If (ongoing request is read AND size is max read ahead AND new request
>>> is NULL) yield();
>>>
>>> BR
>>> Per
>> We did the same experiment, and it does not give the maximum possible
>> performance. There is no guarantee that a context switch manually
>> caused by the MMC layer comes just in time: if it comes too early, the
>> next fetch still returns NULL; if it comes too late, we miss the
>> chance to fetch and prepare the new request.
>>
>> Any delay in fetching a new request after it has arrived hurts both
>> throughput and latency.
>>
>> The solution we are talking about here will not only fix the situation
>> with the FS read-ahead mechanism, it will also remove the penalty of
>> the MMC context waiting on a completion while a new request arrives.
>>
>> Thanks,
>>
> It seems strange that the block layer cannot keep up with relatively slow flash media devices. There must be a limitation on the number of outstanding requests towards MMC.
> I need to make up my mind whether it is best to address this issue in the MMC framework or in the block layer. I have started to look into the block layer code, but it will take some time to dig out the relevant parts.
>
> BR
> Per
>
The root cause of the issue is that the current design is an incomplete
solution to the well-known producer-consumer problem (the producer is
the block layer, the consumer is the mmc layer).
The classic definition assumes a fixed-size buffer; in our case we have
a queue, so the producer is always able to put a new request into it.
The consumer context blocks when both buffers (curr and prev) are busy
(the first has started its execution on the bus, the second has been
fetched and is waiting for the first).
The producer context is considered blocked when the FS (or other bio
sources) has no requests to put into the queue.
To maximize performance, 2 notifications should be used:
1. The producer notifies the consumer about a new item to process.
2. The consumer notifies the producer about free space.

In our case the 2nd notification is not needed since, as I said before,
there is always free space in the queue.
The 1st notification does not exist at all, i.e. the block layer has no
way to notify the mmc layer that a new request has arrived.
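
To make the 1st notification concrete, here is a minimal sketch of what such an event could look like (the names mmc_context_info, is_new_req, is_done_rcv and mmc_notify_new_req are illustrative only, not the exact code of the patch, and the wait queue and lock are assumed to be initialized elsewhere with init_waitqueue_head()/spin_lock_init()):

	struct mmc_context_info {
		bool			is_new_req;	/* producer raised the event */
		bool			is_done_rcv;	/* previous transfer finished */
		wait_queue_head_t	wait;		/* mmcqd sleeps here */
		spinlock_t		lock;
	};

	/* Producer side: called when the block layer queues a new request. */
	static void mmc_notify_new_req(struct mmc_context_info *ci)
	{
		unsigned long flags;

		spin_lock_irqsave(&ci->lock, flags);
		ci->is_new_req = true;
		spin_unlock_irqrestore(&ci->lock, flags);
		wake_up_interruptible(&ci->wait);
	}

	/* Consumer side: instead of a blocking wait_for_completion() on the
	 * previous request, wait for whichever event fires first. */
	static void mmc_wait_for_event(struct mmc_context_info *ci)
	{
		wait_event_interruptible(ci->wait,
					 ci->is_done_rcv || ci->is_new_req);
	}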

What you are suggesting resolves a specific case, where the behavior of
the FS READ_AHEAD mechanism causes delays in producing new requests.
You can probably resolve this specific case, but do you have a guarantee
that it is the only case that causes delays between new-request events?
Flash memory devices are constantly improving on all levels (NAND,
firmware, bus speed and host controller capabilities), which makes any
yield/sleep/timeout solution only a temporary hack.

Thanks
-- 

Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center,
Inc. is a member of Code Aurora Forum,
hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
  2012-10-24  9:41 Konstantin Dorfman
@ 2012-10-24 17:07 ` Per Förlin
  2012-10-25 13:28   ` Konstantin Dorfman
  0 siblings, 1 reply; 25+ messages in thread
From: Per Förlin @ 2012-10-24 17:07 UTC (permalink / raw)
  To: Konstantin Dorfman; +Cc: Per Forlin, cjb, linux-mmc

On 10/24/2012 11:41 AM, Konstantin Dorfman wrote:
> Hello Per,
>
> On Mon, October 22, 2012 1:02 am, Per Forlin wrote:
>>> When mmcqt reports on completion of a request there should be
>>> a context switch to allow the insertion of the next read ahead BIOs
>>> to the block layer. Since the mmcqd tries to fetch another request
>>> immediately after the completion of the previous request it gets NULL
>>> and starts waiting for the completion of the previous request.
>>> This wait on completion gives the FS the opportunity to insert the next
>>> request but the MMC layer is already blocked on the previous request
>>> completion and is not aware of the new request waiting to be fetched.
>> I thought that I could trigger a context switch in order to give
>> execution time for FS to add the new request to the MMC queue.
>> I made a simple hack to call yield() in case the request gets NULL. I
>> thought it may give the FS layer enough time to add a new request to
>> the MMC queue. This would not delay the MMC transfer since the yield()
>> is done in parallel with an ongoing transfer. Anyway it was just meant
>> to be a simple test.
>>
>> One yield was not enough. Just as a sanity check I added a msleep as
>> well, and that was enough to let the FS add a new request.
>> Would it be possible to gain throughput by delaying the fetch of a new
>> request, to avoid unnecessary NULL requests?
>>
>> If (ongoing request is read AND size is max read ahead AND new request
>> is NULL) yield();
>>
>> BR
>> Per
> We did the same experiment, and it does not give the maximum possible
> performance. There is no guarantee that a context switch manually
> caused by the MMC layer comes just in time: if it comes too early, the
> next fetch still returns NULL; if it comes too late, we miss the
> chance to fetch and prepare the new request.
>
> Any delay in fetching a new request after it has arrived hurts both
> throughput and latency.
>
> The solution we are talking about here will not only fix the situation
> with the FS read-ahead mechanism, it will also remove the penalty of
> the MMC context waiting on a completion while a new request arrives.
>
> Thanks,
>

It seems strange that the block layer cannot keep up with relatively slow flash media devices. There must be a limitation on the number of outstanding requests towards MMC.
I need to make up my mind whether it is best to address this issue in the MMC framework or in the block layer. I have started to look into the block layer code, but it will take some time to dig out the relevant parts.

BR
Per


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v1] mmc: fix async request mechanism for sequential read scenarios
@ 2012-10-24  9:41 Konstantin Dorfman
  2012-10-24 17:07 ` Per Förlin
  0 siblings, 1 reply; 25+ messages in thread
From: Konstantin Dorfman @ 2012-10-24  9:41 UTC (permalink / raw)
  To: Per Forlin; +Cc: Konstantin Dorfman, cjb, linux-mmc

Hello Per,

On Mon, October 22, 2012 1:02 am, Per Forlin wrote:
>> When mmcqt reports on completion of a request there should be
>> a context switch to allow the insertion of the next read ahead BIOs
>> to the block layer. Since the mmcqd tries to fetch another request
>> immediately after the completion of the previous request it gets NULL
>> and starts waiting for the completion of the previous request.
>> This wait on completion gives the FS the opportunity to insert the next
>> request but the MMC layer is already blocked on the previous request
>> completion and is not aware of the new request waiting to be fetched.
> I thought that I could trigger a context switch in order to give
> execution time for FS to add the new request to the MMC queue.
> I made a simple hack to call yield() in case the request gets NULL. I
> thought it may give the FS layer enough time to add a new request to
> the MMC queue. This would not delay the MMC transfer since the yield()
> is done in parallel with an ongoing transfer. Anyway it was just meant
> to be a simple test.
>
> One yield was not enough. Just as a sanity check I added a msleep as
> well, and that was enough to let the FS add a new request.
> Would it be possible to gain throughput by delaying the fetch of a new
> request, to avoid unnecessary NULL requests?
>
> If (ongoing request is read AND size is max read ahead AND new request
> is NULL) yield();
>
> BR
> Per
We did the same experiment, and it does not give the maximum possible
performance. There is no guarantee that a context switch manually
caused by the MMC layer comes just in time: if it comes too early, the
next fetch still returns NULL; if it comes too late, we miss the
chance to fetch and prepare the new request.
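
(For reference, the hack under discussion amounts to roughly the following; the helper names here are hypothetical, since this was a throw-away test rather than submitted code:)

	/* Hypothetical helpers, sketching only the quoted test hack. */
	if (!req && ongoing_req_is_read(mq) &&
	    ongoing_req_size(mq) >= max_readahead_bytes(q))
		yield();	/* hope the FS queues the next BIOs now */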

Any delay in fetching a new request after it has arrived hurts both
throughput and latency.

The solution we are talking about here will not only fix the situation
with the FS read-ahead mechanism, it will also remove the penalty of
the MMC context waiting on a completion while a new request arrives.

Thanks,

-- 
Konstantin Dorfman,
QUALCOMM ISRAEL, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2012-11-26 15:28 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-15 15:36 [PATCH v1] mmc: fix async request mechanism for sequential read scenarios Konstantin Dorfman
2012-10-21 23:02 ` Per Forlin
2012-10-24  9:41 Konstantin Dorfman
2012-10-24 17:07 ` Per Förlin
2012-10-25 13:28   ` Konstantin Dorfman
2012-10-25 15:02     ` Per Förlin
2012-10-26 12:07       ` Venkatraman S
2012-10-28 13:12         ` Konstantin Dorfman
2012-10-29 21:40           ` Per Forlin
2012-10-30  7:45             ` Per Forlin
2012-10-30 12:23               ` Konstantin Dorfman
2012-10-30 12:19             ` Konstantin Dorfman
2012-10-30 19:57               ` Per Forlin
2012-11-13 21:10               ` Per Forlin
2012-11-14 15:15                 ` Konstantin Dorfman
2012-11-15 16:38                   ` Per Förlin
2012-11-19  9:48                     ` Konstantin Dorfman
2012-11-19 14:32                       ` Per Förlin
2012-11-19 21:34                         ` Per Förlin
2012-11-20 16:26                           ` Konstantin Dorfman
2012-11-20 18:57                             ` Konstantin Dorfman
2012-11-26 15:28                           ` Konstantin Dorfman
2012-10-28 12:43       ` Konstantin Dorfman
2012-10-26 11:55     ` Venkatraman S
2012-10-28 12:52       ` Konstantin Dorfman
