All of lore.kernel.org
 help / color / mirror / Atom feed
* [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver
@ 2017-02-17 17:22 ` Philipp Tomsich
  2017-02-19 22:08   ` Jaehoon Chung
  2017-02-21 18:00   ` Maxime Ripard
  0 siblings, 2 replies; 9+ messages in thread
From: Philipp Tomsich @ 2017-02-17 17:22 UTC (permalink / raw)
  To: u-boot

Throughput tests have shown the sunxi_mmc driver to take over 10s to
read 10MB from a fast eMMC device due to excessive delays in polling
loops.

This commit restructures the main polling loops to use get_timer(...)
to determine whether a (millisecond) timeout has expired.  We choose
not to use the wait_bit function, as we don't need interruptability
with ctrl-c and have at least one case where two bits (one for an
error condition and another one for completion) need to be read and
using wait_bit would have not added to the clarity.

The observed speedup in testing on a A31 is greater than 10x (e.g. a
10MB write decreases from 9.302s to 0.884s).

X-Affected-platforms: A31-uQ7, A80-Q7, A64-uQ7
Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
---
 drivers/mmc/sunxi_mmc.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/mmc/sunxi_mmc.c b/drivers/mmc/sunxi_mmc.c
index d3106db..46abe4a 100644
--- a/drivers/mmc/sunxi_mmc.c
+++ b/drivers/mmc/sunxi_mmc.c
@@ -180,22 +180,22 @@ static int mmc_clk_io_on(int sdc_no)
 static int mmc_update_clk(struct mmc *mmc)
 {
 	struct sunxi_mmc_host *mmchost = mmc->priv;
 	unsigned int cmd;
 	unsigned timeout_msecs = 2000;
+	unsigned long start = get_timer(0);
 
 	cmd = SUNXI_MMC_CMD_START |
 	      SUNXI_MMC_CMD_UPCLK_ONLY |
 	      SUNXI_MMC_CMD_WAIT_PRE_OVER;
 	writel(cmd, &mmchost->reg->cmd);
 	while (readl(&mmchost->reg->cmd) & SUNXI_MMC_CMD_START) {
-		if (!timeout_msecs--)
+		if (get_timer(start) > timeout_msecs)
 			return -1;
-		udelay(1000);
 	}
 
 	/* clock update sets various irq status bits, clear these */
 	writel(readl(&mmchost->reg->rint), &mmchost->reg->rint);
 
 	return 0;
 }
 
@@ -265,161 +265,165 @@ static int sunxi_mmc_core_init(struct mmc *mmc)
 static int mmc_trans_data_by_cpu(struct mmc *mmc, struct mmc_data *data)
 {
 	struct sunxi_mmc_host *mmchost = mmc->priv;
 	const int reading = !!(data->flags & MMC_DATA_READ);
 	const uint32_t status_bit = reading ? SUNXI_MMC_STATUS_FIFO_EMPTY :
 					      SUNXI_MMC_STATUS_FIFO_FULL;
 	unsigned i;
 	unsigned *buff = (unsigned int *)(reading ? data->dest : data->src);
 	unsigned byte_cnt = data->blocksize * data->blocks;
-	unsigned timeout_usecs = (byte_cnt >> 8) * 1000;
-	if (timeout_usecs < 2000000)
-		timeout_usecs = 2000000;
+	unsigned timeout_msecs = byte_cnt >> 8;
+	unsigned long  start;
+
+	if (timeout_msecs < 2000)
+		timeout_msecs = 2000;
 
 	/* Always read / write data through the CPU */
 	setbits_le32(&mmchost->reg->gctrl, SUNXI_MMC_GCTRL_ACCESS_BY_AHB);
 
+	start = get_timer(0);
+
 	for (i = 0; i < (byte_cnt >> 2); i++) {
 		while (readl(&mmchost->reg->status) & status_bit) {
-			if (!timeout_usecs--)
+			if (get_timer(start) > timeout_msecs)
 				return -1;
-			udelay(1);
 		}
 
 		if (reading)
 			buff[i] = readl(&mmchost->reg->fifo);
 		else
 			writel(buff[i], &mmchost->reg->fifo);
 	}
 
 	return 0;
 }
 
 static int mmc_rint_wait(struct mmc *mmc, unsigned int timeout_msecs,
 			 unsigned int done_bit, const char *what)
 {
 	struct sunxi_mmc_host *mmchost = mmc->priv;
 	unsigned int status;
+	unsigned long start = get_timer(0);
 
 	do {
 		status = readl(&mmchost->reg->rint);
-		if (!timeout_msecs-- ||
+		if ((get_timer(start) > timeout_msecs) ||
 		    (status & SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT)) {
 			debug("%s timeout %x\n", what,
 			      status & SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT);
 			return -ETIMEDOUT;
 		}
-		udelay(1000);
 	} while (!(status & done_bit));
 
 	return 0;
 }
 
 static int sunxi_mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
 			      struct mmc_data *data)
 {
 	struct sunxi_mmc_host *mmchost = mmc->priv;
 	unsigned int cmdval = SUNXI_MMC_CMD_START;
 	unsigned int timeout_msecs;
 	int error = 0;
 	unsigned int status = 0;
 	unsigned int bytecnt = 0;
 
 	if (mmchost->fatal_err)
 		return -1;
 	if (cmd->resp_type & MMC_RSP_BUSY)
 		debug("mmc cmd %d check rsp busy\n", cmd->cmdidx);
 	if (cmd->cmdidx == 12)
 		return 0;
 
 	if (!cmd->cmdidx)
 		cmdval |= SUNXI_MMC_CMD_SEND_INIT_SEQ;
 	if (cmd->resp_type & MMC_RSP_PRESENT)
 		cmdval |= SUNXI_MMC_CMD_RESP_EXPIRE;
 	if (cmd->resp_type & MMC_RSP_136)
 		cmdval |= SUNXI_MMC_CMD_LONG_RESPONSE;
 	if (cmd->resp_type & MMC_RSP_CRC)
 		cmdval |= SUNXI_MMC_CMD_CHK_RESPONSE_CRC;
 
 	if (data) {
 		if ((u32)(long)data->dest & 0x3) {
 			error = -1;
 			goto out;
 		}
 
 		cmdval |= SUNXI_MMC_CMD_DATA_EXPIRE|SUNXI_MMC_CMD_WAIT_PRE_OVER;
 		if (data->flags & MMC_DATA_WRITE)
 			cmdval |= SUNXI_MMC_CMD_WRITE;
 		if (data->blocks > 1)
 			cmdval |= SUNXI_MMC_CMD_AUTO_STOP;
 		writel(data->blocksize, &mmchost->reg->blksz);
 		writel(data->blocks * data->blocksize, &mmchost->reg->bytecnt);
 	}
 
 	debug("mmc %d, cmd %d(0x%08x), arg 0x%08x\n", mmchost->mmc_no,
 	      cmd->cmdidx, cmdval | cmd->cmdidx, cmd->cmdarg);
 	writel(cmd->cmdarg, &mmchost->reg->arg);
 
 	if (!data)
 		writel(cmdval | cmd->cmdidx, &mmchost->reg->cmd);
 
 	/*
 	 * transfer data and check status
 	 * STATREG[2] : FIFO empty
 	 * STATREG[3] : FIFO full
 	 */
 	if (data) {
 		int ret = 0;
 
 		bytecnt = data->blocksize * data->blocks;
 		debug("trans data %d bytes\n", bytecnt);
 		writel(cmdval | cmd->cmdidx, &mmchost->reg->cmd);
 		ret = mmc_trans_data_by_cpu(mmc, data);
 		if (ret) {
 			error = readl(&mmchost->reg->rint) & \
 				SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT;
 			error = -ETIMEDOUT;
 			goto out;
 		}
 	}
 
 	error = mmc_rint_wait(mmc, 1000, SUNXI_MMC_RINT_COMMAND_DONE, "cmd");
 	if (error)
 		goto out;
 
 	if (data) {
 		timeout_msecs = 120;
 		debug("cacl timeout %x msec\n", timeout_msecs);
 		error = mmc_rint_wait(mmc, timeout_msecs,
 				      data->blocks > 1 ?
 				      SUNXI_MMC_RINT_AUTO_COMMAND_DONE :
 				      SUNXI_MMC_RINT_DATA_OVER,
 				      "data");
 		if (error)
 			goto out;
 	}
 
 	if (cmd->resp_type & MMC_RSP_BUSY) {
+		unsigned long start = get_timer(0);
 		timeout_msecs = 2000;
+
 		do {
 			status = readl(&mmchost->reg->status);
-			if (!timeout_msecs--) {
+			if (get_timer(start) > timeout_msecs) {
 				debug("busy timeout\n");
 				error = -ETIMEDOUT;
 				goto out;
 			}
-			udelay(1000);
 		} while (status & SUNXI_MMC_STATUS_CARD_DATA_BUSY);
 	}
 
 	if (cmd->resp_type & MMC_RSP_136) {
 		cmd->response[0] = readl(&mmchost->reg->resp3);
 		cmd->response[1] = readl(&mmchost->reg->resp2);
 		cmd->response[2] = readl(&mmchost->reg->resp1);
 		cmd->response[3] = readl(&mmchost->reg->resp0);
 		debug("mmc resp 0x%08x 0x%08x 0x%08x 0x%08x\n",
 		      cmd->response[3], cmd->response[2],
 		      cmd->response[1], cmd->response[0]);
 	} else {
 		cmd->response[0] = readl(&mmchost->reg->resp0);
 		debug("mmc resp 0x%08x\n", cmd->response[0]);
 	}
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver
  2017-02-17 17:22 ` [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver Philipp Tomsich
@ 2017-02-19 22:08   ` Jaehoon Chung
  2017-02-20  6:16     ` Chen-Yu Tsai
  2017-02-21 18:00   ` Maxime Ripard
  1 sibling, 1 reply; 9+ messages in thread
From: Jaehoon Chung @ 2017-02-19 22:08 UTC (permalink / raw)
  To: u-boot

Hi,

On 02/18/2017 02:22 AM, Philipp Tomsich wrote:
> Throughput tests have shown the sunxi_mmc driver to take over 10s to
> read 10MB from a fast eMMC device due to excessive delays in polling
> loops.
> 
> This commit restructures the main polling loops to use get_timer(...)
> to determine whether a (millisecond) timeout has expired.  We choose
> not to use the wait_bit function, as we don't need interruptability
> with ctrl-c and have at least one case where two bits (one for an
> error condition and another one for completion) need to be read and
> using wait_bit would have not added to the clarity.
> 
> The observed speedup in testing on a A31 is greater than 10x (e.g. a
> 10MB write decreases from 9.302s to 0.884s).

Your patch's format looks strange.
Some unchanging codes are included in patches. 
Except them, Looks good to me. 
If you will resend the patch, you can resend with my acked-by tag.

Best Regards,
Jaehoon Chung

> 
> X-Affected-platforms: A31-uQ7, A80-Q7, A64-uQ7
> Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
> ---
>  drivers/mmc/sunxi_mmc.c | 26 +++++++++++++++-----------
>  1 file changed, 15 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/mmc/sunxi_mmc.c b/drivers/mmc/sunxi_mmc.c
> index d3106db..46abe4a 100644
> --- a/drivers/mmc/sunxi_mmc.c
> +++ b/drivers/mmc/sunxi_mmc.c
> @@ -180,22 +180,22 @@ static int mmc_clk_io_on(int sdc_no)
>  static int mmc_update_clk(struct mmc *mmc)
>  {
>  	struct sunxi_mmc_host *mmchost = mmc->priv;
>  	unsigned int cmd;
>  	unsigned timeout_msecs = 2000;
> +	unsigned long start = get_timer(0);
>  
>  	cmd = SUNXI_MMC_CMD_START |
>  	      SUNXI_MMC_CMD_UPCLK_ONLY |
>  	      SUNXI_MMC_CMD_WAIT_PRE_OVER;
>  	writel(cmd, &mmchost->reg->cmd);
>  	while (readl(&mmchost->reg->cmd) & SUNXI_MMC_CMD_START) {
> -		if (!timeout_msecs--)
> +		if (get_timer(start) > timeout_msecs)
>  			return -1;
> -		udelay(1000);
>  	}
>  
>  	/* clock update sets various irq status bits, clear these */
>  	writel(readl(&mmchost->reg->rint), &mmchost->reg->rint);
>  
>  	return 0;
>  }
>  
> @@ -265,161 +265,165 @@ static int sunxi_mmc_core_init(struct mmc *mmc)
>  static int mmc_trans_data_by_cpu(struct mmc *mmc, struct mmc_data *data)
>  {
>  	struct sunxi_mmc_host *mmchost = mmc->priv;
>  	const int reading = !!(data->flags & MMC_DATA_READ);
>  	const uint32_t status_bit = reading ? SUNXI_MMC_STATUS_FIFO_EMPTY :
>  					      SUNXI_MMC_STATUS_FIFO_FULL;
>  	unsigned i;
>  	unsigned *buff = (unsigned int *)(reading ? data->dest : data->src);
>  	unsigned byte_cnt = data->blocksize * data->blocks;
> -	unsigned timeout_usecs = (byte_cnt >> 8) * 1000;
> -	if (timeout_usecs < 2000000)
> -		timeout_usecs = 2000000;
> +	unsigned timeout_msecs = byte_cnt >> 8;
> +	unsigned long  start;
> +
> +	if (timeout_msecs < 2000)
> +		timeout_msecs = 2000;
>  
>  	/* Always read / write data through the CPU */
>  	setbits_le32(&mmchost->reg->gctrl, SUNXI_MMC_GCTRL_ACCESS_BY_AHB);
>  
> +	start = get_timer(0);
> +
>  	for (i = 0; i < (byte_cnt >> 2); i++) {
>  		while (readl(&mmchost->reg->status) & status_bit) {
> -			if (!timeout_usecs--)
> +			if (get_timer(start) > timeout_msecs)
>  				return -1;
> -			udelay(1);
>  		}
>  
>  		if (reading)
>  			buff[i] = readl(&mmchost->reg->fifo);
>  		else
>  			writel(buff[i], &mmchost->reg->fifo);
>  	}
>  
>  	return 0;
>  }
>  
>  static int mmc_rint_wait(struct mmc *mmc, unsigned int timeout_msecs,
>  			 unsigned int done_bit, const char *what)
>  {
>  	struct sunxi_mmc_host *mmchost = mmc->priv;
>  	unsigned int status;
> +	unsigned long start = get_timer(0);
>  
>  	do {
>  		status = readl(&mmchost->reg->rint);
> -		if (!timeout_msecs-- ||
> +		if ((get_timer(start) > timeout_msecs) ||
>  		    (status & SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT)) {
>  			debug("%s timeout %x\n", what,
>  			      status & SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT);
>  			return -ETIMEDOUT;
>  		}
> -		udelay(1000);
>  	} while (!(status & done_bit));
>  
>  	return 0;
>  }
>  
>  static int sunxi_mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>  			      struct mmc_data *data)
>  {
>  	struct sunxi_mmc_host *mmchost = mmc->priv;
>  	unsigned int cmdval = SUNXI_MMC_CMD_START;
>  	unsigned int timeout_msecs;
>  	int error = 0;
>  	unsigned int status = 0;
>  	unsigned int bytecnt = 0;
>  
>  	if (mmchost->fatal_err)
>  		return -1;
>  	if (cmd->resp_type & MMC_RSP_BUSY)
>  		debug("mmc cmd %d check rsp busy\n", cmd->cmdidx);
>  	if (cmd->cmdidx == 12)
>  		return 0;
>  
>  	if (!cmd->cmdidx)
>  		cmdval |= SUNXI_MMC_CMD_SEND_INIT_SEQ;
>  	if (cmd->resp_type & MMC_RSP_PRESENT)
>  		cmdval |= SUNXI_MMC_CMD_RESP_EXPIRE;
>  	if (cmd->resp_type & MMC_RSP_136)
>  		cmdval |= SUNXI_MMC_CMD_LONG_RESPONSE;
>  	if (cmd->resp_type & MMC_RSP_CRC)
>  		cmdval |= SUNXI_MMC_CMD_CHK_RESPONSE_CRC;
>  
>  	if (data) {
>  		if ((u32)(long)data->dest & 0x3) {
>  			error = -1;
>  			goto out;
>  		}
>  
>  		cmdval |= SUNXI_MMC_CMD_DATA_EXPIRE|SUNXI_MMC_CMD_WAIT_PRE_OVER;
>  		if (data->flags & MMC_DATA_WRITE)
>  			cmdval |= SUNXI_MMC_CMD_WRITE;
>  		if (data->blocks > 1)
>  			cmdval |= SUNXI_MMC_CMD_AUTO_STOP;
>  		writel(data->blocksize, &mmchost->reg->blksz);
>  		writel(data->blocks * data->blocksize, &mmchost->reg->bytecnt);
>  	}
>  
>  	debug("mmc %d, cmd %d(0x%08x), arg 0x%08x\n", mmchost->mmc_no,
>  	      cmd->cmdidx, cmdval | cmd->cmdidx, cmd->cmdarg);
>  	writel(cmd->cmdarg, &mmchost->reg->arg);
>  
>  	if (!data)
>  		writel(cmdval | cmd->cmdidx, &mmchost->reg->cmd);
>  
>  	/*
>  	 * transfer data and check status
>  	 * STATREG[2] : FIFO empty
>  	 * STATREG[3] : FIFO full
>  	 */
>  	if (data) {
>  		int ret = 0;
>  
>  		bytecnt = data->blocksize * data->blocks;
>  		debug("trans data %d bytes\n", bytecnt);
>  		writel(cmdval | cmd->cmdidx, &mmchost->reg->cmd);
>  		ret = mmc_trans_data_by_cpu(mmc, data);
>  		if (ret) {
>  			error = readl(&mmchost->reg->rint) & \
>  				SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT;
>  			error = -ETIMEDOUT;
>  			goto out;
>  		}
>  	}
>  
>  	error = mmc_rint_wait(mmc, 1000, SUNXI_MMC_RINT_COMMAND_DONE, "cmd");
>  	if (error)
>  		goto out;
>  
>  	if (data) {
>  		timeout_msecs = 120;
>  		debug("cacl timeout %x msec\n", timeout_msecs);
>  		error = mmc_rint_wait(mmc, timeout_msecs,
>  				      data->blocks > 1 ?
>  				      SUNXI_MMC_RINT_AUTO_COMMAND_DONE :
>  				      SUNXI_MMC_RINT_DATA_OVER,
>  				      "data");
>  		if (error)
>  			goto out;
>  	}
>  
>  	if (cmd->resp_type & MMC_RSP_BUSY) {
> +		unsigned long start = get_timer(0);
>  		timeout_msecs = 2000;
> +
>  		do {
>  			status = readl(&mmchost->reg->status);
> -			if (!timeout_msecs--) {
> +			if (get_timer(start) > timeout_msecs) {
>  				debug("busy timeout\n");
>  				error = -ETIMEDOUT;
>  				goto out;
>  			}
> -			udelay(1000);
>  		} while (status & SUNXI_MMC_STATUS_CARD_DATA_BUSY);
>  	}
>  
>  	if (cmd->resp_type & MMC_RSP_136) {
>  		cmd->response[0] = readl(&mmchost->reg->resp3);
>  		cmd->response[1] = readl(&mmchost->reg->resp2);
>  		cmd->response[2] = readl(&mmchost->reg->resp1);
>  		cmd->response[3] = readl(&mmchost->reg->resp0);
>  		debug("mmc resp 0x%08x 0x%08x 0x%08x 0x%08x\n",
>  		      cmd->response[3], cmd->response[2],
>  		      cmd->response[1], cmd->response[0]);
>  	} else {
>  		cmd->response[0] = readl(&mmchost->reg->resp0);
>  		debug("mmc resp 0x%08x\n", cmd->response[0]);
>  	}
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver
  2017-02-19 22:08   ` Jaehoon Chung
@ 2017-02-20  6:16     ` Chen-Yu Tsai
  2017-02-20  9:05       ` Dr. Philipp Tomsich
  0 siblings, 1 reply; 9+ messages in thread
From: Chen-Yu Tsai @ 2017-02-20  6:16 UTC (permalink / raw)
  To: u-boot

On Mon, Feb 20, 2017 at 6:08 AM, Jaehoon Chung <jh80.chung@samsung.com> wrote:
> Hi,
>
> On 02/18/2017 02:22 AM, Philipp Tomsich wrote:
>> Throughput tests have shown the sunxi_mmc driver to take over 10s to
>> read 10MB from a fast eMMC device due to excessive delays in polling
>> loops.
>>
>> This commit restructures the main polling loops to use get_timer(...)
>> to determine whether a (millisecond) timeout has expired.  We choose
>> not to use the wait_bit function, as we don't need interruptability
>> with ctrl-c and have at least one case where two bits (one for an
>> error condition and another one for completion) need to be read and
>> using wait_bit would have not added to the clarity.
>>
>> The observed speedup in testing on a A31 is greater than 10x (e.g. a
>> 10MB write decreases from 9.302s to 0.884s).
>
> Your patch's format looks strange.
> Some unchanging codes are included in patches.
> Except them, Looks good to me.
> If you will resend the patch, you can resend with my acked-by tag.

They were probably produced the patches with "git format-patch
--function-context", which includes the whole body of any changed
functions.

I suppose git would ignore all the extra code when applying the patch,
but it probably increases the likelihood of conflicts.

ChenYu

>
> Best Regards,
> Jaehoon Chung
>
>>
>> X-Affected-platforms: A31-uQ7, A80-Q7, A64-uQ7
>> Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
>> ---
>>  drivers/mmc/sunxi_mmc.c | 26 +++++++++++++++-----------
>>  1 file changed, 15 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/mmc/sunxi_mmc.c b/drivers/mmc/sunxi_mmc.c
>> index d3106db..46abe4a 100644
>> --- a/drivers/mmc/sunxi_mmc.c
>> +++ b/drivers/mmc/sunxi_mmc.c
>> @@ -180,22 +180,22 @@ static int mmc_clk_io_on(int sdc_no)
>>  static int mmc_update_clk(struct mmc *mmc)
>>  {
>>       struct sunxi_mmc_host *mmchost = mmc->priv;
>>       unsigned int cmd;
>>       unsigned timeout_msecs = 2000;
>> +     unsigned long start = get_timer(0);
>>
>>       cmd = SUNXI_MMC_CMD_START |
>>             SUNXI_MMC_CMD_UPCLK_ONLY |
>>             SUNXI_MMC_CMD_WAIT_PRE_OVER;
>>       writel(cmd, &mmchost->reg->cmd);
>>       while (readl(&mmchost->reg->cmd) & SUNXI_MMC_CMD_START) {
>> -             if (!timeout_msecs--)
>> +             if (get_timer(start) > timeout_msecs)
>>                       return -1;
>> -             udelay(1000);
>>       }
>>
>>       /* clock update sets various irq status bits, clear these */
>>       writel(readl(&mmchost->reg->rint), &mmchost->reg->rint);
>>
>>       return 0;
>>  }
>>
>> @@ -265,161 +265,165 @@ static int sunxi_mmc_core_init(struct mmc *mmc)
>>  static int mmc_trans_data_by_cpu(struct mmc *mmc, struct mmc_data *data)
>>  {
>>       struct sunxi_mmc_host *mmchost = mmc->priv;
>>       const int reading = !!(data->flags & MMC_DATA_READ);
>>       const uint32_t status_bit = reading ? SUNXI_MMC_STATUS_FIFO_EMPTY :
>>                                             SUNXI_MMC_STATUS_FIFO_FULL;
>>       unsigned i;
>>       unsigned *buff = (unsigned int *)(reading ? data->dest : data->src);
>>       unsigned byte_cnt = data->blocksize * data->blocks;
>> -     unsigned timeout_usecs = (byte_cnt >> 8) * 1000;
>> -     if (timeout_usecs < 2000000)
>> -             timeout_usecs = 2000000;
>> +     unsigned timeout_msecs = byte_cnt >> 8;
>> +     unsigned long  start;
>> +
>> +     if (timeout_msecs < 2000)
>> +             timeout_msecs = 2000;
>>
>>       /* Always read / write data through the CPU */
>>       setbits_le32(&mmchost->reg->gctrl, SUNXI_MMC_GCTRL_ACCESS_BY_AHB);
>>
>> +     start = get_timer(0);
>> +
>>       for (i = 0; i < (byte_cnt >> 2); i++) {
>>               while (readl(&mmchost->reg->status) & status_bit) {
>> -                     if (!timeout_usecs--)
>> +                     if (get_timer(start) > timeout_msecs)
>>                               return -1;
>> -                     udelay(1);
>>               }
>>
>>               if (reading)
>>                       buff[i] = readl(&mmchost->reg->fifo);
>>               else
>>                       writel(buff[i], &mmchost->reg->fifo);
>>       }
>>
>>       return 0;
>>  }
>>
>>  static int mmc_rint_wait(struct mmc *mmc, unsigned int timeout_msecs,
>>                        unsigned int done_bit, const char *what)
>>  {
>>       struct sunxi_mmc_host *mmchost = mmc->priv;
>>       unsigned int status;
>> +     unsigned long start = get_timer(0);
>>
>>       do {
>>               status = readl(&mmchost->reg->rint);
>> -             if (!timeout_msecs-- ||
>> +             if ((get_timer(start) > timeout_msecs) ||
>>                   (status & SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT)) {
>>                       debug("%s timeout %x\n", what,
>>                             status & SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT);
>>                       return -ETIMEDOUT;
>>               }
>> -             udelay(1000);
>>       } while (!(status & done_bit));
>>
>>       return 0;
>>  }
>>
>>  static int sunxi_mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
>>                             struct mmc_data *data)
>>  {
>>       struct sunxi_mmc_host *mmchost = mmc->priv;
>>       unsigned int cmdval = SUNXI_MMC_CMD_START;
>>       unsigned int timeout_msecs;
>>       int error = 0;
>>       unsigned int status = 0;
>>       unsigned int bytecnt = 0;
>>
>>       if (mmchost->fatal_err)
>>               return -1;
>>       if (cmd->resp_type & MMC_RSP_BUSY)
>>               debug("mmc cmd %d check rsp busy\n", cmd->cmdidx);
>>       if (cmd->cmdidx == 12)
>>               return 0;
>>
>>       if (!cmd->cmdidx)
>>               cmdval |= SUNXI_MMC_CMD_SEND_INIT_SEQ;
>>       if (cmd->resp_type & MMC_RSP_PRESENT)
>>               cmdval |= SUNXI_MMC_CMD_RESP_EXPIRE;
>>       if (cmd->resp_type & MMC_RSP_136)
>>               cmdval |= SUNXI_MMC_CMD_LONG_RESPONSE;
>>       if (cmd->resp_type & MMC_RSP_CRC)
>>               cmdval |= SUNXI_MMC_CMD_CHK_RESPONSE_CRC;
>>
>>       if (data) {
>>               if ((u32)(long)data->dest & 0x3) {
>>                       error = -1;
>>                       goto out;
>>               }
>>
>>               cmdval |= SUNXI_MMC_CMD_DATA_EXPIRE|SUNXI_MMC_CMD_WAIT_PRE_OVER;
>>               if (data->flags & MMC_DATA_WRITE)
>>                       cmdval |= SUNXI_MMC_CMD_WRITE;
>>               if (data->blocks > 1)
>>                       cmdval |= SUNXI_MMC_CMD_AUTO_STOP;
>>               writel(data->blocksize, &mmchost->reg->blksz);
>>               writel(data->blocks * data->blocksize, &mmchost->reg->bytecnt);
>>       }
>>
>>       debug("mmc %d, cmd %d(0x%08x), arg 0x%08x\n", mmchost->mmc_no,
>>             cmd->cmdidx, cmdval | cmd->cmdidx, cmd->cmdarg);
>>       writel(cmd->cmdarg, &mmchost->reg->arg);
>>
>>       if (!data)
>>               writel(cmdval | cmd->cmdidx, &mmchost->reg->cmd);
>>
>>       /*
>>        * transfer data and check status
>>        * STATREG[2] : FIFO empty
>>        * STATREG[3] : FIFO full
>>        */
>>       if (data) {
>>               int ret = 0;
>>
>>               bytecnt = data->blocksize * data->blocks;
>>               debug("trans data %d bytes\n", bytecnt);
>>               writel(cmdval | cmd->cmdidx, &mmchost->reg->cmd);
>>               ret = mmc_trans_data_by_cpu(mmc, data);
>>               if (ret) {
>>                       error = readl(&mmchost->reg->rint) & \
>>                               SUNXI_MMC_RINT_INTERRUPT_ERROR_BIT;
>>                       error = -ETIMEDOUT;
>>                       goto out;
>>               }
>>       }
>>
>>       error = mmc_rint_wait(mmc, 1000, SUNXI_MMC_RINT_COMMAND_DONE, "cmd");
>>       if (error)
>>               goto out;
>>
>>       if (data) {
>>               timeout_msecs = 120;
>>               debug("cacl timeout %x msec\n", timeout_msecs);
>>               error = mmc_rint_wait(mmc, timeout_msecs,
>>                                     data->blocks > 1 ?
>>                                     SUNXI_MMC_RINT_AUTO_COMMAND_DONE :
>>                                     SUNXI_MMC_RINT_DATA_OVER,
>>                                     "data");
>>               if (error)
>>                       goto out;
>>       }
>>
>>       if (cmd->resp_type & MMC_RSP_BUSY) {
>> +             unsigned long start = get_timer(0);
>>               timeout_msecs = 2000;
>> +
>>               do {
>>                       status = readl(&mmchost->reg->status);
>> -                     if (!timeout_msecs--) {
>> +                     if (get_timer(start) > timeout_msecs) {
>>                               debug("busy timeout\n");
>>                               error = -ETIMEDOUT;
>>                               goto out;
>>                       }
>> -                     udelay(1000);
>>               } while (status & SUNXI_MMC_STATUS_CARD_DATA_BUSY);
>>       }
>>
>>       if (cmd->resp_type & MMC_RSP_136) {
>>               cmd->response[0] = readl(&mmchost->reg->resp3);
>>               cmd->response[1] = readl(&mmchost->reg->resp2);
>>               cmd->response[2] = readl(&mmchost->reg->resp1);
>>               cmd->response[3] = readl(&mmchost->reg->resp0);
>>               debug("mmc resp 0x%08x 0x%08x 0x%08x 0x%08x\n",
>>                     cmd->response[3], cmd->response[2],
>>                     cmd->response[1], cmd->response[0]);
>>       } else {
>>               cmd->response[0] = readl(&mmchost->reg->resp0);
>>               debug("mmc resp 0x%08x\n", cmd->response[0]);
>>       }
>>
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver
  2017-02-20  6:16     ` Chen-Yu Tsai
@ 2017-02-20  9:05       ` Dr. Philipp Tomsich
  0 siblings, 0 replies; 9+ messages in thread
From: Dr. Philipp Tomsich @ 2017-02-20  9:05 UTC (permalink / raw)
  To: u-boot


> On 20 Feb 2017, at 07:16, Chen-Yu Tsai <wens@csie.org> wrote:
> 
> On Mon, Feb 20, 2017 at 6:08 AM, Jaehoon Chung <jh80.chung at samsung.com <mailto:jh80.chung@samsung.com>> wrote:
>> Hi,
>> 
>> On 02/18/2017 02:22 AM, Philipp Tomsich wrote:
>>> Throughput tests have shown the sunxi_mmc driver to take over 10s to
>>> read 10MB from a fast eMMC device due to excessive delays in polling
>>> loops.
>>> 
>>> This commit restructures the main polling loops to use get_timer(...)
>>> to determine whether a (millisecond) timeout has expired.  We choose
>>> not to use the wait_bit function, as we don't need interruptability
>>> with ctrl-c and have at least one case where two bits (one for an
>>> error condition and another one for completion) need to be read and
>>> using wait_bit would have not added to the clarity.
>>> 
>>> The observed speedup in testing on a A31 is greater than 10x (e.g. a
>>> 10MB write decreases from 9.302s to 0.884s).
>> 
>> Your patch's format looks strange.
>> Some unchanging codes are included in patches.
>> Except them, Looks good to me.
>> If you will resend the patch, you can resend with my acked-by tag.
> 
> They were probably produced the patches with "git format-patch
> --function-context", which includes the whole body of any changed
> functions.

Yes, that?s my default setting for easier review (having the context helps
spotting logic-issues, in my experience).

I?ll just reroll with less context for a v2, as the consensus prefers the
smaller context.

Regards,
Phil.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver
  2017-02-17 17:22 ` [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver Philipp Tomsich
  2017-02-19 22:08   ` Jaehoon Chung
@ 2017-02-21 18:00   ` Maxime Ripard
  2017-02-21 18:11     ` Dr. Philipp Tomsich
  1 sibling, 1 reply; 9+ messages in thread
From: Maxime Ripard @ 2017-02-21 18:00 UTC (permalink / raw)
  To: u-boot

On Fri, Feb 17, 2017 at 06:22:45PM +0100, Philipp Tomsich wrote:
> Throughput tests have shown the sunxi_mmc driver to take over 10s to
> read 10MB from a fast eMMC device due to excessive delays in polling
> loops.
> 
> This commit restructures the main polling loops to use get_timer(...)
> to determine whether a (millisecond) timeout has expired.  We choose
> not to use the wait_bit function, as we don't need interruptability
> with ctrl-c and have at least one case where two bits (one for an
> error condition and another one for completion) need to be read and
> using wait_bit would have not added to the clarity.
> 
> The observed speedup in testing on a A31 is greater than 10x (e.g. a
> 10MB write decreases from 9.302s to 0.884s).
> 
> X-Affected-platforms: A31-uQ7, A80-Q7, A64-uQ7
> Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>

Awesome, I was about to start looking into the poor performances we
had, but you beat me to it.

Do you know if the same changes can apply to the Linux MMC driver?

Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.denx.de/pipermail/u-boot/attachments/20170221/80915845/attachment.sig>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver
  2017-02-21 18:00   ` Maxime Ripard
@ 2017-02-21 18:11     ` Dr. Philipp Tomsich
  2017-02-21 18:34       ` Dr. Philipp Tomsich
  0 siblings, 1 reply; 9+ messages in thread
From: Dr. Philipp Tomsich @ 2017-02-21 18:11 UTC (permalink / raw)
  To: u-boot


> On 21 Feb 2017, at 19:00, Maxime Ripard <maxime.ripard@free-electrons.com> wrote:
> 
> On Fri, Feb 17, 2017 at 06:22:45PM +0100, Philipp Tomsich wrote:
>> Throughput tests have shown the sunxi_mmc driver to take over 10s to
>> read 10MB from a fast eMMC device due to excessive delays in polling
>> loops.
>> 
>> This commit restructures the main polling loops to use get_timer(...)
>> to determine whether a (millisecond) timeout has expired.  We choose
>> not to use the wait_bit function, as we don't need interruptability
>> with ctrl-c and have at least one case where two bits (one for an
>> error condition and another one for completion) need to be read and
>> using wait_bit would have not added to the clarity.
>> 
>> The observed speedup in testing on a A31 is greater than 10x (e.g. a
>> 10MB write decreases from 9.302s to 0.884s).
>> 
>> X-Affected-platforms: A31-uQ7, A80-Q7, A64-uQ7
>> Signed-off-by: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
> 
> Awesome, I was about to start looking into the poor performances we
> had, but you beat me to it.
> 
> Do you know if the same changes can apply to the Linux MMC driver?

We?ve never seen performance problems on Linux and benchmarked at
up to ~50MByte/s on the A31-uQ7.

You might want to follow up off-list with Klaus on this, though...

Cheers,
Philipp.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver
  2017-02-21 18:11     ` Dr. Philipp Tomsich
@ 2017-02-21 18:34       ` Dr. Philipp Tomsich
  2017-02-22 17:22         ` Maxime Ripard
  0 siblings, 1 reply; 9+ messages in thread
From: Dr. Philipp Tomsich @ 2017-02-21 18:34 UTC (permalink / raw)
  To: u-boot


> On 21 Feb 2017, at 19:11, Dr. Philipp Tomsich <philipp.tomsich@theobroma-systems.com> wrote:
> 
> We?ve never seen performance problems on Linux and benchmarked at
> up to ~50MByte/s on the A31-uQ7.

I dug into our disti training slides from early 2015?looks like the throughput
number (sustained) for the eMMC was 12MB/s on write and 40MB/s on read
when using our standard part (at that time), which was a SDIN7DP2-4G.

Cheers,
Philipp.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver
  2017-02-21 18:34       ` Dr. Philipp Tomsich
@ 2017-02-22 17:22         ` Maxime Ripard
  2017-02-22 17:28           ` Dr. Philipp Tomsich
  0 siblings, 1 reply; 9+ messages in thread
From: Maxime Ripard @ 2017-02-22 17:22 UTC (permalink / raw)
  To: u-boot

On Tue, Feb 21, 2017 at 07:34:40PM +0100, Dr. Philipp Tomsich wrote:
> 
> > On 21 Feb 2017, at 19:11, Dr. Philipp Tomsich <philipp.tomsich@theobroma-systems.com> wrote:
> > 
> > We?ve never seen performance problems on Linux and benchmarked at
> > up to ~50MByte/s on the A31-uQ7.
> 
> I dug into our disti training slides from early 2015?looks like the throughput
> number (sustained) for the eMMC was 12MB/s on write and 40MB/s on read
> when using our standard part (at that time), which was a SDIN7DP2-4G.

I was getting around 60MB/s on an A64 with an HS200 eMMC, which is
quite good but left me the feeling that it could be better. But it was
just a feeling :)

Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.denx.de/pipermail/u-boot/attachments/20170222/1f48e69b/attachment.sig>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver
  2017-02-22 17:22         ` Maxime Ripard
@ 2017-02-22 17:28           ` Dr. Philipp Tomsich
  0 siblings, 0 replies; 9+ messages in thread
From: Dr. Philipp Tomsich @ 2017-02-22 17:28 UTC (permalink / raw)
  To: u-boot


> On 22 Feb 2017, at 18:22, Maxime Ripard <maxime.ripard@free-electrons.com> wrote:
> 
> On Tue, Feb 21, 2017 at 07:34:40PM +0100, Dr. Philipp Tomsich wrote:
>> 
>>> On 21 Feb 2017, at 19:11, Dr. Philipp Tomsich <philipp.tomsich@theobroma-systems.com> wrote:
>>> 
>>> We?ve never seen performance problems on Linux and benchmarked at
>>> up to ~50MByte/s on the A31-uQ7.
>> 
>> I dug into our disti training slides from early 2015?looks like the throughput
>> number (sustained) for the eMMC was 12MB/s on write and 40MB/s on read
>> when using our standard part (at that time), which was a SDIN7DP2-4G.
> 
> I was getting around 60MB/s on an A64 with an HS200 eMMC, which is
> quite good but left me the feeling that it could be better. But it was
> just a feeling :)

What does your eMMC datasheet list as expected throughput at HS200?

We haven?t even bothered benchmarking the eMMC on our boards lately,
as it?s ?fast enough? for all practical purposes.

Regards,
Philipp.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-02-22 17:28 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20170217172303epcas4p1c610b5ea1f9763e7bde24b6f8e1047fa@epcas4p1.samsung.com>
2017-02-17 17:22 ` [U-Boot] [PATCH v1] sunxi: improve throughput in the sunxi_mmc driver Philipp Tomsich
2017-02-19 22:08   ` Jaehoon Chung
2017-02-20  6:16     ` Chen-Yu Tsai
2017-02-20  9:05       ` Dr. Philipp Tomsich
2017-02-21 18:00   ` Maxime Ripard
2017-02-21 18:11     ` Dr. Philipp Tomsich
2017-02-21 18:34       ` Dr. Philipp Tomsich
2017-02-22 17:22         ` Maxime Ripard
2017-02-22 17:28           ` Dr. Philipp Tomsich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.