All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] mmc: rtsx: add support for async request
@ 2014-06-06  7:05 ` micky_ching
  0 siblings, 0 replies; 40+ messages in thread
From: micky_ching @ 2014-06-06  7:05 UTC (permalink / raw)
  To: sameo, lee.jones, chris, ulf.hansson
  Cc: devel, linux-kernel, linux-mmc, gregkh, dan.carpenter, rogerable,
	wei_wang, Micky Ching

From: Micky Ching <micky_ching@realsil.com.cn>

Add support for sd/mmc async request, which makes next request do dma_map_sg()
while previous request transfering data. This behaviour can improve card io
performance more than 10%.

Since rtsx mfd driver only provide a single function for transfering data,
so add three split functions for rtsx mmc driver.

Micky Ching (2):
  mfd: rtsx: add dma transfer function
  mmc: rtsx: add support for async request

 drivers/mfd/rtsx_pcr.c            |   76 +++++++++++++--------
 drivers/mmc/host/rtsx_pci_sdmmc.c |  133 +++++++++++++++++++++++++++++++++++--
 include/linux/mfd/rtsx_pci.h      |    6 ++
 3 files changed, 181 insertions(+), 34 deletions(-)

--
1.7.9.5

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 0/2] mmc: rtsx: add support for async request
@ 2014-06-06  7:05 ` micky_ching
  0 siblings, 0 replies; 40+ messages in thread
From: micky_ching @ 2014-06-06  7:05 UTC (permalink / raw)
  To: sameo, lee.jones, chris, ulf.hansson
  Cc: devel, linux-kernel, linux-mmc, gregkh, dan.carpenter, rogerable,
	wei_wang, Micky Ching

From: Micky Ching <micky_ching@realsil.com.cn>

Add support for sd/mmc async request, which makes next request do dma_map_sg()
while previous request transfering data. This behaviour can improve card io
performance more than 10%.

Since rtsx mfd driver only provide a single function for transfering data,
so add three split functions for rtsx mmc driver.

Micky Ching (2):
  mfd: rtsx: add dma transfer function
  mmc: rtsx: add support for async request

 drivers/mfd/rtsx_pcr.c            |   76 +++++++++++++--------
 drivers/mmc/host/rtsx_pci_sdmmc.c |  133 +++++++++++++++++++++++++++++++++++--
 include/linux/mfd/rtsx_pci.h      |    6 ++
 3 files changed, 181 insertions(+), 34 deletions(-)

--
1.7.9.5

^ permalink raw reply	[flat|nested] 40+ messages in thread

* [PATCH 1/2] mfd: rtsx: add dma transfer function
  2014-06-06  7:05 ` micky_ching
@ 2014-06-06  7:05   ` micky_ching
  -1 siblings, 0 replies; 40+ messages in thread
From: micky_ching @ 2014-06-06  7:05 UTC (permalink / raw)
  To: sameo, lee.jones, chris, ulf.hansson
  Cc: devel, linux-kernel, linux-mmc, gregkh, dan.carpenter, rogerable,
	wei_wang, Micky Ching

From: Micky Ching <micky_ching@realsil.com.cn>

rtsx driver using a single function for transfer data, dma map/unmap are
placed in one fix function. We need map/unmap dma in different place(for
mmc async driver), so add three function for dma map, dma transfer and
dma unmap.

Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
---
 drivers/mfd/rtsx_pcr.c       |   76 ++++++++++++++++++++++++++----------------
 include/linux/mfd/rtsx_pci.h |    6 ++++
 2 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 1d15735..d01b8c2 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -337,40 +337,64 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
 int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 		int num_sg, bool read, int timeout)
 {
-	struct completion trans_done;
-	u8 dir;
-	int err = 0, i, count;
-	long timeleft;
-	unsigned long flags;
-	struct scatterlist *sg;
-	enum dma_data_direction dma_dir;
-	u32 val;
-	dma_addr_t addr;
-	unsigned int len;
+	int err = 0, count;
 
 	dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg);
+	count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read);
+	if (count < 1)
+		return -EINVAL;
+	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
+
+	err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout);
+
+	rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+
+int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read)
+{
+	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 
-	/* don't transfer data during abort processing */
 	if (pcr->remove_pci)
 		return -EINVAL;
 
 	if ((sglist == NULL) || (num_sg <= 0))
 		return -EINVAL;
 
-	if (read) {
-		dir = DEVICE_TO_HOST;
-		dma_dir = DMA_FROM_DEVICE;
-	} else {
-		dir = HOST_TO_DEVICE;
-		dma_dir = DMA_TO_DEVICE;
-	}
+	return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg);
 
-	count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
-	if (count < 1) {
-		dev_err(&(pcr->pci->dev), "scatterlist map failed\n");
+void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read)
+{
+	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg);
+
+int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int count, bool read, int timeout)
+{
+	struct completion trans_done;
+	struct scatterlist *sg;
+	dma_addr_t addr;
+	long timeleft;
+	unsigned long flags;
+	unsigned int len;
+	int i, err = 0;
+	u32 val;
+	u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE;
+
+	if (pcr->remove_pci)
+		return -ENODEV;
+
+	if ((sglist == NULL) || (count < 1))
 		return -EINVAL;
-	}
-	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
 
 	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
 	pcr->sgi = 0;
@@ -400,12 +424,10 @@ int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 	}
 
 	spin_lock_irqsave(&pcr->lock, flags);
-
 	if (pcr->trans_result == TRANS_RESULT_FAIL)
 		err = -EINVAL;
 	else if (pcr->trans_result == TRANS_NO_DEVICE)
 		err = -ENODEV;
-
 	spin_unlock_irqrestore(&pcr->lock, flags);
 
 out:
@@ -413,8 +435,6 @@ out:
 	pcr->done = NULL;
 	spin_unlock_irqrestore(&pcr->lock, flags);
 
-	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
-
 	if ((err < 0) && (err != -ENODEV))
 		rtsx_pci_stop_cmd(pcr);
 
@@ -423,7 +443,7 @@ out:
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer);
 
 int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
 {
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index a383597..74346d5 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -943,6 +943,12 @@ void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr);
 int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout);
 int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 		int num_sg, bool read, int timeout);
+int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read);
+void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read);
+int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int count, bool read, int timeout);
 int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
 int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
 int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card);
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 1/2] mfd: rtsx: add dma transfer function
@ 2014-06-06  7:05   ` micky_ching
  0 siblings, 0 replies; 40+ messages in thread
From: micky_ching @ 2014-06-06  7:05 UTC (permalink / raw)
  To: sameo, lee.jones, chris, ulf.hansson
  Cc: gregkh, linux-mmc, linux-kernel, wei_wang, rogerable, devel,
	dan.carpenter

From: Micky Ching <micky_ching@realsil.com.cn>

rtsx driver using a single function for transfer data, dma map/unmap are
placed in one fix function. We need map/unmap dma in different place(for
mmc async driver), so add three function for dma map, dma transfer and
dma unmap.

Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
---
 drivers/mfd/rtsx_pcr.c       |   76 ++++++++++++++++++++++++++----------------
 include/linux/mfd/rtsx_pci.h |    6 ++++
 2 files changed, 54 insertions(+), 28 deletions(-)

diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
index 1d15735..d01b8c2 100644
--- a/drivers/mfd/rtsx_pcr.c
+++ b/drivers/mfd/rtsx_pcr.c
@@ -337,40 +337,64 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
 int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 		int num_sg, bool read, int timeout)
 {
-	struct completion trans_done;
-	u8 dir;
-	int err = 0, i, count;
-	long timeleft;
-	unsigned long flags;
-	struct scatterlist *sg;
-	enum dma_data_direction dma_dir;
-	u32 val;
-	dma_addr_t addr;
-	unsigned int len;
+	int err = 0, count;
 
 	dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg);
+	count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read);
+	if (count < 1)
+		return -EINVAL;
+	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
+
+	err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout);
+
+	rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+
+int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read)
+{
+	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 
-	/* don't transfer data during abort processing */
 	if (pcr->remove_pci)
 		return -EINVAL;
 
 	if ((sglist == NULL) || (num_sg <= 0))
 		return -EINVAL;
 
-	if (read) {
-		dir = DEVICE_TO_HOST;
-		dma_dir = DMA_FROM_DEVICE;
-	} else {
-		dir = HOST_TO_DEVICE;
-		dma_dir = DMA_TO_DEVICE;
-	}
+	return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg);
 
-	count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
-	if (count < 1) {
-		dev_err(&(pcr->pci->dev), "scatterlist map failed\n");
+void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read)
+{
+	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+
+	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir);
+}
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg);
+
+int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int count, bool read, int timeout)
+{
+	struct completion trans_done;
+	struct scatterlist *sg;
+	dma_addr_t addr;
+	long timeleft;
+	unsigned long flags;
+	unsigned int len;
+	int i, err = 0;
+	u32 val;
+	u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE;
+
+	if (pcr->remove_pci)
+		return -ENODEV;
+
+	if ((sglist == NULL) || (count < 1))
 		return -EINVAL;
-	}
-	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
 
 	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
 	pcr->sgi = 0;
@@ -400,12 +424,10 @@ int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 	}
 
 	spin_lock_irqsave(&pcr->lock, flags);
-
 	if (pcr->trans_result == TRANS_RESULT_FAIL)
 		err = -EINVAL;
 	else if (pcr->trans_result == TRANS_NO_DEVICE)
 		err = -ENODEV;
-
 	spin_unlock_irqrestore(&pcr->lock, flags);
 
 out:
@@ -413,8 +435,6 @@ out:
 	pcr->done = NULL;
 	spin_unlock_irqrestore(&pcr->lock, flags);
 
-	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
-
 	if ((err < 0) && (err != -ENODEV))
 		rtsx_pci_stop_cmd(pcr);
 
@@ -423,7 +443,7 @@ out:
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
+EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer);
 
 int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
 {
diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
index a383597..74346d5 100644
--- a/include/linux/mfd/rtsx_pci.h
+++ b/include/linux/mfd/rtsx_pci.h
@@ -943,6 +943,12 @@ void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr);
 int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout);
 int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
 		int num_sg, bool read, int timeout);
+int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read);
+void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int num_sg, bool read);
+int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
+		int count, bool read, int timeout);
 int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
 int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
 int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-06  7:05 ` micky_ching
@ 2014-06-06  7:05   ` micky_ching
  -1 siblings, 0 replies; 40+ messages in thread
From: micky_ching @ 2014-06-06  7:05 UTC (permalink / raw)
  To: sameo, lee.jones, chris, ulf.hansson
  Cc: devel, linux-kernel, linux-mmc, gregkh, dan.carpenter, rogerable,
	wei_wang, Micky Ching

From: Micky Ching <micky_ching@realsil.com.cn>

Add support for non-blocking request, pre_req() runs dma_map_sg() and
post_req() runs dma_unmap_sg(). This patch can increase card read/write
speed, especially for high speed card and slow speed CPU.

Test on intel i3(800MHz - 2.3GHz) performance mode(2.3GHz), SD card
clock 208MHz

run dd if=/dev/mmcblk0 of=/dev/null bs=64k count=1024
before:
67108864 bytes (67 MB) copied, 0.85427 s, 78.6 MB/s
after:
67108864 bytes (67 MB) copied, 0.74799 s, 89.7 MB/s

Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
---
 drivers/mmc/host/rtsx_pci_sdmmc.c |  133 +++++++++++++++++++++++++++++++++++--
 1 file changed, 127 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 1c68e0d..a2c0858 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -24,6 +24,7 @@
 #include <linux/highmem.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
+#include <linux/workqueue.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sd.h>
@@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
 	struct rtsx_pcr		*pcr;
 	struct mmc_host		*mmc;
 	struct mmc_request	*mrq;
+	struct workqueue_struct *workq;
+#define SDMMC_WORKQ_NAME	"rtsx_pci_sdmmc_workq"
 
+	struct work_struct	work;
 	struct mutex		host_mutex;
 
 	u8			ssc_depth;
@@ -48,6 +52,11 @@ struct realtek_pci_sdmmc {
 	int			power_state;
 #define SDMMC_POWER_ON		1
 #define SDMMC_POWER_OFF		0
+
+	unsigned int		sg_count;
+	s32			cookie;
+	unsigned int		cookie_sg_count;
+	bool			using_cookie;
 };
 
 static inline struct device *sdmmc_dev(struct realtek_pci_sdmmc *host)
@@ -86,6 +95,77 @@ static void sd_print_debug_regs(struct realtek_pci_sdmmc *host)
 #define sd_print_debug_regs(host)
 #endif /* DEBUG */
 
+/*
+ * sd_pre_dma_transfer - do dma_map_sg() or using cookie
+ *
+ * @pre: if called in pre_req()
+ * return:
+ *	0 - do dma_map_sg()
+ *	1 - using cookie
+ */
+static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host,
+		struct mmc_data *data, bool pre)
+{
+	struct rtsx_pcr *pcr = host->pcr;
+	int read = data->flags & MMC_DATA_READ;
+	int count = 0;
+	int using_cookie = 0;
+
+	if (!pre && data->host_cookie && data->host_cookie != host->cookie) {
+		dev_err(sdmmc_dev(host),
+			"error: data->host_cookie = %d, host->cookie = %d\n",
+			data->host_cookie, host->cookie);
+		data->host_cookie = 0;
+	}
+
+	if (pre || data->host_cookie != host->cookie) {
+		count = rtsx_pci_dma_map_sg(pcr, data->sg, data->sg_len, read);
+	} else {
+		count = host->cookie_sg_count;
+		using_cookie = 1;
+	}
+
+	if (pre) {
+		host->cookie_sg_count = count;
+		if (++host->cookie < 0)
+			host->cookie = 1;
+		data->host_cookie = host->cookie;
+	} else {
+		host->sg_count = count;
+	}
+
+	return using_cookie;
+}
+
+static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
+		bool is_first_req)
+{
+	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
+	struct mmc_data *data = mrq->data;
+
+	if (data->host_cookie) {
+		dev_err(sdmmc_dev(host),
+			"error: reset data->host_cookie = %d\n",
+			data->host_cookie);
+		data->host_cookie = 0;
+	}
+
+	sd_pre_dma_transfer(host, data, true);
+	dev_dbg(sdmmc_dev(host), "pre dma sg: %d\n", host->cookie_sg_count);
+}
+
+static void sdmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
+		int err)
+{
+	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
+	struct rtsx_pcr *pcr = host->pcr;
+	struct mmc_data *data = mrq->data;
+	int read = data->flags & MMC_DATA_READ;
+
+	rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, read);
+	data->host_cookie = 0;
+}
+
 static int sd_read_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt,
 		u8 *buf, int buf_len, int timeout)
 {
@@ -415,7 +495,7 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
 
 	rtsx_pci_send_cmd_no_wait(pcr);
 
-	err = rtsx_pci_transfer_data(pcr, data->sg, data->sg_len, read, 10000);
+	err = rtsx_pci_dma_transfer(pcr, data->sg, host->sg_count, read, 10000);
 	if (err < 0) {
 		sd_clear_error(host);
 		return err;
@@ -640,12 +720,24 @@ static int sd_tuning_rx(struct realtek_pci_sdmmc *host, u8 opcode)
 	return 0;
 }
 
-static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+static inline int sd_rw_cmd(struct mmc_command *cmd)
 {
-	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
+	return mmc_op_multi(cmd->opcode) ||
+		(cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
+		(cmd->opcode == MMC_WRITE_BLOCK);
+}
+
+static void sd_request(struct work_struct *work)
+{
+	struct realtek_pci_sdmmc *host = container_of(work,
+			struct realtek_pci_sdmmc, work);
 	struct rtsx_pcr *pcr = host->pcr;
+
+	struct mmc_host *mmc = host->mmc;
+	struct mmc_request *mrq = host->mrq;
 	struct mmc_command *cmd = mrq->cmd;
 	struct mmc_data *data = mrq->data;
+
 	unsigned int data_size = 0;
 	int err;
 
@@ -677,13 +769,13 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	if (mrq->data)
 		data_size = data->blocks * data->blksz;
 
-	if (!data_size || mmc_op_multi(cmd->opcode) ||
-			(cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
-			(cmd->opcode == MMC_WRITE_BLOCK)) {
+	if (!data_size || sd_rw_cmd(cmd)) {
 		sd_send_cmd_get_rsp(host, cmd);
 
 		if (!cmd->error && data_size) {
 			sd_rw_multi(host, mrq);
+			if (!host->using_cookie)
+				sdmmc_post_req(host->mmc, host->mrq, 0);
 
 			if (mmc_op_multi(cmd->opcode) && mrq->stop)
 				sd_send_cmd_get_rsp(host, mrq->stop);
@@ -712,6 +804,21 @@ finish:
 	mmc_request_done(mmc, mrq);
 }
 
+static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
+	struct mmc_data *data = mrq->data;
+
+	mutex_lock(&host->host_mutex);
+	host->mrq = mrq;
+	mutex_unlock(&host->host_mutex);
+
+	if (sd_rw_cmd(mrq->cmd))
+		host->using_cookie = sd_pre_dma_transfer(host, data, false);
+
+	queue_work(host->workq, &host->work);
+}
+
 static int sd_set_bus_width(struct realtek_pci_sdmmc *host,
 		unsigned char bus_width)
 {
@@ -1144,6 +1251,8 @@ out:
 }
 
 static const struct mmc_host_ops realtek_pci_sdmmc_ops = {
+	.pre_req = sdmmc_pre_req,
+	.post_req = sdmmc_post_req,
 	.request = sdmmc_request,
 	.set_ios = sdmmc_set_ios,
 	.get_ro = sdmmc_get_ro,
@@ -1222,10 +1331,16 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	host = mmc_priv(mmc);
+	host->workq = create_singlethread_workqueue(SDMMC_WORKQ_NAME);
+	if (!host->workq) {
+		mmc_free_host(mmc);
+		return -ENOMEM;
+	}
 	host->pcr = pcr;
 	host->mmc = mmc;
 	host->pdev = pdev;
 	host->power_state = SDMMC_POWER_OFF;
+	INIT_WORK(&host->work, sd_request);
 	platform_set_drvdata(pdev, host);
 	pcr->slots[RTSX_SD_CARD].p_dev = pdev;
 	pcr->slots[RTSX_SD_CARD].card_event = rtsx_pci_sdmmc_card_event;
@@ -1253,6 +1368,8 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
 	pcr->slots[RTSX_SD_CARD].card_event = NULL;
 	mmc = host->mmc;
 
+	cancel_work_sync(&host->work);
+
 	mutex_lock(&host->host_mutex);
 	if (host->mrq) {
 		dev_dbg(&(pdev->dev),
@@ -1271,6 +1388,10 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
 	mmc_remove_host(mmc);
 	host->eject = true;
 
+	flush_workqueue(host->workq);
+	destroy_workqueue(host->workq);
+	host->workq = NULL;
+
 	mmc_free_host(mmc);
 
 	dev_dbg(&(pdev->dev),
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-06  7:05   ` micky_ching
  0 siblings, 0 replies; 40+ messages in thread
From: micky_ching @ 2014-06-06  7:05 UTC (permalink / raw)
  To: sameo, lee.jones, chris, ulf.hansson
  Cc: gregkh, linux-mmc, linux-kernel, wei_wang, rogerable, devel,
	dan.carpenter

From: Micky Ching <micky_ching@realsil.com.cn>

Add support for non-blocking request, pre_req() runs dma_map_sg() and
post_req() runs dma_unmap_sg(). This patch can increase card read/write
speed, especially for high speed card and slow speed CPU.

Test on intel i3(800MHz - 2.3GHz) performance mode(2.3GHz), SD card
clock 208MHz

run dd if=/dev/mmcblk0 of=/dev/null bs=64k count=1024
before:
67108864 bytes (67 MB) copied, 0.85427 s, 78.6 MB/s
after:
67108864 bytes (67 MB) copied, 0.74799 s, 89.7 MB/s

Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
---
 drivers/mmc/host/rtsx_pci_sdmmc.c |  133 +++++++++++++++++++++++++++++++++++--
 1 file changed, 127 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 1c68e0d..a2c0858 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -24,6 +24,7 @@
 #include <linux/highmem.h>
 #include <linux/delay.h>
 #include <linux/platform_device.h>
+#include <linux/workqueue.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sd.h>
@@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
 	struct rtsx_pcr		*pcr;
 	struct mmc_host		*mmc;
 	struct mmc_request	*mrq;
+	struct workqueue_struct *workq;
+#define SDMMC_WORKQ_NAME	"rtsx_pci_sdmmc_workq"
 
+	struct work_struct	work;
 	struct mutex		host_mutex;
 
 	u8			ssc_depth;
@@ -48,6 +52,11 @@ struct realtek_pci_sdmmc {
 	int			power_state;
 #define SDMMC_POWER_ON		1
 #define SDMMC_POWER_OFF		0
+
+	unsigned int		sg_count;
+	s32			cookie;
+	unsigned int		cookie_sg_count;
+	bool			using_cookie;
 };
 
 static inline struct device *sdmmc_dev(struct realtek_pci_sdmmc *host)
@@ -86,6 +95,77 @@ static void sd_print_debug_regs(struct realtek_pci_sdmmc *host)
 #define sd_print_debug_regs(host)
 #endif /* DEBUG */
 
+/*
+ * sd_pre_dma_transfer - do dma_map_sg() or using cookie
+ *
+ * @pre: if called in pre_req()
+ * return:
+ *	0 - do dma_map_sg()
+ *	1 - using cookie
+ */
+static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host,
+		struct mmc_data *data, bool pre)
+{
+	struct rtsx_pcr *pcr = host->pcr;
+	int read = data->flags & MMC_DATA_READ;
+	int count = 0;
+	int using_cookie = 0;
+
+	if (!pre && data->host_cookie && data->host_cookie != host->cookie) {
+		dev_err(sdmmc_dev(host),
+			"error: data->host_cookie = %d, host->cookie = %d\n",
+			data->host_cookie, host->cookie);
+		data->host_cookie = 0;
+	}
+
+	if (pre || data->host_cookie != host->cookie) {
+		count = rtsx_pci_dma_map_sg(pcr, data->sg, data->sg_len, read);
+	} else {
+		count = host->cookie_sg_count;
+		using_cookie = 1;
+	}
+
+	if (pre) {
+		host->cookie_sg_count = count;
+		if (++host->cookie < 0)
+			host->cookie = 1;
+		data->host_cookie = host->cookie;
+	} else {
+		host->sg_count = count;
+	}
+
+	return using_cookie;
+}
+
+static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
+		bool is_first_req)
+{
+	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
+	struct mmc_data *data = mrq->data;
+
+	if (data->host_cookie) {
+		dev_err(sdmmc_dev(host),
+			"error: reset data->host_cookie = %d\n",
+			data->host_cookie);
+		data->host_cookie = 0;
+	}
+
+	sd_pre_dma_transfer(host, data, true);
+	dev_dbg(sdmmc_dev(host), "pre dma sg: %d\n", host->cookie_sg_count);
+}
+
+static void sdmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
+		int err)
+{
+	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
+	struct rtsx_pcr *pcr = host->pcr;
+	struct mmc_data *data = mrq->data;
+	int read = data->flags & MMC_DATA_READ;
+
+	rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, read);
+	data->host_cookie = 0;
+}
+
 static int sd_read_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt,
 		u8 *buf, int buf_len, int timeout)
 {
@@ -415,7 +495,7 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
 
 	rtsx_pci_send_cmd_no_wait(pcr);
 
-	err = rtsx_pci_transfer_data(pcr, data->sg, data->sg_len, read, 10000);
+	err = rtsx_pci_dma_transfer(pcr, data->sg, host->sg_count, read, 10000);
 	if (err < 0) {
 		sd_clear_error(host);
 		return err;
@@ -640,12 +720,24 @@ static int sd_tuning_rx(struct realtek_pci_sdmmc *host, u8 opcode)
 	return 0;
 }
 
-static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+static inline int sd_rw_cmd(struct mmc_command *cmd)
 {
-	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
+	return mmc_op_multi(cmd->opcode) ||
+		(cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
+		(cmd->opcode == MMC_WRITE_BLOCK);
+}
+
+static void sd_request(struct work_struct *work)
+{
+	struct realtek_pci_sdmmc *host = container_of(work,
+			struct realtek_pci_sdmmc, work);
 	struct rtsx_pcr *pcr = host->pcr;
+
+	struct mmc_host *mmc = host->mmc;
+	struct mmc_request *mrq = host->mrq;
 	struct mmc_command *cmd = mrq->cmd;
 	struct mmc_data *data = mrq->data;
+
 	unsigned int data_size = 0;
 	int err;
 
@@ -677,13 +769,13 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	if (mrq->data)
 		data_size = data->blocks * data->blksz;
 
-	if (!data_size || mmc_op_multi(cmd->opcode) ||
-			(cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
-			(cmd->opcode == MMC_WRITE_BLOCK)) {
+	if (!data_size || sd_rw_cmd(cmd)) {
 		sd_send_cmd_get_rsp(host, cmd);
 
 		if (!cmd->error && data_size) {
 			sd_rw_multi(host, mrq);
+			if (!host->using_cookie)
+				sdmmc_post_req(host->mmc, host->mrq, 0);
 
 			if (mmc_op_multi(cmd->opcode) && mrq->stop)
 				sd_send_cmd_get_rsp(host, mrq->stop);
@@ -712,6 +804,21 @@ finish:
 	mmc_request_done(mmc, mrq);
 }
 
+static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct realtek_pci_sdmmc *host = mmc_priv(mmc);
+	struct mmc_data *data = mrq->data;
+
+	mutex_lock(&host->host_mutex);
+	host->mrq = mrq;
+	mutex_unlock(&host->host_mutex);
+
+	if (sd_rw_cmd(mrq->cmd))
+		host->using_cookie = sd_pre_dma_transfer(host, data, false);
+
+	queue_work(host->workq, &host->work);
+}
+
 static int sd_set_bus_width(struct realtek_pci_sdmmc *host,
 		unsigned char bus_width)
 {
@@ -1144,6 +1251,8 @@ out:
 }
 
 static const struct mmc_host_ops realtek_pci_sdmmc_ops = {
+	.pre_req = sdmmc_pre_req,
+	.post_req = sdmmc_post_req,
 	.request = sdmmc_request,
 	.set_ios = sdmmc_set_ios,
 	.get_ro = sdmmc_get_ro,
@@ -1222,10 +1331,16 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	host = mmc_priv(mmc);
+	host->workq = create_singlethread_workqueue(SDMMC_WORKQ_NAME);
+	if (!host->workq) {
+		mmc_free_host(mmc);
+		return -ENOMEM;
+	}
 	host->pcr = pcr;
 	host->mmc = mmc;
 	host->pdev = pdev;
 	host->power_state = SDMMC_POWER_OFF;
+	INIT_WORK(&host->work, sd_request);
 	platform_set_drvdata(pdev, host);
 	pcr->slots[RTSX_SD_CARD].p_dev = pdev;
 	pcr->slots[RTSX_SD_CARD].card_event = rtsx_pci_sdmmc_card_event;
@@ -1253,6 +1368,8 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
 	pcr->slots[RTSX_SD_CARD].card_event = NULL;
 	mmc = host->mmc;
 
+	cancel_work_sync(&host->work);
+
 	mutex_lock(&host->host_mutex);
 	if (host->mrq) {
 		dev_dbg(&(pdev->dev),
@@ -1271,6 +1388,10 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
 	mmc_remove_host(mmc);
 	host->eject = true;
 
+	flush_workqueue(host->workq);
+	destroy_workqueue(host->workq);
+	host->workq = NULL;
+
 	mmc_free_host(mmc);
 
 	dev_dbg(&(pdev->dev),
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-06  7:05   ` micky_ching
@ 2014-06-16  8:42     ` Ulf Hansson
  -1 siblings, 0 replies; 40+ messages in thread
From: Ulf Hansson @ 2014-06-16  8:42 UTC (permalink / raw)
  To: micky
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 6 June 2014 09:05,  <micky_ching@realsil.com.cn> wrote:
> From: Micky Ching <micky_ching@realsil.com.cn>
>
> Add support for non-blocking request, pre_req() runs dma_map_sg() and
> post_req() runs dma_unmap_sg(). This patch can increase card read/write
> speed, especially for high speed card and slow speed CPU.
>
> Test on intel i3(800MHz - 2.3GHz) performance mode(2.3GHz), SD card
> clock 208MHz
>
> run dd if=/dev/mmcblk0 of=/dev/null bs=64k count=1024
> before:
> 67108864 bytes (67 MB) copied, 0.85427 s, 78.6 MB/s
> after:
> 67108864 bytes (67 MB) copied, 0.74799 s, 89.7 MB/s
>
> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
> ---
>  drivers/mmc/host/rtsx_pci_sdmmc.c |  133 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 127 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
> index 1c68e0d..a2c0858 100644
> --- a/drivers/mmc/host/rtsx_pci_sdmmc.c
> +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
> @@ -24,6 +24,7 @@
>  #include <linux/highmem.h>
>  #include <linux/delay.h>
>  #include <linux/platform_device.h>
> +#include <linux/workqueue.h>
>  #include <linux/mmc/host.h>
>  #include <linux/mmc/mmc.h>
>  #include <linux/mmc/sd.h>
> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>         struct rtsx_pcr         *pcr;
>         struct mmc_host         *mmc;
>         struct mmc_request      *mrq;
> +       struct workqueue_struct *workq;
> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>
> +       struct work_struct      work;

I am trying to understand why you need a work/workqueue to implement
this feature. Is that really the case?

Could you elaborate on the reasons?

Kind regards
Uffe

>         struct mutex            host_mutex;
>
>         u8                      ssc_depth;
> @@ -48,6 +52,11 @@ struct realtek_pci_sdmmc {
>         int                     power_state;
>  #define SDMMC_POWER_ON         1
>  #define SDMMC_POWER_OFF                0
> +
> +       unsigned int            sg_count;
> +       s32                     cookie;
> +       unsigned int            cookie_sg_count;
> +       bool                    using_cookie;
>  };
>
>  static inline struct device *sdmmc_dev(struct realtek_pci_sdmmc *host)
> @@ -86,6 +95,77 @@ static void sd_print_debug_regs(struct realtek_pci_sdmmc *host)
>  #define sd_print_debug_regs(host)
>  #endif /* DEBUG */
>
> +/*
> + * sd_pre_dma_transfer - do dma_map_sg() or using cookie
> + *
> + * @pre: if called in pre_req()
> + * return:
> + *     0 - do dma_map_sg()
> + *     1 - using cookie
> + */
> +static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host,
> +               struct mmc_data *data, bool pre)
> +{
> +       struct rtsx_pcr *pcr = host->pcr;
> +       int read = data->flags & MMC_DATA_READ;
> +       int count = 0;
> +       int using_cookie = 0;
> +
> +       if (!pre && data->host_cookie && data->host_cookie != host->cookie) {
> +               dev_err(sdmmc_dev(host),
> +                       "error: data->host_cookie = %d, host->cookie = %d\n",
> +                       data->host_cookie, host->cookie);
> +               data->host_cookie = 0;
> +       }
> +
> +       if (pre || data->host_cookie != host->cookie) {
> +               count = rtsx_pci_dma_map_sg(pcr, data->sg, data->sg_len, read);
> +       } else {
> +               count = host->cookie_sg_count;
> +               using_cookie = 1;
> +       }
> +
> +       if (pre) {
> +               host->cookie_sg_count = count;
> +               if (++host->cookie < 0)
> +                       host->cookie = 1;
> +               data->host_cookie = host->cookie;
> +       } else {
> +               host->sg_count = count;
> +       }
> +
> +       return using_cookie;
> +}
> +
> +static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
> +               bool is_first_req)
> +{
> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       struct mmc_data *data = mrq->data;
> +
> +       if (data->host_cookie) {
> +               dev_err(sdmmc_dev(host),
> +                       "error: reset data->host_cookie = %d\n",
> +                       data->host_cookie);
> +               data->host_cookie = 0;
> +       }
> +
> +       sd_pre_dma_transfer(host, data, true);
> +       dev_dbg(sdmmc_dev(host), "pre dma sg: %d\n", host->cookie_sg_count);
> +}
> +
> +static void sdmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
> +               int err)
> +{
> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       struct rtsx_pcr *pcr = host->pcr;
> +       struct mmc_data *data = mrq->data;
> +       int read = data->flags & MMC_DATA_READ;
> +
> +       rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, read);
> +       data->host_cookie = 0;
> +}
> +
>  static int sd_read_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt,
>                 u8 *buf, int buf_len, int timeout)
>  {
> @@ -415,7 +495,7 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
>
>         rtsx_pci_send_cmd_no_wait(pcr);
>
> -       err = rtsx_pci_transfer_data(pcr, data->sg, data->sg_len, read, 10000);
> +       err = rtsx_pci_dma_transfer(pcr, data->sg, host->sg_count, read, 10000);
>         if (err < 0) {
>                 sd_clear_error(host);
>                 return err;
> @@ -640,12 +720,24 @@ static int sd_tuning_rx(struct realtek_pci_sdmmc *host, u8 opcode)
>         return 0;
>  }
>
> -static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
> +static inline int sd_rw_cmd(struct mmc_command *cmd)
>  {
> -       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       return mmc_op_multi(cmd->opcode) ||
> +               (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
> +               (cmd->opcode == MMC_WRITE_BLOCK);
> +}
> +
> +static void sd_request(struct work_struct *work)
> +{
> +       struct realtek_pci_sdmmc *host = container_of(work,
> +                       struct realtek_pci_sdmmc, work);
>         struct rtsx_pcr *pcr = host->pcr;
> +
> +       struct mmc_host *mmc = host->mmc;
> +       struct mmc_request *mrq = host->mrq;
>         struct mmc_command *cmd = mrq->cmd;
>         struct mmc_data *data = mrq->data;
> +
>         unsigned int data_size = 0;
>         int err;
>
> @@ -677,13 +769,13 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
>         if (mrq->data)
>                 data_size = data->blocks * data->blksz;
>
> -       if (!data_size || mmc_op_multi(cmd->opcode) ||
> -                       (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
> -                       (cmd->opcode == MMC_WRITE_BLOCK)) {
> +       if (!data_size || sd_rw_cmd(cmd)) {
>                 sd_send_cmd_get_rsp(host, cmd);
>
>                 if (!cmd->error && data_size) {
>                         sd_rw_multi(host, mrq);
> +                       if (!host->using_cookie)
> +                               sdmmc_post_req(host->mmc, host->mrq, 0);
>
>                         if (mmc_op_multi(cmd->opcode) && mrq->stop)
>                                 sd_send_cmd_get_rsp(host, mrq->stop);
> @@ -712,6 +804,21 @@ finish:
>         mmc_request_done(mmc, mrq);
>  }
>
> +static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
> +{
> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       struct mmc_data *data = mrq->data;
> +
> +       mutex_lock(&host->host_mutex);
> +       host->mrq = mrq;
> +       mutex_unlock(&host->host_mutex);
> +
> +       if (sd_rw_cmd(mrq->cmd))
> +               host->using_cookie = sd_pre_dma_transfer(host, data, false);
> +
> +       queue_work(host->workq, &host->work);
> +}
> +
>  static int sd_set_bus_width(struct realtek_pci_sdmmc *host,
>                 unsigned char bus_width)
>  {
> @@ -1144,6 +1251,8 @@ out:
>  }
>
>  static const struct mmc_host_ops realtek_pci_sdmmc_ops = {
> +       .pre_req = sdmmc_pre_req,
> +       .post_req = sdmmc_post_req,
>         .request = sdmmc_request,
>         .set_ios = sdmmc_set_ios,
>         .get_ro = sdmmc_get_ro,
> @@ -1222,10 +1331,16 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
>                 return -ENOMEM;
>
>         host = mmc_priv(mmc);
> +       host->workq = create_singlethread_workqueue(SDMMC_WORKQ_NAME);
> +       if (!host->workq) {
> +               mmc_free_host(mmc);
> +               return -ENOMEM;
> +       }
>         host->pcr = pcr;
>         host->mmc = mmc;
>         host->pdev = pdev;
>         host->power_state = SDMMC_POWER_OFF;
> +       INIT_WORK(&host->work, sd_request);
>         platform_set_drvdata(pdev, host);
>         pcr->slots[RTSX_SD_CARD].p_dev = pdev;
>         pcr->slots[RTSX_SD_CARD].card_event = rtsx_pci_sdmmc_card_event;
> @@ -1253,6 +1368,8 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>         pcr->slots[RTSX_SD_CARD].card_event = NULL;
>         mmc = host->mmc;
>
> +       cancel_work_sync(&host->work);
> +
>         mutex_lock(&host->host_mutex);
>         if (host->mrq) {
>                 dev_dbg(&(pdev->dev),
> @@ -1271,6 +1388,10 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>         mmc_remove_host(mmc);
>         host->eject = true;
>
> +       flush_workqueue(host->workq);
> +       destroy_workqueue(host->workq);
> +       host->workq = NULL;
> +
>         mmc_free_host(mmc);
>
>         dev_dbg(&(pdev->dev),
> --
> 1.7.9.5
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-16  8:42     ` Ulf Hansson
  0 siblings, 0 replies; 40+ messages in thread
From: Ulf Hansson @ 2014-06-16  8:42 UTC (permalink / raw)
  To: micky
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Lee Jones, Dan Carpenter

On 6 June 2014 09:05,  <micky_ching@realsil.com.cn> wrote:
> From: Micky Ching <micky_ching@realsil.com.cn>
>
> Add support for non-blocking request, pre_req() runs dma_map_sg() and
> post_req() runs dma_unmap_sg(). This patch can increase card read/write
> speed, especially for high speed card and slow speed CPU.
>
> Test on intel i3(800MHz - 2.3GHz) performance mode(2.3GHz), SD card
> clock 208MHz
>
> run dd if=/dev/mmcblk0 of=/dev/null bs=64k count=1024
> before:
> 67108864 bytes (67 MB) copied, 0.85427 s, 78.6 MB/s
> after:
> 67108864 bytes (67 MB) copied, 0.74799 s, 89.7 MB/s
>
> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
> ---
>  drivers/mmc/host/rtsx_pci_sdmmc.c |  133 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 127 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
> index 1c68e0d..a2c0858 100644
> --- a/drivers/mmc/host/rtsx_pci_sdmmc.c
> +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
> @@ -24,6 +24,7 @@
>  #include <linux/highmem.h>
>  #include <linux/delay.h>
>  #include <linux/platform_device.h>
> +#include <linux/workqueue.h>
>  #include <linux/mmc/host.h>
>  #include <linux/mmc/mmc.h>
>  #include <linux/mmc/sd.h>
> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>         struct rtsx_pcr         *pcr;
>         struct mmc_host         *mmc;
>         struct mmc_request      *mrq;
> +       struct workqueue_struct *workq;
> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>
> +       struct work_struct      work;

I am trying to understand why you need a work/workqueue to implement
this feature. Is that really the case?

Could you elaborate on the reasons?

Kind regards
Uffe

>         struct mutex            host_mutex;
>
>         u8                      ssc_depth;
> @@ -48,6 +52,11 @@ struct realtek_pci_sdmmc {
>         int                     power_state;
>  #define SDMMC_POWER_ON         1
>  #define SDMMC_POWER_OFF                0
> +
> +       unsigned int            sg_count;
> +       s32                     cookie;
> +       unsigned int            cookie_sg_count;
> +       bool                    using_cookie;
>  };
>
>  static inline struct device *sdmmc_dev(struct realtek_pci_sdmmc *host)
> @@ -86,6 +95,77 @@ static void sd_print_debug_regs(struct realtek_pci_sdmmc *host)
>  #define sd_print_debug_regs(host)
>  #endif /* DEBUG */
>
> +/*
> + * sd_pre_dma_transfer - do dma_map_sg() or using cookie
> + *
> + * @pre: if called in pre_req()
> + * return:
> + *     0 - do dma_map_sg()
> + *     1 - using cookie
> + */
> +static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host,
> +               struct mmc_data *data, bool pre)
> +{
> +       struct rtsx_pcr *pcr = host->pcr;
> +       int read = data->flags & MMC_DATA_READ;
> +       int count = 0;
> +       int using_cookie = 0;
> +
> +       if (!pre && data->host_cookie && data->host_cookie != host->cookie) {
> +               dev_err(sdmmc_dev(host),
> +                       "error: data->host_cookie = %d, host->cookie = %d\n",
> +                       data->host_cookie, host->cookie);
> +               data->host_cookie = 0;
> +       }
> +
> +       if (pre || data->host_cookie != host->cookie) {
> +               count = rtsx_pci_dma_map_sg(pcr, data->sg, data->sg_len, read);
> +       } else {
> +               count = host->cookie_sg_count;
> +               using_cookie = 1;
> +       }
> +
> +       if (pre) {
> +               host->cookie_sg_count = count;
> +               if (++host->cookie < 0)
> +                       host->cookie = 1;
> +               data->host_cookie = host->cookie;
> +       } else {
> +               host->sg_count = count;
> +       }
> +
> +       return using_cookie;
> +}
> +
> +static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
> +               bool is_first_req)
> +{
> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       struct mmc_data *data = mrq->data;
> +
> +       if (data->host_cookie) {
> +               dev_err(sdmmc_dev(host),
> +                       "error: reset data->host_cookie = %d\n",
> +                       data->host_cookie);
> +               data->host_cookie = 0;
> +       }
> +
> +       sd_pre_dma_transfer(host, data, true);
> +       dev_dbg(sdmmc_dev(host), "pre dma sg: %d\n", host->cookie_sg_count);
> +}
> +
> +static void sdmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
> +               int err)
> +{
> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       struct rtsx_pcr *pcr = host->pcr;
> +       struct mmc_data *data = mrq->data;
> +       int read = data->flags & MMC_DATA_READ;
> +
> +       rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, read);
> +       data->host_cookie = 0;
> +}
> +
>  static int sd_read_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt,
>                 u8 *buf, int buf_len, int timeout)
>  {
> @@ -415,7 +495,7 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
>
>         rtsx_pci_send_cmd_no_wait(pcr);
>
> -       err = rtsx_pci_transfer_data(pcr, data->sg, data->sg_len, read, 10000);
> +       err = rtsx_pci_dma_transfer(pcr, data->sg, host->sg_count, read, 10000);
>         if (err < 0) {
>                 sd_clear_error(host);
>                 return err;
> @@ -640,12 +720,24 @@ static int sd_tuning_rx(struct realtek_pci_sdmmc *host, u8 opcode)
>         return 0;
>  }
>
> -static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
> +static inline int sd_rw_cmd(struct mmc_command *cmd)
>  {
> -       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       return mmc_op_multi(cmd->opcode) ||
> +               (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
> +               (cmd->opcode == MMC_WRITE_BLOCK);
> +}
> +
> +static void sd_request(struct work_struct *work)
> +{
> +       struct realtek_pci_sdmmc *host = container_of(work,
> +                       struct realtek_pci_sdmmc, work);
>         struct rtsx_pcr *pcr = host->pcr;
> +
> +       struct mmc_host *mmc = host->mmc;
> +       struct mmc_request *mrq = host->mrq;
>         struct mmc_command *cmd = mrq->cmd;
>         struct mmc_data *data = mrq->data;
> +
>         unsigned int data_size = 0;
>         int err;
>
> @@ -677,13 +769,13 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
>         if (mrq->data)
>                 data_size = data->blocks * data->blksz;
>
> -       if (!data_size || mmc_op_multi(cmd->opcode) ||
> -                       (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
> -                       (cmd->opcode == MMC_WRITE_BLOCK)) {
> +       if (!data_size || sd_rw_cmd(cmd)) {
>                 sd_send_cmd_get_rsp(host, cmd);
>
>                 if (!cmd->error && data_size) {
>                         sd_rw_multi(host, mrq);
> +                       if (!host->using_cookie)
> +                               sdmmc_post_req(host->mmc, host->mrq, 0);
>
>                         if (mmc_op_multi(cmd->opcode) && mrq->stop)
>                                 sd_send_cmd_get_rsp(host, mrq->stop);
> @@ -712,6 +804,21 @@ finish:
>         mmc_request_done(mmc, mrq);
>  }
>
> +static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
> +{
> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       struct mmc_data *data = mrq->data;
> +
> +       mutex_lock(&host->host_mutex);
> +       host->mrq = mrq;
> +       mutex_unlock(&host->host_mutex);
> +
> +       if (sd_rw_cmd(mrq->cmd))
> +               host->using_cookie = sd_pre_dma_transfer(host, data, false);
> +
> +       queue_work(host->workq, &host->work);
> +}
> +
>  static int sd_set_bus_width(struct realtek_pci_sdmmc *host,
>                 unsigned char bus_width)
>  {
> @@ -1144,6 +1251,8 @@ out:
>  }
>
>  static const struct mmc_host_ops realtek_pci_sdmmc_ops = {
> +       .pre_req = sdmmc_pre_req,
> +       .post_req = sdmmc_post_req,
>         .request = sdmmc_request,
>         .set_ios = sdmmc_set_ios,
>         .get_ro = sdmmc_get_ro,
> @@ -1222,10 +1331,16 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
>                 return -ENOMEM;
>
>         host = mmc_priv(mmc);
> +       host->workq = create_singlethread_workqueue(SDMMC_WORKQ_NAME);
> +       if (!host->workq) {
> +               mmc_free_host(mmc);
> +               return -ENOMEM;
> +       }
>         host->pcr = pcr;
>         host->mmc = mmc;
>         host->pdev = pdev;
>         host->power_state = SDMMC_POWER_OFF;
> +       INIT_WORK(&host->work, sd_request);
>         platform_set_drvdata(pdev, host);
>         pcr->slots[RTSX_SD_CARD].p_dev = pdev;
>         pcr->slots[RTSX_SD_CARD].card_event = rtsx_pci_sdmmc_card_event;
> @@ -1253,6 +1368,8 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>         pcr->slots[RTSX_SD_CARD].card_event = NULL;
>         mmc = host->mmc;
>
> +       cancel_work_sync(&host->work);
> +
>         mutex_lock(&host->host_mutex);
>         if (host->mrq) {
>                 dev_dbg(&(pdev->dev),
> @@ -1271,6 +1388,10 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>         mmc_remove_host(mmc);
>         host->eject = true;
>
> +       flush_workqueue(host->workq);
> +       destroy_workqueue(host->workq);
> +       host->workq = NULL;
> +
>         mmc_free_host(mmc);
>
>         dev_dbg(&(pdev->dev),
> --
> 1.7.9.5
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-16  8:42     ` Ulf Hansson
@ 2014-06-16  8:51       ` Arend van Spriel
  -1 siblings, 0 replies; 40+ messages in thread
From: Arend van Spriel @ 2014-06-16  8:51 UTC (permalink / raw)
  To: Ulf Hansson, micky
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 16-06-14 10:42, Ulf Hansson wrote:
> On 6 June 2014 09:05,  <micky_ching@realsil.com.cn> wrote:
>> From: Micky Ching <micky_ching@realsil.com.cn>
>>
>> Add support for non-blocking request, pre_req() runs dma_map_sg() and
>> post_req() runs dma_unmap_sg(). This patch can increase card read/write
>> speed, especially for high speed card and slow speed CPU.

Interesting statement about slow speed CPU, but why then test with CPU 
running at 2.3GHz. Did you also run at 800MHz?

Regards,
Arend

>> Test on intel i3(800MHz - 2.3GHz) performance mode(2.3GHz), SD card
>> clock 208MHz
>>
>> run dd if=/dev/mmcblk0 of=/dev/null bs=64k count=1024
>> before:
>> 67108864 bytes (67 MB) copied, 0.85427 s, 78.6 MB/s
>> after:
>> 67108864 bytes (67 MB) copied, 0.74799 s, 89.7 MB/s
>>
>> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
>> ---
>>   drivers/mmc/host/rtsx_pci_sdmmc.c |  133 +++++++++++++++++++++++++++++++++++--
>>   1 file changed, 127 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
>> index 1c68e0d..a2c0858 100644
>> --- a/drivers/mmc/host/rtsx_pci_sdmmc.c
>> +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
>> @@ -24,6 +24,7 @@
>>   #include <linux/highmem.h>
>>   #include <linux/delay.h>
>>   #include <linux/platform_device.h>
>> +#include <linux/workqueue.h>
>>   #include <linux/mmc/host.h>
>>   #include <linux/mmc/mmc.h>
>>   #include <linux/mmc/sd.h>
>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>          struct rtsx_pcr         *pcr;
>>          struct mmc_host         *mmc;
>>          struct mmc_request      *mrq;
>> +       struct workqueue_struct *workq;
>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>
>> +       struct work_struct      work;
>
> I am trying to understand why you need a work/workqueue to implement
> this feature. Is that really the case?
>
> Could you elaborate on the reasons?
>
> Kind regards
> Uffe
>
>>          struct mutex            host_mutex;
>>
>>          u8                      ssc_depth;
>> @@ -48,6 +52,11 @@ struct realtek_pci_sdmmc {
>>          int                     power_state;
>>   #define SDMMC_POWER_ON         1
>>   #define SDMMC_POWER_OFF                0
>> +
>> +       unsigned int            sg_count;
>> +       s32                     cookie;
>> +       unsigned int            cookie_sg_count;
>> +       bool                    using_cookie;
>>   };
>>
>>   static inline struct device *sdmmc_dev(struct realtek_pci_sdmmc *host)
>> @@ -86,6 +95,77 @@ static void sd_print_debug_regs(struct realtek_pci_sdmmc *host)
>>   #define sd_print_debug_regs(host)
>>   #endif /* DEBUG */
>>
>> +/*
>> + * sd_pre_dma_transfer - do dma_map_sg() or using cookie
>> + *
>> + * @pre: if called in pre_req()
>> + * return:
>> + *     0 - do dma_map_sg()
>> + *     1 - using cookie
>> + */
>> +static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host,
>> +               struct mmc_data *data, bool pre)
>> +{
>> +       struct rtsx_pcr *pcr = host->pcr;
>> +       int read = data->flags & MMC_DATA_READ;
>> +       int count = 0;
>> +       int using_cookie = 0;
>> +
>> +       if (!pre && data->host_cookie && data->host_cookie != host->cookie) {
>> +               dev_err(sdmmc_dev(host),
>> +                       "error: data->host_cookie = %d, host->cookie = %d\n",
>> +                       data->host_cookie, host->cookie);
>> +               data->host_cookie = 0;
>> +       }
>> +
>> +       if (pre || data->host_cookie != host->cookie) {
>> +               count = rtsx_pci_dma_map_sg(pcr, data->sg, data->sg_len, read);
>> +       } else {
>> +               count = host->cookie_sg_count;
>> +               using_cookie = 1;
>> +       }
>> +
>> +       if (pre) {
>> +               host->cookie_sg_count = count;
>> +               if (++host->cookie < 0)
>> +                       host->cookie = 1;
>> +               data->host_cookie = host->cookie;
>> +       } else {
>> +               host->sg_count = count;
>> +       }
>> +
>> +       return using_cookie;
>> +}
>> +
>> +static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
>> +               bool is_first_req)
>> +{
>> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
>> +       struct mmc_data *data = mrq->data;
>> +
>> +       if (data->host_cookie) {
>> +               dev_err(sdmmc_dev(host),
>> +                       "error: reset data->host_cookie = %d\n",
>> +                       data->host_cookie);
>> +               data->host_cookie = 0;
>> +       }
>> +
>> +       sd_pre_dma_transfer(host, data, true);
>> +       dev_dbg(sdmmc_dev(host), "pre dma sg: %d\n", host->cookie_sg_count);
>> +}
>> +
>> +static void sdmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
>> +               int err)
>> +{
>> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
>> +       struct rtsx_pcr *pcr = host->pcr;
>> +       struct mmc_data *data = mrq->data;
>> +       int read = data->flags & MMC_DATA_READ;
>> +
>> +       rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, read);
>> +       data->host_cookie = 0;
>> +}
>> +
>>   static int sd_read_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt,
>>                  u8 *buf, int buf_len, int timeout)
>>   {
>> @@ -415,7 +495,7 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
>>
>>          rtsx_pci_send_cmd_no_wait(pcr);
>>
>> -       err = rtsx_pci_transfer_data(pcr, data->sg, data->sg_len, read, 10000);
>> +       err = rtsx_pci_dma_transfer(pcr, data->sg, host->sg_count, read, 10000);
>>          if (err < 0) {
>>                  sd_clear_error(host);
>>                  return err;
>> @@ -640,12 +720,24 @@ static int sd_tuning_rx(struct realtek_pci_sdmmc *host, u8 opcode)
>>          return 0;
>>   }
>>
>> -static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
>> +static inline int sd_rw_cmd(struct mmc_command *cmd)
>>   {
>> -       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
>> +       return mmc_op_multi(cmd->opcode) ||
>> +               (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
>> +               (cmd->opcode == MMC_WRITE_BLOCK);
>> +}
>> +
>> +static void sd_request(struct work_struct *work)
>> +{
>> +       struct realtek_pci_sdmmc *host = container_of(work,
>> +                       struct realtek_pci_sdmmc, work);
>>          struct rtsx_pcr *pcr = host->pcr;
>> +
>> +       struct mmc_host *mmc = host->mmc;
>> +       struct mmc_request *mrq = host->mrq;
>>          struct mmc_command *cmd = mrq->cmd;
>>          struct mmc_data *data = mrq->data;
>> +
>>          unsigned int data_size = 0;
>>          int err;
>>
>> @@ -677,13 +769,13 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
>>          if (mrq->data)
>>                  data_size = data->blocks * data->blksz;
>>
>> -       if (!data_size || mmc_op_multi(cmd->opcode) ||
>> -                       (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
>> -                       (cmd->opcode == MMC_WRITE_BLOCK)) {
>> +       if (!data_size || sd_rw_cmd(cmd)) {
>>                  sd_send_cmd_get_rsp(host, cmd);
>>
>>                  if (!cmd->error && data_size) {
>>                          sd_rw_multi(host, mrq);
>> +                       if (!host->using_cookie)
>> +                               sdmmc_post_req(host->mmc, host->mrq, 0);
>>
>>                          if (mmc_op_multi(cmd->opcode) && mrq->stop)
>>                                  sd_send_cmd_get_rsp(host, mrq->stop);
>> @@ -712,6 +804,21 @@ finish:
>>          mmc_request_done(mmc, mrq);
>>   }
>>
>> +static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
>> +{
>> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
>> +       struct mmc_data *data = mrq->data;
>> +
>> +       mutex_lock(&host->host_mutex);
>> +       host->mrq = mrq;
>> +       mutex_unlock(&host->host_mutex);
>> +
>> +       if (sd_rw_cmd(mrq->cmd))
>> +               host->using_cookie = sd_pre_dma_transfer(host, data, false);
>> +
>> +       queue_work(host->workq, &host->work);
>> +}
>> +
>>   static int sd_set_bus_width(struct realtek_pci_sdmmc *host,
>>                  unsigned char bus_width)
>>   {
>> @@ -1144,6 +1251,8 @@ out:
>>   }
>>
>>   static const struct mmc_host_ops realtek_pci_sdmmc_ops = {
>> +       .pre_req = sdmmc_pre_req,
>> +       .post_req = sdmmc_post_req,
>>          .request = sdmmc_request,
>>          .set_ios = sdmmc_set_ios,
>>          .get_ro = sdmmc_get_ro,
>> @@ -1222,10 +1331,16 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
>>                  return -ENOMEM;
>>
>>          host = mmc_priv(mmc);
>> +       host->workq = create_singlethread_workqueue(SDMMC_WORKQ_NAME);
>> +       if (!host->workq) {
>> +               mmc_free_host(mmc);
>> +               return -ENOMEM;
>> +       }
>>          host->pcr = pcr;
>>          host->mmc = mmc;
>>          host->pdev = pdev;
>>          host->power_state = SDMMC_POWER_OFF;
>> +       INIT_WORK(&host->work, sd_request);
>>          platform_set_drvdata(pdev, host);
>>          pcr->slots[RTSX_SD_CARD].p_dev = pdev;
>>          pcr->slots[RTSX_SD_CARD].card_event = rtsx_pci_sdmmc_card_event;
>> @@ -1253,6 +1368,8 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>>          pcr->slots[RTSX_SD_CARD].card_event = NULL;
>>          mmc = host->mmc;
>>
>> +       cancel_work_sync(&host->work);
>> +
>>          mutex_lock(&host->host_mutex);
>>          if (host->mrq) {
>>                  dev_dbg(&(pdev->dev),
>> @@ -1271,6 +1388,10 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>>          mmc_remove_host(mmc);
>>          host->eject = true;
>>
>> +       flush_workqueue(host->workq);
>> +       destroy_workqueue(host->workq);
>> +       host->workq = NULL;
>> +
>>          mmc_free_host(mmc);
>>
>>          dev_dbg(&(pdev->dev),
>> --
>> 1.7.9.5
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-16  8:51       ` Arend van Spriel
  0 siblings, 0 replies; 40+ messages in thread
From: Arend van Spriel @ 2014-06-16  8:51 UTC (permalink / raw)
  To: Ulf Hansson, micky
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 16-06-14 10:42, Ulf Hansson wrote:
> On 6 June 2014 09:05,  <micky_ching@realsil.com.cn> wrote:
>> From: Micky Ching <micky_ching@realsil.com.cn>
>>
>> Add support for non-blocking request, pre_req() runs dma_map_sg() and
>> post_req() runs dma_unmap_sg(). This patch can increase card read/write
>> speed, especially for high speed card and slow speed CPU.

Interesting statement about slow speed CPU, but why then test with CPU 
running at 2.3GHz. Did you also run at 800MHz?

Regards,
Arend

>> Test on intel i3(800MHz - 2.3GHz) performance mode(2.3GHz), SD card
>> clock 208MHz
>>
>> run dd if=/dev/mmcblk0 of=/dev/null bs=64k count=1024
>> before:
>> 67108864 bytes (67 MB) copied, 0.85427 s, 78.6 MB/s
>> after:
>> 67108864 bytes (67 MB) copied, 0.74799 s, 89.7 MB/s
>>
>> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
>> ---
>>   drivers/mmc/host/rtsx_pci_sdmmc.c |  133 +++++++++++++++++++++++++++++++++++--
>>   1 file changed, 127 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
>> index 1c68e0d..a2c0858 100644
>> --- a/drivers/mmc/host/rtsx_pci_sdmmc.c
>> +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
>> @@ -24,6 +24,7 @@
>>   #include <linux/highmem.h>
>>   #include <linux/delay.h>
>>   #include <linux/platform_device.h>
>> +#include <linux/workqueue.h>
>>   #include <linux/mmc/host.h>
>>   #include <linux/mmc/mmc.h>
>>   #include <linux/mmc/sd.h>
>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>          struct rtsx_pcr         *pcr;
>>          struct mmc_host         *mmc;
>>          struct mmc_request      *mrq;
>> +       struct workqueue_struct *workq;
>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>
>> +       struct work_struct      work;
>
> I am trying to understand why you need a work/workqueue to implement
> this feature. Is that really the case?
>
> Could you elaborate on the reasons?
>
> Kind regards
> Uffe
>
>>          struct mutex            host_mutex;
>>
>>          u8                      ssc_depth;
>> @@ -48,6 +52,11 @@ struct realtek_pci_sdmmc {
>>          int                     power_state;
>>   #define SDMMC_POWER_ON         1
>>   #define SDMMC_POWER_OFF                0
>> +
>> +       unsigned int            sg_count;
>> +       s32                     cookie;
>> +       unsigned int            cookie_sg_count;
>> +       bool                    using_cookie;
>>   };
>>
>>   static inline struct device *sdmmc_dev(struct realtek_pci_sdmmc *host)
>> @@ -86,6 +95,77 @@ static void sd_print_debug_regs(struct realtek_pci_sdmmc *host)
>>   #define sd_print_debug_regs(host)
>>   #endif /* DEBUG */
>>
>> +/*
>> + * sd_pre_dma_transfer - do dma_map_sg() or using cookie
>> + *
>> + * @pre: if called in pre_req()
>> + * return:
>> + *     0 - do dma_map_sg()
>> + *     1 - using cookie
>> + */
>> +static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host,
>> +               struct mmc_data *data, bool pre)
>> +{
>> +       struct rtsx_pcr *pcr = host->pcr;
>> +       int read = data->flags & MMC_DATA_READ;
>> +       int count = 0;
>> +       int using_cookie = 0;
>> +
>> +       if (!pre && data->host_cookie && data->host_cookie != host->cookie) {
>> +               dev_err(sdmmc_dev(host),
>> +                       "error: data->host_cookie = %d, host->cookie = %d\n",
>> +                       data->host_cookie, host->cookie);
>> +               data->host_cookie = 0;
>> +       }
>> +
>> +       if (pre || data->host_cookie != host->cookie) {
>> +               count = rtsx_pci_dma_map_sg(pcr, data->sg, data->sg_len, read);
>> +       } else {
>> +               count = host->cookie_sg_count;
>> +               using_cookie = 1;
>> +       }
>> +
>> +       if (pre) {
>> +               host->cookie_sg_count = count;
>> +               if (++host->cookie < 0)
>> +                       host->cookie = 1;
>> +               data->host_cookie = host->cookie;
>> +       } else {
>> +               host->sg_count = count;
>> +       }
>> +
>> +       return using_cookie;
>> +}
>> +
>> +static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
>> +               bool is_first_req)
>> +{
>> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
>> +       struct mmc_data *data = mrq->data;
>> +
>> +       if (data->host_cookie) {
>> +               dev_err(sdmmc_dev(host),
>> +                       "error: reset data->host_cookie = %d\n",
>> +                       data->host_cookie);
>> +               data->host_cookie = 0;
>> +       }
>> +
>> +       sd_pre_dma_transfer(host, data, true);
>> +       dev_dbg(sdmmc_dev(host), "pre dma sg: %d\n", host->cookie_sg_count);
>> +}
>> +
>> +static void sdmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
>> +               int err)
>> +{
>> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
>> +       struct rtsx_pcr *pcr = host->pcr;
>> +       struct mmc_data *data = mrq->data;
>> +       int read = data->flags & MMC_DATA_READ;
>> +
>> +       rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, read);
>> +       data->host_cookie = 0;
>> +}
>> +
>>   static int sd_read_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt,
>>                  u8 *buf, int buf_len, int timeout)
>>   {
>> @@ -415,7 +495,7 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
>>
>>          rtsx_pci_send_cmd_no_wait(pcr);
>>
>> -       err = rtsx_pci_transfer_data(pcr, data->sg, data->sg_len, read, 10000);
>> +       err = rtsx_pci_dma_transfer(pcr, data->sg, host->sg_count, read, 10000);
>>          if (err < 0) {
>>                  sd_clear_error(host);
>>                  return err;
>> @@ -640,12 +720,24 @@ static int sd_tuning_rx(struct realtek_pci_sdmmc *host, u8 opcode)
>>          return 0;
>>   }
>>
>> -static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
>> +static inline int sd_rw_cmd(struct mmc_command *cmd)
>>   {
>> -       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
>> +       return mmc_op_multi(cmd->opcode) ||
>> +               (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
>> +               (cmd->opcode == MMC_WRITE_BLOCK);
>> +}
>> +
>> +static void sd_request(struct work_struct *work)
>> +{
>> +       struct realtek_pci_sdmmc *host = container_of(work,
>> +                       struct realtek_pci_sdmmc, work);
>>          struct rtsx_pcr *pcr = host->pcr;
>> +
>> +       struct mmc_host *mmc = host->mmc;
>> +       struct mmc_request *mrq = host->mrq;
>>          struct mmc_command *cmd = mrq->cmd;
>>          struct mmc_data *data = mrq->data;
>> +
>>          unsigned int data_size = 0;
>>          int err;
>>
>> @@ -677,13 +769,13 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
>>          if (mrq->data)
>>                  data_size = data->blocks * data->blksz;
>>
>> -       if (!data_size || mmc_op_multi(cmd->opcode) ||
>> -                       (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
>> -                       (cmd->opcode == MMC_WRITE_BLOCK)) {
>> +       if (!data_size || sd_rw_cmd(cmd)) {
>>                  sd_send_cmd_get_rsp(host, cmd);
>>
>>                  if (!cmd->error && data_size) {
>>                          sd_rw_multi(host, mrq);
>> +                       if (!host->using_cookie)
>> +                               sdmmc_post_req(host->mmc, host->mrq, 0);
>>
>>                          if (mmc_op_multi(cmd->opcode) && mrq->stop)
>>                                  sd_send_cmd_get_rsp(host, mrq->stop);
>> @@ -712,6 +804,21 @@ finish:
>>          mmc_request_done(mmc, mrq);
>>   }
>>
>> +static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
>> +{
>> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
>> +       struct mmc_data *data = mrq->data;
>> +
>> +       mutex_lock(&host->host_mutex);
>> +       host->mrq = mrq;
>> +       mutex_unlock(&host->host_mutex);
>> +
>> +       if (sd_rw_cmd(mrq->cmd))
>> +               host->using_cookie = sd_pre_dma_transfer(host, data, false);
>> +
>> +       queue_work(host->workq, &host->work);
>> +}
>> +
>>   static int sd_set_bus_width(struct realtek_pci_sdmmc *host,
>>                  unsigned char bus_width)
>>   {
>> @@ -1144,6 +1251,8 @@ out:
>>   }
>>
>>   static const struct mmc_host_ops realtek_pci_sdmmc_ops = {
>> +       .pre_req = sdmmc_pre_req,
>> +       .post_req = sdmmc_post_req,
>>          .request = sdmmc_request,
>>          .set_ios = sdmmc_set_ios,
>>          .get_ro = sdmmc_get_ro,
>> @@ -1222,10 +1331,16 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
>>                  return -ENOMEM;
>>
>>          host = mmc_priv(mmc);
>> +       host->workq = create_singlethread_workqueue(SDMMC_WORKQ_NAME);
>> +       if (!host->workq) {
>> +               mmc_free_host(mmc);
>> +               return -ENOMEM;
>> +       }
>>          host->pcr = pcr;
>>          host->mmc = mmc;
>>          host->pdev = pdev;
>>          host->power_state = SDMMC_POWER_OFF;
>> +       INIT_WORK(&host->work, sd_request);
>>          platform_set_drvdata(pdev, host);
>>          pcr->slots[RTSX_SD_CARD].p_dev = pdev;
>>          pcr->slots[RTSX_SD_CARD].card_event = rtsx_pci_sdmmc_card_event;
>> @@ -1253,6 +1368,8 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>>          pcr->slots[RTSX_SD_CARD].card_event = NULL;
>>          mmc = host->mmc;
>>
>> +       cancel_work_sync(&host->work);
>> +
>>          mutex_lock(&host->host_mutex);
>>          if (host->mrq) {
>>                  dev_dbg(&(pdev->dev),
>> @@ -1271,6 +1388,10 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>>          mmc_remove_host(mmc);
>>          host->eject = true;
>>
>> +       flush_workqueue(host->workq);
>> +       destroy_workqueue(host->workq);
>> +       host->workq = NULL;
>> +
>>          mmc_free_host(mmc);
>>
>>          dev_dbg(&(pdev->dev),
>> --
>> 1.7.9.5
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-16  8:42     ` Ulf Hansson
@ 2014-06-16  9:09       ` micky
  -1 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-16  9:09 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>> >         struct rtsx_pcr         *pcr;
>> >         struct mmc_host         *mmc;
>> >         struct mmc_request      *mrq;
>> >+       struct workqueue_struct *workq;
>> >+#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>> >
>> >+       struct work_struct      work;
> I am trying to understand why you need a work/workqueue to implement
> this feature. Is that really the case?
>
> Could you elaborate on the reasons?
Hi Uffe,

we need return as fast as possible in mmc_host_ops request(ops->request) 
callback,
so the mmc core can continue handle next request.
when next request everything is ready, it will wait previous done(if not 
done),
then call ops->request().

we can't use atomic context, because we use mutex_lock() to protect
resource, and we have to hold the lock during handle request.
So I use workq, we just queue a work and return in ops->request(),
The mmc core can continue without blocking at ops->request().

Best Regards.
micky.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-16  9:09       ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-16  9:09 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Lee Jones, Dan Carpenter

On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>> >         struct rtsx_pcr         *pcr;
>> >         struct mmc_host         *mmc;
>> >         struct mmc_request      *mrq;
>> >+       struct workqueue_struct *workq;
>> >+#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>> >
>> >+       struct work_struct      work;
> I am trying to understand why you need a work/workqueue to implement
> this feature. Is that really the case?
>
> Could you elaborate on the reasons?
Hi Uffe,

we need return as fast as possible in mmc_host_ops request(ops->request) 
callback,
so the mmc core can continue handle next request.
when next request everything is ready, it will wait previous done(if not 
done),
then call ops->request().

we can't use atomic context, because we use mutex_lock() to protect
resource, and we have to hold the lock during handle request.
So I use workq, we just queue a work and return in ops->request(),
The mmc core can continue without blocking at ops->request().

Best Regards.
micky.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] mfd: rtsx: add dma transfer function
  2014-06-06  7:05   ` micky_ching
  (?)
@ 2014-06-16 12:20   ` Lee Jones
  2014-06-16 14:24       ` Ulf Hansson
  2014-06-17  1:08       ` micky
  -1 siblings, 2 replies; 40+ messages in thread
From: Lee Jones @ 2014-06-16 12:20 UTC (permalink / raw)
  To: micky_ching
  Cc: sameo, chris, ulf.hansson, devel, linux-kernel, linux-mmc,
	gregkh, dan.carpenter, rogerable, wei_wang

> From: Micky Ching <micky_ching@realsil.com.cn>
> 
> rtsx driver using a single function for transfer data, dma map/unmap are
> placed in one fix function. We need map/unmap dma in different place(for
> mmc async driver), so add three function for dma map, dma transfer and
> dma unmap.
> 
> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
> ---
>  drivers/mfd/rtsx_pcr.c       |   76 ++++++++++++++++++++++++++----------------
>  include/linux/mfd/rtsx_pci.h |    6 ++++
>  2 files changed, 54 insertions(+), 28 deletions(-)

I don't see any glaring issues with this patch.  Does it rely on the
first patch, or vise versa, or can it just be applied?

> diff --git a/drivers/mfd/rtsx_pcr.c b/drivers/mfd/rtsx_pcr.c
> index 1d15735..d01b8c2 100644
> --- a/drivers/mfd/rtsx_pcr.c
> +++ b/drivers/mfd/rtsx_pcr.c
> @@ -337,40 +337,64 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
>  int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
>  		int num_sg, bool read, int timeout)
>  {
> -	struct completion trans_done;
> -	u8 dir;
> -	int err = 0, i, count;
> -	long timeleft;
> -	unsigned long flags;
> -	struct scatterlist *sg;
> -	enum dma_data_direction dma_dir;
> -	u32 val;
> -	dma_addr_t addr;
> -	unsigned int len;
> +	int err = 0, count;
>  
>  	dev_dbg(&(pcr->pci->dev), "--> %s: num_sg = %d\n", __func__, num_sg);
> +	count = rtsx_pci_dma_map_sg(pcr, sglist, num_sg, read);
> +	if (count < 1)
> +		return -EINVAL;
> +	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
> +
> +	err = rtsx_pci_dma_transfer(pcr, sglist, count, read, timeout);
> +
> +	rtsx_pci_dma_unmap_sg(pcr, sglist, num_sg, read);
> +
> +	return err;
> +}
> +EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
> +
> +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
> +		int num_sg, bool read)
> +{
> +	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
>  
> -	/* don't transfer data during abort processing */
>  	if (pcr->remove_pci)
>  		return -EINVAL;
>  
>  	if ((sglist == NULL) || (num_sg <= 0))
>  		return -EINVAL;
>  
> -	if (read) {
> -		dir = DEVICE_TO_HOST;
> -		dma_dir = DMA_FROM_DEVICE;
> -	} else {
> -		dir = HOST_TO_DEVICE;
> -		dma_dir = DMA_TO_DEVICE;
> -	}
> +	return dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dir);
> +}
> +EXPORT_SYMBOL_GPL(rtsx_pci_dma_map_sg);
>  
> -	count = dma_map_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
> -	if (count < 1) {
> -		dev_err(&(pcr->pci->dev), "scatterlist map failed\n");
> +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
> +		int num_sg, bool read)
> +{
> +	enum dma_data_direction dir = read ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> +
> +	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dir);
> +}
> +EXPORT_SYMBOL_GPL(rtsx_pci_dma_unmap_sg);
> +
> +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
> +		int count, bool read, int timeout)
> +{
> +	struct completion trans_done;
> +	struct scatterlist *sg;
> +	dma_addr_t addr;
> +	long timeleft;
> +	unsigned long flags;
> +	unsigned int len;
> +	int i, err = 0;
> +	u32 val;
> +	u8 dir = read ? DEVICE_TO_HOST : HOST_TO_DEVICE;
> +
> +	if (pcr->remove_pci)
> +		return -ENODEV;
> +
> +	if ((sglist == NULL) || (count < 1))
>  		return -EINVAL;
> -	}
> -	dev_dbg(&(pcr->pci->dev), "DMA mapping count: %d\n", count);
>  
>  	val = ((u32)(dir & 0x01) << 29) | TRIG_DMA | ADMA_MODE;
>  	pcr->sgi = 0;
> @@ -400,12 +424,10 @@ int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
>  	}
>  
>  	spin_lock_irqsave(&pcr->lock, flags);
> -
>  	if (pcr->trans_result == TRANS_RESULT_FAIL)
>  		err = -EINVAL;
>  	else if (pcr->trans_result == TRANS_NO_DEVICE)
>  		err = -ENODEV;
> -
>  	spin_unlock_irqrestore(&pcr->lock, flags);
>  
>  out:
> @@ -413,8 +435,6 @@ out:
>  	pcr->done = NULL;
>  	spin_unlock_irqrestore(&pcr->lock, flags);
>  
> -	dma_unmap_sg(&(pcr->pci->dev), sglist, num_sg, dma_dir);
> -
>  	if ((err < 0) && (err != -ENODEV))
>  		rtsx_pci_stop_cmd(pcr);
>  
> @@ -423,7 +443,7 @@ out:
>  
>  	return err;
>  }
> -EXPORT_SYMBOL_GPL(rtsx_pci_transfer_data);
> +EXPORT_SYMBOL_GPL(rtsx_pci_dma_transfer);
>  
>  int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len)
>  {
> diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h
> index a383597..74346d5 100644
> --- a/include/linux/mfd/rtsx_pci.h
> +++ b/include/linux/mfd/rtsx_pci.h
> @@ -943,6 +943,12 @@ void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr);
>  int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout);
>  int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist,
>  		int num_sg, bool read, int timeout);
> +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
> +		int num_sg, bool read);
> +void rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist,
> +		int num_sg, bool read);
> +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist,
> +		int count, bool read, int timeout);
>  int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
>  int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len);
>  int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card);

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-16  9:09       ` micky
  (?)
@ 2014-06-16 12:40       ` Ulf Hansson
  2014-06-17  1:04           ` micky
  -1 siblings, 1 reply; 40+ messages in thread
From: Ulf Hansson @ 2014-06-16 12:40 UTC (permalink / raw)
  To: micky
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>
>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>> >         struct rtsx_pcr         *pcr;
>>> >         struct mmc_host         *mmc;
>>> >         struct mmc_request      *mrq;
>>> >+       struct workqueue_struct *workq;
>>> >+#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>> >
>>> >+       struct work_struct      work;
>>
>> I am trying to understand why you need a work/workqueue to implement
>> this feature. Is that really the case?
>>
>> Could you elaborate on the reasons?
>
> Hi Uffe,
>
> we need return as fast as possible in mmc_host_ops request(ops->request)
> callback,
> so the mmc core can continue handle next request.
> when next request everything is ready, it will wait previous done(if not
> done),
> then call ops->request().
>
> we can't use atomic context, because we use mutex_lock() to protect

ops->request should never executed in atomic context. Is that your concern?

> resource, and we have to hold the lock during handle request.
> So I use workq, we just queue a work and return in ops->request(),
> The mmc core can continue without blocking at ops->request().
>
> Best Regards.
> micky.

Kind regards
Uffe

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] mfd: rtsx: add dma transfer function
  2014-06-16 12:20   ` Lee Jones
@ 2014-06-16 14:24       ` Ulf Hansson
  2014-06-17  1:08       ` micky
  1 sibling, 0 replies; 40+ messages in thread
From: Ulf Hansson @ 2014-06-16 14:24 UTC (permalink / raw)
  To: Lee Jones
  Cc: micky, Samuel Ortiz, Chris Ball, devel, linux-kernel, linux-mmc,
	Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 16 June 2014 14:20, Lee Jones <lee.jones@linaro.org> wrote:
>> From: Micky Ching <micky_ching@realsil.com.cn>
>>
>> rtsx driver using a single function for transfer data, dma map/unmap are
>> placed in one fix function. We need map/unmap dma in different place(for
>> mmc async driver), so add three function for dma map, dma transfer and
>> dma unmap.
>>
>> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
>> ---
>>  drivers/mfd/rtsx_pcr.c       |   76 ++++++++++++++++++++++++++----------------
>>  include/linux/mfd/rtsx_pci.h |    6 ++++
>>  2 files changed, 54 insertions(+), 28 deletions(-)
>
> I don't see any glaring issues with this patch.  Does it rely on the
> first patch, or vise versa, or can it just be applied?

The mmc part in patch2 relies on this one, but please go ahead and
apply the mfd patch if you see it good.

I can later provide my ack for the mmc parts, in patch2, when it's a
reviewed properly and thus you can take it through your tree.

Kind regards
Uffe

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] mfd: rtsx: add dma transfer function
@ 2014-06-16 14:24       ` Ulf Hansson
  0 siblings, 0 replies; 40+ messages in thread
From: Ulf Hansson @ 2014-06-16 14:24 UTC (permalink / raw)
  To: Lee Jones
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, Chris Ball,
	linux-kernel, Wei WANG, Roger, devel, Dan Carpenter

On 16 June 2014 14:20, Lee Jones <lee.jones@linaro.org> wrote:
>> From: Micky Ching <micky_ching@realsil.com.cn>
>>
>> rtsx driver using a single function for transfer data, dma map/unmap are
>> placed in one fix function. We need map/unmap dma in different place(for
>> mmc async driver), so add three function for dma map, dma transfer and
>> dma unmap.
>>
>> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
>> ---
>>  drivers/mfd/rtsx_pcr.c       |   76 ++++++++++++++++++++++++++----------------
>>  include/linux/mfd/rtsx_pci.h |    6 ++++
>>  2 files changed, 54 insertions(+), 28 deletions(-)
>
> I don't see any glaring issues with this patch.  Does it rely on the
> first patch, or vise versa, or can it just be applied?

The mmc part in patch2 relies on this one, but please go ahead and
apply the mfd patch if you see it good.

I can later provide my ack for the mmc parts, in patch2, when it's a
reviewed properly and thus you can take it through your tree.

Kind regards
Uffe

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-16 12:40       ` Ulf Hansson
@ 2014-06-17  1:04           ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-17  1:04 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 06/16/2014 08:40 PM, Ulf Hansson wrote:
> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>          struct rtsx_pcr         *pcr;
>>>>>          struct mmc_host         *mmc;
>>>>>          struct mmc_request      *mrq;
>>>>> +       struct workqueue_struct *workq;
>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>
>>>>> +       struct work_struct      work;
>>> I am trying to understand why you need a work/workqueue to implement
>>> this feature. Is that really the case?
>>>
>>> Could you elaborate on the reasons?
>> Hi Uffe,
>>
>> we need return as fast as possible in mmc_host_ops request(ops->request)
>> callback,
>> so the mmc core can continue handle next request.
>> when next request everything is ready, it will wait previous done(if not
>> done),
>> then call ops->request().
>>
>> we can't use atomic context, because we use mutex_lock() to protect
> ops->request should never executed in atomic context. Is that your concern?
Yes.
>
>> resource, and we have to hold the lock during handle request.
>> So I use workq, we just queue a work and return in ops->request(),
>> The mmc core can continue without blocking at ops->request().
>>
>> Best Regards.
>> micky.
> Kind regards
> Uffe
> .
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-17  1:04           ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-17  1:04 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Lee Jones, Dan Carpenter

On 06/16/2014 08:40 PM, Ulf Hansson wrote:
> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>          struct rtsx_pcr         *pcr;
>>>>>          struct mmc_host         *mmc;
>>>>>          struct mmc_request      *mrq;
>>>>> +       struct workqueue_struct *workq;
>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>
>>>>> +       struct work_struct      work;
>>> I am trying to understand why you need a work/workqueue to implement
>>> this feature. Is that really the case?
>>>
>>> Could you elaborate on the reasons?
>> Hi Uffe,
>>
>> we need return as fast as possible in mmc_host_ops request(ops->request)
>> callback,
>> so the mmc core can continue handle next request.
>> when next request everything is ready, it will wait previous done(if not
>> done),
>> then call ops->request().
>>
>> we can't use atomic context, because we use mutex_lock() to protect
> ops->request should never executed in atomic context. Is that your concern?
Yes.
>
>> resource, and we have to hold the lock during handle request.
>> So I use workq, we just queue a work and return in ops->request(),
>> The mmc core can continue without blocking at ops->request().
>>
>> Best Regards.
>> micky.
> Kind regards
> Uffe
> .
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] mfd: rtsx: add dma transfer function
  2014-06-16 12:20   ` Lee Jones
@ 2014-06-17  1:08       ` micky
  2014-06-17  1:08       ` micky
  1 sibling, 0 replies; 40+ messages in thread
From: micky @ 2014-06-17  1:08 UTC (permalink / raw)
  To: Lee Jones
  Cc: sameo, chris, ulf.hansson, devel, linux-kernel, linux-mmc,
	gregkh, dan.carpenter, rogerable, wei_wang

On 06/16/2014 08:20 PM, Lee Jones wrote:
> I don't see any glaring issues with this patch.  Does it rely on the
> first patch, or vise versa, or can it just be applied?
Hi Jones,

[PATCH 2/2] need function defined in [PATCH 1/2],
so we provide interface in [mfd] and called in [mmc].

Best Regards.
micky.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] mfd: rtsx: add dma transfer function
@ 2014-06-17  1:08       ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-17  1:08 UTC (permalink / raw)
  To: Lee Jones
  Cc: sameo, chris, ulf.hansson, devel, linux-kernel, linux-mmc,
	gregkh, dan.carpenter, rogerable, wei_wang

On 06/16/2014 08:20 PM, Lee Jones wrote:
> I don't see any glaring issues with this patch.  Does it rely on the
> first patch, or vise versa, or can it just be applied?
Hi Jones,

[PATCH 2/2] need function defined in [PATCH 1/2],
so we provide interface in [mfd] and called in [mmc].

Best Regards.
micky.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-17  1:04           ` micky
@ 2014-06-17  7:45             ` Ulf Hansson
  -1 siblings, 0 replies; 40+ messages in thread
From: Ulf Hansson @ 2014-06-17  7:45 UTC (permalink / raw)
  To: micky
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>
>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>
>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>
>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>
>>>>>>          struct rtsx_pcr         *pcr;
>>>>>>          struct mmc_host         *mmc;
>>>>>>          struct mmc_request      *mrq;
>>>>>> +       struct workqueue_struct *workq;
>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>
>>>>>> +       struct work_struct      work;
>>>>
>>>> I am trying to understand why you need a work/workqueue to implement
>>>> this feature. Is that really the case?
>>>>
>>>> Could you elaborate on the reasons?
>>>
>>> Hi Uffe,
>>>
>>> we need return as fast as possible in mmc_host_ops request(ops->request)
>>> callback,
>>> so the mmc core can continue handle next request.
>>> when next request everything is ready, it will wait previous done(if not
>>> done),
>>> then call ops->request().
>>>
>>> we can't use atomic context, because we use mutex_lock() to protect
>>
>> ops->request should never executed in atomic context. Is that your
>> concern?
>
> Yes.

Okay. Unless I missed your point, I don't think you need the work/workqueue.

Because, ops->request isn't ever executed in atomic context. That's
due to the mmc core, which handles the async mechanism, are waiting
for a completion variable in process context, before it invokes the
ops->request() callback.

That completion variable will be kicked, from your host driver, when
you invoke mmc_request_done(), .

Kind regards
Uffe

>>
>>
>>> resource, and we have to hold the lock during handle request.
>>> So I use workq, we just queue a work and return in ops->request(),
>>> The mmc core can continue without blocking at ops->request().
>>>
>>> Best Regards.
>>> micky.
>>
>> Kind regards
>> Uffe
>> .
>>
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-17  7:45             ` Ulf Hansson
  0 siblings, 0 replies; 40+ messages in thread
From: Ulf Hansson @ 2014-06-17  7:45 UTC (permalink / raw)
  To: micky
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Lee Jones, Dan Carpenter

On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>
>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>
>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>
>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>
>>>>>>          struct rtsx_pcr         *pcr;
>>>>>>          struct mmc_host         *mmc;
>>>>>>          struct mmc_request      *mrq;
>>>>>> +       struct workqueue_struct *workq;
>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>
>>>>>> +       struct work_struct      work;
>>>>
>>>> I am trying to understand why you need a work/workqueue to implement
>>>> this feature. Is that really the case?
>>>>
>>>> Could you elaborate on the reasons?
>>>
>>> Hi Uffe,
>>>
>>> we need return as fast as possible in mmc_host_ops request(ops->request)
>>> callback,
>>> so the mmc core can continue handle next request.
>>> when next request everything is ready, it will wait previous done(if not
>>> done),
>>> then call ops->request().
>>>
>>> we can't use atomic context, because we use mutex_lock() to protect
>>
>> ops->request should never executed in atomic context. Is that your
>> concern?
>
> Yes.

Okay. Unless I missed your point, I don't think you need the work/workqueue.

Because, ops->request isn't ever executed in atomic context. That's
due to the mmc core, which handles the async mechanism, are waiting
for a completion variable in process context, before it invokes the
ops->request() callback.

That completion variable will be kicked, from your host driver, when
you invoke mmc_request_done(), .

Kind regards
Uffe

>>
>>
>>> resource, and we have to hold the lock during handle request.
>>> So I use workq, we just queue a work and return in ops->request(),
>>> The mmc core can continue without blocking at ops->request().
>>>
>>> Best Regards.
>>> micky.
>>
>> Kind regards
>> Uffe
>> .
>>
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-17  7:45             ` Ulf Hansson
@ 2014-06-18  1:17               ` micky
  -1 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-18  1:17 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 06/17/2014 03:45 PM, Ulf Hansson wrote:
> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>           struct rtsx_pcr         *pcr;
>>>>>>>           struct mmc_host         *mmc;
>>>>>>>           struct mmc_request      *mrq;
>>>>>>> +       struct workqueue_struct *workq;
>>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>>
>>>>>>> +       struct work_struct      work;
>>>>> I am trying to understand why you need a work/workqueue to implement
>>>>> this feature. Is that really the case?
>>>>>
>>>>> Could you elaborate on the reasons?
>>>> Hi Uffe,
>>>>
>>>> we need return as fast as possible in mmc_host_ops request(ops->request)
>>>> callback,
>>>> so the mmc core can continue handle next request.
>>>> when next request everything is ready, it will wait previous done(if not
>>>> done),
>>>> then call ops->request().
>>>>
>>>> we can't use atomic context, because we use mutex_lock() to protect
>>> ops->request should never executed in atomic context. Is that your
>>> concern?
>> Yes.
> Okay. Unless I missed your point, I don't think you need the work/workqueue.
any other method?
>
> Because, ops->request isn't ever executed in atomic context. That's
> due to the mmc core, which handles the async mechanism, are waiting
> for a completion variable in process context, before it invokes the
> ops->request() callback.
>
> That completion variable will be kicked, from your host driver, when
> you invoke mmc_request_done(), .
Sorry, I don't understand here, how kicked?

I think the flow is:
- not wait for first req
- init mrq->done
- ops->request()                         ---         A.rtsx: start queue 
work.
- continue fetch next req
- prepare next req ok,
- wait previous done.                --- B.(mmc_request_done() may be 
called at any time from A to B)
- init mrq->done
- ops->request()                         ---         C.rtsx: start queue 
next work.
...
and seems no problem.

Best Regards.
micky.
> Kind regards
> Uffe
>
>>>
>>>> resource, and we have to hold the lock during handle request.
>>>> So I use workq, we just queue a work and return in ops->request(),
>>>> The mmc core can continue without blocking at ops->request().
>>>>
>>>> Best Regards.
>>>> micky.
>>> Kind regards
>>> Uffe
>>> .
>>>
> .
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-18  1:17               ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-18  1:17 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Lee Jones, Dan Carpenter

On 06/17/2014 03:45 PM, Ulf Hansson wrote:
> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>           struct rtsx_pcr         *pcr;
>>>>>>>           struct mmc_host         *mmc;
>>>>>>>           struct mmc_request      *mrq;
>>>>>>> +       struct workqueue_struct *workq;
>>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>>
>>>>>>> +       struct work_struct      work;
>>>>> I am trying to understand why you need a work/workqueue to implement
>>>>> this feature. Is that really the case?
>>>>>
>>>>> Could you elaborate on the reasons?
>>>> Hi Uffe,
>>>>
>>>> we need return as fast as possible in mmc_host_ops request(ops->request)
>>>> callback,
>>>> so the mmc core can continue handle next request.
>>>> when next request everything is ready, it will wait previous done(if not
>>>> done),
>>>> then call ops->request().
>>>>
>>>> we can't use atomic context, because we use mutex_lock() to protect
>>> ops->request should never executed in atomic context. Is that your
>>> concern?
>> Yes.
> Okay. Unless I missed your point, I don't think you need the work/workqueue.
any other method?
>
> Because, ops->request isn't ever executed in atomic context. That's
> due to the mmc core, which handles the async mechanism, are waiting
> for a completion variable in process context, before it invokes the
> ops->request() callback.
>
> That completion variable will be kicked, from your host driver, when
> you invoke mmc_request_done(), .
Sorry, I don't understand here, how kicked?

I think the flow is:
- not wait for first req
- init mrq->done
- ops->request()                         ---         A.rtsx: start queue 
work.
- continue fetch next req
- prepare next req ok,
- wait previous done.                --- B.(mmc_request_done() may be 
called at any time from A to B)
- init mrq->done
- ops->request()                         ---         C.rtsx: start queue 
next work.
...
and seems no problem.

Best Regards.
micky.
> Kind regards
> Uffe
>
>>>
>>>> resource, and we have to hold the lock during handle request.
>>>> So I use workq, we just queue a work and return in ops->request(),
>>>> The mmc core can continue without blocking at ops->request().
>>>>
>>>> Best Regards.
>>>> micky.
>>> Kind regards
>>> Uffe
>>> .
>>>
> .
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-18  1:17               ` micky
@ 2014-06-18  7:39                 ` Ulf Hansson
  -1 siblings, 0 replies; 40+ messages in thread
From: Ulf Hansson @ 2014-06-18  7:39 UTC (permalink / raw)
  To: micky
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 18 June 2014 03:17, micky <micky_ching@realsil.com.cn> wrote:
> On 06/17/2014 03:45 PM, Ulf Hansson wrote:
>>
>> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>>>
>>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>>>
>>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>>>
>>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>>>
>>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>>
>>>>>>>>           struct rtsx_pcr         *pcr;
>>>>>>>>           struct mmc_host         *mmc;
>>>>>>>>           struct mmc_request      *mrq;
>>>>>>>> +       struct workqueue_struct *workq;
>>>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>>>
>>>>>>>> +       struct work_struct      work;
>>>>>>
>>>>>> I am trying to understand why you need a work/workqueue to implement
>>>>>> this feature. Is that really the case?
>>>>>>
>>>>>> Could you elaborate on the reasons?
>>>>>
>>>>> Hi Uffe,
>>>>>
>>>>> we need return as fast as possible in mmc_host_ops
>>>>> request(ops->request)
>>>>> callback,
>>>>> so the mmc core can continue handle next request.
>>>>> when next request everything is ready, it will wait previous done(if
>>>>> not
>>>>> done),
>>>>> then call ops->request().
>>>>>
>>>>> we can't use atomic context, because we use mutex_lock() to protect
>>>>
>>>> ops->request should never executed in atomic context. Is that your
>>>> concern?
>>>
>>> Yes.
>>
>> Okay. Unless I missed your point, I don't think you need the
>> work/workqueue.
>
> any other method?
>
>>
>> Because, ops->request isn't ever executed in atomic context. That's
>> due to the mmc core, which handles the async mechanism, are waiting
>> for a completion variable in process context, before it invokes the
>> ops->request() callback.
>>
>> That completion variable will be kicked, from your host driver, when
>> you invoke mmc_request_done(), .
>
> Sorry, I don't understand here, how kicked?

mmc_request_done()
    ->mrq->done()
        ->mmc_wait_done()
            ->complete(&mrq->completion);

>
> I think the flow is:
> - not wait for first req
> - init mrq->done
> - ops->request()                         ---         A.rtsx: start queue
> work.
> - continue fetch next req
> - prepare next req ok,
> - wait previous done.                --- B.(mmc_request_done() may be called
> at any time from A to B)
> - init mrq->done
> - ops->request()                         ---         C.rtsx: start queue
> next work.
> ...
> and seems no problem.

Right, I don't think there are any _problem_ by using the workqueue as
you have implemented, but I am questioning if it's correct. Simply
because I don't think there are any reasons to why you need a
workqueue, it doesn't solve any problem for you - it just adds
overhead.

Kind regards
Ulf Hansson

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-18  7:39                 ` Ulf Hansson
  0 siblings, 0 replies; 40+ messages in thread
From: Ulf Hansson @ 2014-06-18  7:39 UTC (permalink / raw)
  To: micky
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Lee Jones, Dan Carpenter

On 18 June 2014 03:17, micky <micky_ching@realsil.com.cn> wrote:
> On 06/17/2014 03:45 PM, Ulf Hansson wrote:
>>
>> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>>>
>>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>>>
>>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>>>
>>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>>>
>>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>>
>>>>>>>>           struct rtsx_pcr         *pcr;
>>>>>>>>           struct mmc_host         *mmc;
>>>>>>>>           struct mmc_request      *mrq;
>>>>>>>> +       struct workqueue_struct *workq;
>>>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>>>
>>>>>>>> +       struct work_struct      work;
>>>>>>
>>>>>> I am trying to understand why you need a work/workqueue to implement
>>>>>> this feature. Is that really the case?
>>>>>>
>>>>>> Could you elaborate on the reasons?
>>>>>
>>>>> Hi Uffe,
>>>>>
>>>>> we need return as fast as possible in mmc_host_ops
>>>>> request(ops->request)
>>>>> callback,
>>>>> so the mmc core can continue handle next request.
>>>>> when next request everything is ready, it will wait previous done(if
>>>>> not
>>>>> done),
>>>>> then call ops->request().
>>>>>
>>>>> we can't use atomic context, because we use mutex_lock() to protect
>>>>
>>>> ops->request should never executed in atomic context. Is that your
>>>> concern?
>>>
>>> Yes.
>>
>> Okay. Unless I missed your point, I don't think you need the
>> work/workqueue.
>
> any other method?
>
>>
>> Because, ops->request isn't ever executed in atomic context. That's
>> due to the mmc core, which handles the async mechanism, are waiting
>> for a completion variable in process context, before it invokes the
>> ops->request() callback.
>>
>> That completion variable will be kicked, from your host driver, when
>> you invoke mmc_request_done(), .
>
> Sorry, I don't understand here, how kicked?

mmc_request_done()
    ->mrq->done()
        ->mmc_wait_done()
            ->complete(&mrq->completion);

>
> I think the flow is:
> - not wait for first req
> - init mrq->done
> - ops->request()                         ---         A.rtsx: start queue
> work.
> - continue fetch next req
> - prepare next req ok,
> - wait previous done.                --- B.(mmc_request_done() may be called
> at any time from A to B)
> - init mrq->done
> - ops->request()                         ---         C.rtsx: start queue
> next work.
> ...
> and seems no problem.

Right, I don't think there are any _problem_ by using the workqueue as
you have implemented, but I am questioning if it's correct. Simply
because I don't think there are any reasons to why you need a
workqueue, it doesn't solve any problem for you - it just adds
overhead.

Kind regards
Ulf Hansson

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] mfd: rtsx: add dma transfer function
  2014-06-16 14:24       ` Ulf Hansson
  (?)
@ 2014-06-18  8:00       ` Lee Jones
  2014-07-02  9:14           ` micky
  -1 siblings, 1 reply; 40+ messages in thread
From: Lee Jones @ 2014-06-18  8:00 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: micky, Samuel Ortiz, Chris Ball, devel, linux-kernel, linux-mmc,
	Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On Mon, 16 Jun 2014, Ulf Hansson wrote:

> On 16 June 2014 14:20, Lee Jones <lee.jones@linaro.org> wrote:
> >> From: Micky Ching <micky_ching@realsil.com.cn>
> >>
> >> rtsx driver using a single function for transfer data, dma map/unmap are
> >> placed in one fix function. We need map/unmap dma in different place(for
> >> mmc async driver), so add three function for dma map, dma transfer and
> >> dma unmap.
> >>
> >> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
> >> ---
> >>  drivers/mfd/rtsx_pcr.c       |   76 ++++++++++++++++++++++++++----------------
> >>  include/linux/mfd/rtsx_pci.h |    6 ++++
> >>  2 files changed, 54 insertions(+), 28 deletions(-)
> >
> > I don't see any glaring issues with this patch.  Does it rely on the
> > first patch, or vise versa, or can it just be applied?
> 
> The mmc part in patch2 relies on this one, but please go ahead and
> apply the mfd patch if you see it good.
> 
> I can later provide my ack for the mmc parts, in patch2, when it's a
> reviewed properly and thus you can take it through your tree.

There's no rush.  Once you're happy with the MMC patch, Micky can
submit them both again with my Ack on the MFD part and I'll take them
as a set.

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-18  7:39                 ` Ulf Hansson
@ 2014-06-18 10:08                   ` micky
  -1 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-18 10:08 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 06/18/2014 03:39 PM, Ulf Hansson wrote:
> On 18 June 2014 03:17, micky <micky_ching@realsil.com.cn> wrote:
>> On 06/17/2014 03:45 PM, Ulf Hansson wrote:
>>> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>>>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>>>            struct rtsx_pcr         *pcr;
>>>>>>>>>            struct mmc_host         *mmc;
>>>>>>>>>            struct mmc_request      *mrq;
>>>>>>>>> +       struct workqueue_struct *workq;
>>>>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>>>>
>>>>>>>>> +       struct work_struct      work;
>>>>>>> I am trying to understand why you need a work/workqueue to implement
>>>>>>> this feature. Is that really the case?
>>>>>>>
>>>>>>> Could you elaborate on the reasons?
>>>>>> Hi Uffe,
>>>>>>
>>>>>> we need return as fast as possible in mmc_host_ops
>>>>>> request(ops->request)
>>>>>> callback,
>>>>>> so the mmc core can continue handle next request.
>>>>>> when next request everything is ready, it will wait previous done(if
>>>>>> not
>>>>>> done),
>>>>>> then call ops->request().
>>>>>>
>>>>>> we can't use atomic context, because we use mutex_lock() to protect
>>>>> ops->request should never executed in atomic context. Is that your
>>>>> concern?
>>>> Yes.
>>> Okay. Unless I missed your point, I don't think you need the
>>> work/workqueue.
>> any other method?
>>
>>> Because, ops->request isn't ever executed in atomic context. That's
>>> due to the mmc core, which handles the async mechanism, are waiting
>>> for a completion variable in process context, before it invokes the
>>> ops->request() callback.
>>>
>>> That completion variable will be kicked, from your host driver, when
>>> you invoke mmc_request_done(), .
>> Sorry, I don't understand here, how kicked?
> mmc_request_done()
>      ->mrq->done()
>          ->mmc_wait_done()
>              ->complete(&mrq->completion);
>
>> I think the flow is:
>> - not wait for first req
>> - init mrq->done
>> - ops->request()                         ---         A.rtsx: start queue
>> work.
>> - continue fetch next req
>> - prepare next req ok,
>> - wait previous done.                --- B.(mmc_request_done() may be called
>> at any time from A to B)
>> - init mrq->done
>> - ops->request()                         ---         C.rtsx: start queue
>> next work.
>> ...
>> and seems no problem.
> Right, I don't think there are any _problem_ by using the workqueue as
> you have implemented, but I am questioning if it's correct. Simply
> because I don't think there are any reasons to why you need a
> workqueue, it doesn't solve any problem for you - it just adds
> overhead.
Hi Uffe,

we have two driver under mfd, the rtsx-mmc and rtsx-ms,
we use mutex lock(pcr_mutex) to protect resource,
when we handle mmc request, we need hold the mutex until we finish the 
request,
so it will not interruptted by rtsx-ms request.

If we not use workq, once the request hold the mutex, we have to wait 
until the request finish,
then release mutex, so the mmc core is blocking at here.
To implement nonblocking request, we have to use workq.

Best Regards.
micky.

>
> Kind regards
> Ulf Hansson
> .
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-18 10:08                   ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-18 10:08 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Lee Jones, Dan Carpenter

On 06/18/2014 03:39 PM, Ulf Hansson wrote:
> On 18 June 2014 03:17, micky <micky_ching@realsil.com.cn> wrote:
>> On 06/17/2014 03:45 PM, Ulf Hansson wrote:
>>> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>>>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>>>            struct rtsx_pcr         *pcr;
>>>>>>>>>            struct mmc_host         *mmc;
>>>>>>>>>            struct mmc_request      *mrq;
>>>>>>>>> +       struct workqueue_struct *workq;
>>>>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>>>>
>>>>>>>>> +       struct work_struct      work;
>>>>>>> I am trying to understand why you need a work/workqueue to implement
>>>>>>> this feature. Is that really the case?
>>>>>>>
>>>>>>> Could you elaborate on the reasons?
>>>>>> Hi Uffe,
>>>>>>
>>>>>> we need return as fast as possible in mmc_host_ops
>>>>>> request(ops->request)
>>>>>> callback,
>>>>>> so the mmc core can continue handle next request.
>>>>>> when next request everything is ready, it will wait previous done(if
>>>>>> not
>>>>>> done),
>>>>>> then call ops->request().
>>>>>>
>>>>>> we can't use atomic context, because we use mutex_lock() to protect
>>>>> ops->request should never executed in atomic context. Is that your
>>>>> concern?
>>>> Yes.
>>> Okay. Unless I missed your point, I don't think you need the
>>> work/workqueue.
>> any other method?
>>
>>> Because, ops->request isn't ever executed in atomic context. That's
>>> due to the mmc core, which handles the async mechanism, are waiting
>>> for a completion variable in process context, before it invokes the
>>> ops->request() callback.
>>>
>>> That completion variable will be kicked, from your host driver, when
>>> you invoke mmc_request_done(), .
>> Sorry, I don't understand here, how kicked?
> mmc_request_done()
>      ->mrq->done()
>          ->mmc_wait_done()
>              ->complete(&mrq->completion);
>
>> I think the flow is:
>> - not wait for first req
>> - init mrq->done
>> - ops->request()                         ---         A.rtsx: start queue
>> work.
>> - continue fetch next req
>> - prepare next req ok,
>> - wait previous done.                --- B.(mmc_request_done() may be called
>> at any time from A to B)
>> - init mrq->done
>> - ops->request()                         ---         C.rtsx: start queue
>> next work.
>> ...
>> and seems no problem.
> Right, I don't think there are any _problem_ by using the workqueue as
> you have implemented, but I am questioning if it's correct. Simply
> because I don't think there are any reasons to why you need a
> workqueue, it doesn't solve any problem for you - it just adds
> overhead.
Hi Uffe,

we have two driver under mfd, the rtsx-mmc and rtsx-ms,
we use mutex lock(pcr_mutex) to protect resource,
when we handle mmc request, we need hold the mutex until we finish the 
request,
so it will not interruptted by rtsx-ms request.

If we not use workq, once the request hold the mutex, we have to wait 
until the request finish,
then release mutex, so the mmc core is blocking at here.
To implement nonblocking request, we have to use workq.

Best Regards.
micky.

>
> Kind regards
> Ulf Hansson
> .
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-18 10:08                   ` micky
  (?)
@ 2014-06-18 11:03                   ` Ulf Hansson
  2014-06-19  1:57                       ` micky
  -1 siblings, 1 reply; 40+ messages in thread
From: Ulf Hansson @ 2014-06-18 11:03 UTC (permalink / raw)
  To: micky
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 18 June 2014 12:08, micky <micky_ching@realsil.com.cn> wrote:
> On 06/18/2014 03:39 PM, Ulf Hansson wrote:
>>
>> On 18 June 2014 03:17, micky <micky_ching@realsil.com.cn> wrote:
>>>
>>> On 06/17/2014 03:45 PM, Ulf Hansson wrote:
>>>>
>>>> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>>>>>
>>>>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>>>>>
>>>>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>>>>>
>>>>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>>>>>
>>>>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>>>>
>>>>>>>>>>            struct rtsx_pcr         *pcr;
>>>>>>>>>>            struct mmc_host         *mmc;
>>>>>>>>>>            struct mmc_request      *mrq;
>>>>>>>>>> +       struct workqueue_struct *workq;
>>>>>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>>>>>
>>>>>>>>>> +       struct work_struct      work;
>>>>>>>>
>>>>>>>> I am trying to understand why you need a work/workqueue to implement
>>>>>>>> this feature. Is that really the case?
>>>>>>>>
>>>>>>>> Could you elaborate on the reasons?
>>>>>>>
>>>>>>> Hi Uffe,
>>>>>>>
>>>>>>> we need return as fast as possible in mmc_host_ops
>>>>>>> request(ops->request)
>>>>>>> callback,
>>>>>>> so the mmc core can continue handle next request.
>>>>>>> when next request everything is ready, it will wait previous done(if
>>>>>>> not
>>>>>>> done),
>>>>>>> then call ops->request().
>>>>>>>
>>>>>>> we can't use atomic context, because we use mutex_lock() to protect
>>>>>>
>>>>>> ops->request should never executed in atomic context. Is that your
>>>>>> concern?
>>>>>
>>>>> Yes.
>>>>
>>>> Okay. Unless I missed your point, I don't think you need the
>>>> work/workqueue.
>>>
>>> any other method?
>>>
>>>> Because, ops->request isn't ever executed in atomic context. That's
>>>> due to the mmc core, which handles the async mechanism, are waiting
>>>> for a completion variable in process context, before it invokes the
>>>> ops->request() callback.
>>>>
>>>> That completion variable will be kicked, from your host driver, when
>>>> you invoke mmc_request_done(), .
>>>
>>> Sorry, I don't understand here, how kicked?
>>
>> mmc_request_done()
>>      ->mrq->done()
>>          ->mmc_wait_done()
>>              ->complete(&mrq->completion);
>>
>>> I think the flow is:
>>> - not wait for first req
>>> - init mrq->done
>>> - ops->request()                         ---         A.rtsx: start queue
>>> work.
>>> - continue fetch next req
>>> - prepare next req ok,
>>> - wait previous done.                --- B.(mmc_request_done() may be
>>> called
>>> at any time from A to B)
>>> - init mrq->done
>>> - ops->request()                         ---         C.rtsx: start queue
>>> next work.
>>> ...
>>> and seems no problem.
>>
>> Right, I don't think there are any _problem_ by using the workqueue as
>> you have implemented, but I am questioning if it's correct. Simply
>> because I don't think there are any reasons to why you need a
>> workqueue, it doesn't solve any problem for you - it just adds
>> overhead.
>
> Hi Uffe,
>
> we have two driver under mfd, the rtsx-mmc and rtsx-ms,
> we use mutex lock(pcr_mutex) to protect resource,
> when we handle mmc request, we need hold the mutex until we finish the
> request,
> so it will not interruptted by rtsx-ms request.

Ahh, I see. Now, _that_ explains why you want the workqueue. :-) Thanks!

>
> If we not use workq, once the request hold the mutex, we have to wait until
> the request finish,
> then release mutex, so the mmc core is blocking at here.
> To implement nonblocking request, we have to use workq.

One minor suggestion below, please consider this as an optimization
which goes outside the context of this patch.

There are cases when I think you should be able to skip the overhead
from scheduling the work from ->request(). Those cases can be
described as when the mutex are available which can be tested by using
mutex_trylock().

Kind regards
Uffe

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-18 11:03                   ` Ulf Hansson
@ 2014-06-19  1:57                       ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-19  1:57 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 06/18/2014 07:03 PM, Ulf Hansson wrote:
> On 18 June 2014 12:08, micky <micky_ching@realsil.com.cn> wrote:
>> On 06/18/2014 03:39 PM, Ulf Hansson wrote:
>>> On 18 June 2014 03:17, micky <micky_ching@realsil.com.cn> wrote:
>>>> On 06/17/2014 03:45 PM, Ulf Hansson wrote:
>>>>> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>>>>>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>>>>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>>>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>>>>>             struct rtsx_pcr         *pcr;
>>>>>>>>>>>             struct mmc_host         *mmc;
>>>>>>>>>>>             struct mmc_request      *mrq;
>>>>>>>>>>> +       struct workqueue_struct *workq;
>>>>>>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>>>>>>
>>>>>>>>>>> +       struct work_struct      work;
>>>>>>>>> I am trying to understand why you need a work/workqueue to implement
>>>>>>>>> this feature. Is that really the case?
>>>>>>>>>
>>>>>>>>> Could you elaborate on the reasons?
>>>>>>>> Hi Uffe,
>>>>>>>>
>>>>>>>> we need return as fast as possible in mmc_host_ops
>>>>>>>> request(ops->request)
>>>>>>>> callback,
>>>>>>>> so the mmc core can continue handle next request.
>>>>>>>> when next request everything is ready, it will wait previous done(if
>>>>>>>> not
>>>>>>>> done),
>>>>>>>> then call ops->request().
>>>>>>>>
>>>>>>>> we can't use atomic context, because we use mutex_lock() to protect
>>>>>>> ops->request should never executed in atomic context. Is that your
>>>>>>> concern?
>>>>>> Yes.
>>>>> Okay. Unless I missed your point, I don't think you need the
>>>>> work/workqueue.
>>>> any other method?
>>>>
>>>>> Because, ops->request isn't ever executed in atomic context. That's
>>>>> due to the mmc core, which handles the async mechanism, are waiting
>>>>> for a completion variable in process context, before it invokes the
>>>>> ops->request() callback.
>>>>>
>>>>> That completion variable will be kicked, from your host driver, when
>>>>> you invoke mmc_request_done(), .
>>>> Sorry, I don't understand here, how kicked?
>>> mmc_request_done()
>>>       ->mrq->done()
>>>           ->mmc_wait_done()
>>>               ->complete(&mrq->completion);
>>>
>>>> I think the flow is:
>>>> - not wait for first req
>>>> - init mrq->done
>>>> - ops->request()                         ---         A.rtsx: start queue
>>>> work.
>>>> - continue fetch next req
>>>> - prepare next req ok,
>>>> - wait previous done.                --- B.(mmc_request_done() may be
>>>> called
>>>> at any time from A to B)
>>>> - init mrq->done
>>>> - ops->request()                         ---         C.rtsx: start queue
>>>> next work.
>>>> ...
>>>> and seems no problem.
>>> Right, I don't think there are any _problem_ by using the workqueue as
>>> you have implemented, but I am questioning if it's correct. Simply
>>> because I don't think there are any reasons to why you need a
>>> workqueue, it doesn't solve any problem for you - it just adds
>>> overhead.
>> Hi Uffe,
>>
>> we have two driver under mfd, the rtsx-mmc and rtsx-ms,
>> we use mutex lock(pcr_mutex) to protect resource,
>> when we handle mmc request, we need hold the mutex until we finish the
>> request,
>> so it will not interruptted by rtsx-ms request.
> Ahh, I see. Now, _that_ explains why you want the workqueue. :-) Thanks!
>
>> If we not use workq, once the request hold the mutex, we have to wait until
>> the request finish,
>> then release mutex, so the mmc core is blocking at here.
>> To implement nonblocking request, we have to use workq.
> One minor suggestion below, please consider this as an optimization
> which goes outside the context of this patch.
>
> There are cases when I think you should be able to skip the overhead
> from scheduling the work from ->request(). Those cases can be
> described as when the mutex are available which can be tested by using
> mutex_trylock().
Thanks for your suggestion.

we need schedule the work every time mmc core call ops->request(),
if we want to handle request, we need hold mutex and do the work.
so mutex_trylock() will not help decrease overhead.
if we not schedule the work, the ops->request will do nothing.

Best Regards.
micky
> Kind regards
> Uffe
> .
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-19  1:57                       ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-19  1:57 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Lee Jones, Dan Carpenter

On 06/18/2014 07:03 PM, Ulf Hansson wrote:
> On 18 June 2014 12:08, micky <micky_ching@realsil.com.cn> wrote:
>> On 06/18/2014 03:39 PM, Ulf Hansson wrote:
>>> On 18 June 2014 03:17, micky <micky_ching@realsil.com.cn> wrote:
>>>> On 06/17/2014 03:45 PM, Ulf Hansson wrote:
>>>>> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>>>>>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>>>>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>>>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>>>>>             struct rtsx_pcr         *pcr;
>>>>>>>>>>>             struct mmc_host         *mmc;
>>>>>>>>>>>             struct mmc_request      *mrq;
>>>>>>>>>>> +       struct workqueue_struct *workq;
>>>>>>>>>>> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>>>>>>>>>>>
>>>>>>>>>>> +       struct work_struct      work;
>>>>>>>>> I am trying to understand why you need a work/workqueue to implement
>>>>>>>>> this feature. Is that really the case?
>>>>>>>>>
>>>>>>>>> Could you elaborate on the reasons?
>>>>>>>> Hi Uffe,
>>>>>>>>
>>>>>>>> we need return as fast as possible in mmc_host_ops
>>>>>>>> request(ops->request)
>>>>>>>> callback,
>>>>>>>> so the mmc core can continue handle next request.
>>>>>>>> when next request everything is ready, it will wait previous done(if
>>>>>>>> not
>>>>>>>> done),
>>>>>>>> then call ops->request().
>>>>>>>>
>>>>>>>> we can't use atomic context, because we use mutex_lock() to protect
>>>>>>> ops->request should never executed in atomic context. Is that your
>>>>>>> concern?
>>>>>> Yes.
>>>>> Okay. Unless I missed your point, I don't think you need the
>>>>> work/workqueue.
>>>> any other method?
>>>>
>>>>> Because, ops->request isn't ever executed in atomic context. That's
>>>>> due to the mmc core, which handles the async mechanism, are waiting
>>>>> for a completion variable in process context, before it invokes the
>>>>> ops->request() callback.
>>>>>
>>>>> That completion variable will be kicked, from your host driver, when
>>>>> you invoke mmc_request_done(), .
>>>> Sorry, I don't understand here, how kicked?
>>> mmc_request_done()
>>>       ->mrq->done()
>>>           ->mmc_wait_done()
>>>               ->complete(&mrq->completion);
>>>
>>>> I think the flow is:
>>>> - not wait for first req
>>>> - init mrq->done
>>>> - ops->request()                         ---         A.rtsx: start queue
>>>> work.
>>>> - continue fetch next req
>>>> - prepare next req ok,
>>>> - wait previous done.                --- B.(mmc_request_done() may be
>>>> called
>>>> at any time from A to B)
>>>> - init mrq->done
>>>> - ops->request()                         ---         C.rtsx: start queue
>>>> next work.
>>>> ...
>>>> and seems no problem.
>>> Right, I don't think there are any _problem_ by using the workqueue as
>>> you have implemented, but I am questioning if it's correct. Simply
>>> because I don't think there are any reasons to why you need a
>>> workqueue, it doesn't solve any problem for you - it just adds
>>> overhead.
>> Hi Uffe,
>>
>> we have two driver under mfd, the rtsx-mmc and rtsx-ms,
>> we use mutex lock(pcr_mutex) to protect resource,
>> when we handle mmc request, we need hold the mutex until we finish the
>> request,
>> so it will not interruptted by rtsx-ms request.
> Ahh, I see. Now, _that_ explains why you want the workqueue. :-) Thanks!
>
>> If we not use workq, once the request hold the mutex, we have to wait until
>> the request finish,
>> then release mutex, so the mmc core is blocking at here.
>> To implement nonblocking request, we have to use workq.
> One minor suggestion below, please consider this as an optimization
> which goes outside the context of this patch.
>
> There are cases when I think you should be able to skip the overhead
> from scheduling the work from ->request(). Those cases can be
> described as when the mutex are available which can be tested by using
> mutex_trylock().
Thanks for your suggestion.

we need schedule the work every time mmc core call ops->request(),
if we want to handle request, we need hold mutex and do the work.
so mutex_trylock() will not help decrease overhead.
if we not schedule the work, the ops->request will do nothing.

Best Regards.
micky
> Kind regards
> Uffe
> .
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-19  1:57                       ` micky
@ 2014-06-23  9:26                         ` micky
  -1 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-23  9:26 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Lee Jones, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

Hi Uffe,

do you accepted this patch?
if accepted, I will submit again with jones ack.

Best Regards.
micky.

On 06/19/2014 09:57 AM, micky wrote:
> On 06/18/2014 07:03 PM, Ulf Hansson wrote:
>> On 18 June 2014 12:08, micky <micky_ching@realsil.com.cn> wrote:
>>> On 06/18/2014 03:39 PM, Ulf Hansson wrote:
>>>> On 18 June 2014 03:17, micky <micky_ching@realsil.com.cn> wrote:
>>>>> On 06/17/2014 03:45 PM, Ulf Hansson wrote:
>>>>>> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>>>>>>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>>>>>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>>>>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>>>>>>             struct rtsx_pcr         *pcr;
>>>>>>>>>>>>             struct mmc_host         *mmc;
>>>>>>>>>>>>             struct mmc_request      *mrq;
>>>>>>>>>>>> +       struct workqueue_struct *workq;
>>>>>>>>>>>> +#define SDMMC_WORKQ_NAME "rtsx_pci_sdmmc_workq"
>>>>>>>>>>>>
>>>>>>>>>>>> +       struct work_struct      work;
>>>>>>>>>> I am trying to understand why you need a work/workqueue to 
>>>>>>>>>> implement
>>>>>>>>>> this feature. Is that really the case?
>>>>>>>>>>
>>>>>>>>>> Could you elaborate on the reasons?
>>>>>>>>> Hi Uffe,
>>>>>>>>>
>>>>>>>>> we need return as fast as possible in mmc_host_ops
>>>>>>>>> request(ops->request)
>>>>>>>>> callback,
>>>>>>>>> so the mmc core can continue handle next request.
>>>>>>>>> when next request everything is ready, it will wait previous 
>>>>>>>>> done(if
>>>>>>>>> not
>>>>>>>>> done),
>>>>>>>>> then call ops->request().
>>>>>>>>>
>>>>>>>>> we can't use atomic context, because we use mutex_lock() to 
>>>>>>>>> protect
>>>>>>>> ops->request should never executed in atomic context. Is that your
>>>>>>>> concern?
>>>>>>> Yes.
>>>>>> Okay. Unless I missed your point, I don't think you need the
>>>>>> work/workqueue.
>>>>> any other method?
>>>>>
>>>>>> Because, ops->request isn't ever executed in atomic context. That's
>>>>>> due to the mmc core, which handles the async mechanism, are waiting
>>>>>> for a completion variable in process context, before it invokes the
>>>>>> ops->request() callback.
>>>>>>
>>>>>> That completion variable will be kicked, from your host driver, when
>>>>>> you invoke mmc_request_done(), .
>>>>> Sorry, I don't understand here, how kicked?
>>>> mmc_request_done()
>>>>       ->mrq->done()
>>>>           ->mmc_wait_done()
>>>>               ->complete(&mrq->completion);
>>>>
>>>>> I think the flow is:
>>>>> - not wait for first req
>>>>> - init mrq->done
>>>>> - ops->request()                         --- A.rtsx: start queue
>>>>> work.
>>>>> - continue fetch next req
>>>>> - prepare next req ok,
>>>>> - wait previous done.                --- B.(mmc_request_done() may be
>>>>> called
>>>>> at any time from A to B)
>>>>> - init mrq->done
>>>>> - ops->request()                         --- C.rtsx: start queue
>>>>> next work.
>>>>> ...
>>>>> and seems no problem.
>>>> Right, I don't think there are any _problem_ by using the workqueue as
>>>> you have implemented, but I am questioning if it's correct. Simply
>>>> because I don't think there are any reasons to why you need a
>>>> workqueue, it doesn't solve any problem for you - it just adds
>>>> overhead.
>>> Hi Uffe,
>>>
>>> we have two driver under mfd, the rtsx-mmc and rtsx-ms,
>>> we use mutex lock(pcr_mutex) to protect resource,
>>> when we handle mmc request, we need hold the mutex until we finish the
>>> request,
>>> so it will not interruptted by rtsx-ms request.
>> Ahh, I see. Now, _that_ explains why you want the workqueue. :-) Thanks!
>>
>>> If we not use workq, once the request hold the mutex, we have to 
>>> wait until
>>> the request finish,
>>> then release mutex, so the mmc core is blocking at here.
>>> To implement nonblocking request, we have to use workq.
>> One minor suggestion below, please consider this as an optimization
>> which goes outside the context of this patch.
>>
>> There are cases when I think you should be able to skip the overhead
>> from scheduling the work from ->request(). Those cases can be
>> described as when the mutex are available which can be tested by using
>> mutex_trylock().
> Thanks for your suggestion.
>
> we need schedule the work every time mmc core call ops->request(),
> if we want to handle request, we need hold mutex and do the work.
> so mutex_trylock() will not help decrease overhead.
> if we not schedule the work, the ops->request will do nothing.
>
> Best Regards.
> micky
>> Kind regards
>> Uffe
>> .
>>
>


^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
@ 2014-06-23  9:26                         ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-06-23  9:26 UTC (permalink / raw)
  To: Ulf Hansson
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Lee Jones, Dan Carpenter

Hi Uffe,

do you accepted this patch?
if accepted, I will submit again with jones ack.

Best Regards.
micky.

On 06/19/2014 09:57 AM, micky wrote:
> On 06/18/2014 07:03 PM, Ulf Hansson wrote:
>> On 18 June 2014 12:08, micky <micky_ching@realsil.com.cn> wrote:
>>> On 06/18/2014 03:39 PM, Ulf Hansson wrote:
>>>> On 18 June 2014 03:17, micky <micky_ching@realsil.com.cn> wrote:
>>>>> On 06/17/2014 03:45 PM, Ulf Hansson wrote:
>>>>>> On 17 June 2014 03:04, micky <micky_ching@realsil.com.cn> wrote:
>>>>>>> On 06/16/2014 08:40 PM, Ulf Hansson wrote:
>>>>>>>> On 16 June 2014 11:09, micky <micky_ching@realsil.com.cn> wrote:
>>>>>>>>> On 06/16/2014 04:42 PM, Ulf Hansson wrote:
>>>>>>>>>>> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>>>>>>>>>>>>             struct rtsx_pcr         *pcr;
>>>>>>>>>>>>             struct mmc_host         *mmc;
>>>>>>>>>>>>             struct mmc_request      *mrq;
>>>>>>>>>>>> +       struct workqueue_struct *workq;
>>>>>>>>>>>> +#define SDMMC_WORKQ_NAME "rtsx_pci_sdmmc_workq"
>>>>>>>>>>>>
>>>>>>>>>>>> +       struct work_struct      work;
>>>>>>>>>> I am trying to understand why you need a work/workqueue to 
>>>>>>>>>> implement
>>>>>>>>>> this feature. Is that really the case?
>>>>>>>>>>
>>>>>>>>>> Could you elaborate on the reasons?
>>>>>>>>> Hi Uffe,
>>>>>>>>>
>>>>>>>>> we need return as fast as possible in mmc_host_ops
>>>>>>>>> request(ops->request)
>>>>>>>>> callback,
>>>>>>>>> so the mmc core can continue handle next request.
>>>>>>>>> when next request everything is ready, it will wait previous 
>>>>>>>>> done(if
>>>>>>>>> not
>>>>>>>>> done),
>>>>>>>>> then call ops->request().
>>>>>>>>>
>>>>>>>>> we can't use atomic context, because we use mutex_lock() to 
>>>>>>>>> protect
>>>>>>>> ops->request should never executed in atomic context. Is that your
>>>>>>>> concern?
>>>>>>> Yes.
>>>>>> Okay. Unless I missed your point, I don't think you need the
>>>>>> work/workqueue.
>>>>> any other method?
>>>>>
>>>>>> Because, ops->request isn't ever executed in atomic context. That's
>>>>>> due to the mmc core, which handles the async mechanism, are waiting
>>>>>> for a completion variable in process context, before it invokes the
>>>>>> ops->request() callback.
>>>>>>
>>>>>> That completion variable will be kicked, from your host driver, when
>>>>>> you invoke mmc_request_done(), .
>>>>> Sorry, I don't understand here, how kicked?
>>>> mmc_request_done()
>>>>       ->mrq->done()
>>>>           ->mmc_wait_done()
>>>>               ->complete(&mrq->completion);
>>>>
>>>>> I think the flow is:
>>>>> - not wait for first req
>>>>> - init mrq->done
>>>>> - ops->request()                         --- A.rtsx: start queue
>>>>> work.
>>>>> - continue fetch next req
>>>>> - prepare next req ok,
>>>>> - wait previous done.                --- B.(mmc_request_done() may be
>>>>> called
>>>>> at any time from A to B)
>>>>> - init mrq->done
>>>>> - ops->request()                         --- C.rtsx: start queue
>>>>> next work.
>>>>> ...
>>>>> and seems no problem.
>>>> Right, I don't think there are any _problem_ by using the workqueue as
>>>> you have implemented, but I am questioning if it's correct. Simply
>>>> because I don't think there are any reasons to why you need a
>>>> workqueue, it doesn't solve any problem for you - it just adds
>>>> overhead.
>>> Hi Uffe,
>>>
>>> we have two driver under mfd, the rtsx-mmc and rtsx-ms,
>>> we use mutex lock(pcr_mutex) to protect resource,
>>> when we handle mmc request, we need hold the mutex until we finish the
>>> request,
>>> so it will not interruptted by rtsx-ms request.
>> Ahh, I see. Now, _that_ explains why you want the workqueue. :-) Thanks!
>>
>>> If we not use workq, once the request hold the mutex, we have to 
>>> wait until
>>> the request finish,
>>> then release mutex, so the mmc core is blocking at here.
>>> To implement nonblocking request, we have to use workq.
>> One minor suggestion below, please consider this as an optimization
>> which goes outside the context of this patch.
>>
>> There are cases when I think you should be able to skip the overhead
>> from scheduling the work from ->request(). Those cases can be
>> described as when the mutex are available which can be tested by using
>> mutex_trylock().
> Thanks for your suggestion.
>
> we need schedule the work every time mmc core call ops->request(),
> if we want to handle request, we need hold mutex and do the work.
> so mutex_trylock() will not help decrease overhead.
> if we not schedule the work, the ops->request will do nothing.
>
> Best Regards.
> micky
>> Kind regards
>> Uffe
>> .
>>
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 2/2] mmc: rtsx: add support for async request
  2014-06-06  7:05   ` micky_ching
  (?)
  (?)
@ 2014-07-02  8:53   ` Ulf Hansson
  -1 siblings, 0 replies; 40+ messages in thread
From: Ulf Hansson @ 2014-07-02  8:53 UTC (permalink / raw)
  To: micky, Lee Jones
  Cc: Samuel Ortiz, Chris Ball, devel, linux-kernel, linux-mmc,
	Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 6 June 2014 09:05,  <micky_ching@realsil.com.cn> wrote:
> From: Micky Ching <micky_ching@realsil.com.cn>
>
> Add support for non-blocking request, pre_req() runs dma_map_sg() and
> post_req() runs dma_unmap_sg(). This patch can increase card read/write
> speed, especially for high speed card and slow speed CPU.
>
> Test on intel i3(800MHz - 2.3GHz) performance mode(2.3GHz), SD card
> clock 208MHz
>
> run dd if=/dev/mmcblk0 of=/dev/null bs=64k count=1024
> before:
> 67108864 bytes (67 MB) copied, 0.85427 s, 78.6 MB/s
> after:
> 67108864 bytes (67 MB) copied, 0.74799 s, 89.7 MB/s
>
> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>

Acked-by: Ulf Hansson <ulf.hansson@linaro.org>

I assume Lee will pick this patchset through his mfd tree then. If
not, ping me and I will help.

Kind regards
Uffe

> ---
>  drivers/mmc/host/rtsx_pci_sdmmc.c |  133 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 127 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
> index 1c68e0d..a2c0858 100644
> --- a/drivers/mmc/host/rtsx_pci_sdmmc.c
> +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
> @@ -24,6 +24,7 @@
>  #include <linux/highmem.h>
>  #include <linux/delay.h>
>  #include <linux/platform_device.h>
> +#include <linux/workqueue.h>
>  #include <linux/mmc/host.h>
>  #include <linux/mmc/mmc.h>
>  #include <linux/mmc/sd.h>
> @@ -36,7 +37,10 @@ struct realtek_pci_sdmmc {
>         struct rtsx_pcr         *pcr;
>         struct mmc_host         *mmc;
>         struct mmc_request      *mrq;
> +       struct workqueue_struct *workq;
> +#define SDMMC_WORKQ_NAME       "rtsx_pci_sdmmc_workq"
>
> +       struct work_struct      work;
>         struct mutex            host_mutex;
>
>         u8                      ssc_depth;
> @@ -48,6 +52,11 @@ struct realtek_pci_sdmmc {
>         int                     power_state;
>  #define SDMMC_POWER_ON         1
>  #define SDMMC_POWER_OFF                0
> +
> +       unsigned int            sg_count;
> +       s32                     cookie;
> +       unsigned int            cookie_sg_count;
> +       bool                    using_cookie;
>  };
>
>  static inline struct device *sdmmc_dev(struct realtek_pci_sdmmc *host)
> @@ -86,6 +95,77 @@ static void sd_print_debug_regs(struct realtek_pci_sdmmc *host)
>  #define sd_print_debug_regs(host)
>  #endif /* DEBUG */
>
> +/*
> + * sd_pre_dma_transfer - do dma_map_sg() or using cookie
> + *
> + * @pre: if called in pre_req()
> + * return:
> + *     0 - do dma_map_sg()
> + *     1 - using cookie
> + */
> +static int sd_pre_dma_transfer(struct realtek_pci_sdmmc *host,
> +               struct mmc_data *data, bool pre)
> +{
> +       struct rtsx_pcr *pcr = host->pcr;
> +       int read = data->flags & MMC_DATA_READ;
> +       int count = 0;
> +       int using_cookie = 0;
> +
> +       if (!pre && data->host_cookie && data->host_cookie != host->cookie) {
> +               dev_err(sdmmc_dev(host),
> +                       "error: data->host_cookie = %d, host->cookie = %d\n",
> +                       data->host_cookie, host->cookie);
> +               data->host_cookie = 0;
> +       }
> +
> +       if (pre || data->host_cookie != host->cookie) {
> +               count = rtsx_pci_dma_map_sg(pcr, data->sg, data->sg_len, read);
> +       } else {
> +               count = host->cookie_sg_count;
> +               using_cookie = 1;
> +       }
> +
> +       if (pre) {
> +               host->cookie_sg_count = count;
> +               if (++host->cookie < 0)
> +                       host->cookie = 1;
> +               data->host_cookie = host->cookie;
> +       } else {
> +               host->sg_count = count;
> +       }
> +
> +       return using_cookie;
> +}
> +
> +static void sdmmc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
> +               bool is_first_req)
> +{
> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       struct mmc_data *data = mrq->data;
> +
> +       if (data->host_cookie) {
> +               dev_err(sdmmc_dev(host),
> +                       "error: reset data->host_cookie = %d\n",
> +                       data->host_cookie);
> +               data->host_cookie = 0;
> +       }
> +
> +       sd_pre_dma_transfer(host, data, true);
> +       dev_dbg(sdmmc_dev(host), "pre dma sg: %d\n", host->cookie_sg_count);
> +}
> +
> +static void sdmmc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
> +               int err)
> +{
> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       struct rtsx_pcr *pcr = host->pcr;
> +       struct mmc_data *data = mrq->data;
> +       int read = data->flags & MMC_DATA_READ;
> +
> +       rtsx_pci_dma_unmap_sg(pcr, data->sg, data->sg_len, read);
> +       data->host_cookie = 0;
> +}
> +
>  static int sd_read_data(struct realtek_pci_sdmmc *host, u8 *cmd, u16 byte_cnt,
>                 u8 *buf, int buf_len, int timeout)
>  {
> @@ -415,7 +495,7 @@ static int sd_rw_multi(struct realtek_pci_sdmmc *host, struct mmc_request *mrq)
>
>         rtsx_pci_send_cmd_no_wait(pcr);
>
> -       err = rtsx_pci_transfer_data(pcr, data->sg, data->sg_len, read, 10000);
> +       err = rtsx_pci_dma_transfer(pcr, data->sg, host->sg_count, read, 10000);
>         if (err < 0) {
>                 sd_clear_error(host);
>                 return err;
> @@ -640,12 +720,24 @@ static int sd_tuning_rx(struct realtek_pci_sdmmc *host, u8 opcode)
>         return 0;
>  }
>
> -static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
> +static inline int sd_rw_cmd(struct mmc_command *cmd)
>  {
> -       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       return mmc_op_multi(cmd->opcode) ||
> +               (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
> +               (cmd->opcode == MMC_WRITE_BLOCK);
> +}
> +
> +static void sd_request(struct work_struct *work)
> +{
> +       struct realtek_pci_sdmmc *host = container_of(work,
> +                       struct realtek_pci_sdmmc, work);
>         struct rtsx_pcr *pcr = host->pcr;
> +
> +       struct mmc_host *mmc = host->mmc;
> +       struct mmc_request *mrq = host->mrq;
>         struct mmc_command *cmd = mrq->cmd;
>         struct mmc_data *data = mrq->data;
> +
>         unsigned int data_size = 0;
>         int err;
>
> @@ -677,13 +769,13 @@ static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
>         if (mrq->data)
>                 data_size = data->blocks * data->blksz;
>
> -       if (!data_size || mmc_op_multi(cmd->opcode) ||
> -                       (cmd->opcode == MMC_READ_SINGLE_BLOCK) ||
> -                       (cmd->opcode == MMC_WRITE_BLOCK)) {
> +       if (!data_size || sd_rw_cmd(cmd)) {
>                 sd_send_cmd_get_rsp(host, cmd);
>
>                 if (!cmd->error && data_size) {
>                         sd_rw_multi(host, mrq);
> +                       if (!host->using_cookie)
> +                               sdmmc_post_req(host->mmc, host->mrq, 0);
>
>                         if (mmc_op_multi(cmd->opcode) && mrq->stop)
>                                 sd_send_cmd_get_rsp(host, mrq->stop);
> @@ -712,6 +804,21 @@ finish:
>         mmc_request_done(mmc, mrq);
>  }
>
> +static void sdmmc_request(struct mmc_host *mmc, struct mmc_request *mrq)
> +{
> +       struct realtek_pci_sdmmc *host = mmc_priv(mmc);
> +       struct mmc_data *data = mrq->data;
> +
> +       mutex_lock(&host->host_mutex);
> +       host->mrq = mrq;
> +       mutex_unlock(&host->host_mutex);
> +
> +       if (sd_rw_cmd(mrq->cmd))
> +               host->using_cookie = sd_pre_dma_transfer(host, data, false);
> +
> +       queue_work(host->workq, &host->work);
> +}
> +
>  static int sd_set_bus_width(struct realtek_pci_sdmmc *host,
>                 unsigned char bus_width)
>  {
> @@ -1144,6 +1251,8 @@ out:
>  }
>
>  static const struct mmc_host_ops realtek_pci_sdmmc_ops = {
> +       .pre_req = sdmmc_pre_req,
> +       .post_req = sdmmc_post_req,
>         .request = sdmmc_request,
>         .set_ios = sdmmc_set_ios,
>         .get_ro = sdmmc_get_ro,
> @@ -1222,10 +1331,16 @@ static int rtsx_pci_sdmmc_drv_probe(struct platform_device *pdev)
>                 return -ENOMEM;
>
>         host = mmc_priv(mmc);
> +       host->workq = create_singlethread_workqueue(SDMMC_WORKQ_NAME);
> +       if (!host->workq) {
> +               mmc_free_host(mmc);
> +               return -ENOMEM;
> +       }
>         host->pcr = pcr;
>         host->mmc = mmc;
>         host->pdev = pdev;
>         host->power_state = SDMMC_POWER_OFF;
> +       INIT_WORK(&host->work, sd_request);
>         platform_set_drvdata(pdev, host);
>         pcr->slots[RTSX_SD_CARD].p_dev = pdev;
>         pcr->slots[RTSX_SD_CARD].card_event = rtsx_pci_sdmmc_card_event;
> @@ -1253,6 +1368,8 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>         pcr->slots[RTSX_SD_CARD].card_event = NULL;
>         mmc = host->mmc;
>
> +       cancel_work_sync(&host->work);
> +
>         mutex_lock(&host->host_mutex);
>         if (host->mrq) {
>                 dev_dbg(&(pdev->dev),
> @@ -1271,6 +1388,10 @@ static int rtsx_pci_sdmmc_drv_remove(struct platform_device *pdev)
>         mmc_remove_host(mmc);
>         host->eject = true;
>
> +       flush_workqueue(host->workq);
> +       destroy_workqueue(host->workq);
> +       host->workq = NULL;
> +
>         mmc_free_host(mmc);
>
>         dev_dbg(&(pdev->dev),
> --
> 1.7.9.5
>

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] mfd: rtsx: add dma transfer function
  2014-06-18  8:00       ` Lee Jones
@ 2014-07-02  9:14           ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-07-02  9:14 UTC (permalink / raw)
  To: Lee Jones, Ulf Hansson
  Cc: Samuel Ortiz, Chris Ball, devel, linux-kernel, linux-mmc,
	Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On 06/18/2014 04:00 PM, Lee Jones wrote:
> On Mon, 16 Jun 2014, Ulf Hansson wrote:
>
>> On 16 June 2014 14:20, Lee Jones <lee.jones@linaro.org> wrote:
>>>> From: Micky Ching <micky_ching@realsil.com.cn>
>>>>
>>>> rtsx driver using a single function for transfer data, dma map/unmap are
>>>> placed in one fix function. We need map/unmap dma in different place(for
>>>> mmc async driver), so add three function for dma map, dma transfer and
>>>> dma unmap.
>>>>
>>>> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
>>>> ---
>>>>   drivers/mfd/rtsx_pcr.c       |   76 ++++++++++++++++++++++++++----------------
>>>>   include/linux/mfd/rtsx_pci.h |    6 ++++
>>>>   2 files changed, 54 insertions(+), 28 deletions(-)
>>> I don't see any glaring issues with this patch.  Does it rely on the
>>> first patch, or vise versa, or can it just be applied?
>> The mmc part in patch2 relies on this one, but please go ahead and
>> apply the mfd patch if you see it good.
>>
>> I can later provide my ack for the mmc parts, in patch2, when it's a
>> reviewed properly and thus you can take it through your tree.
> There's no rush.  Once you're happy with the MMC patch, Micky can
> submit them both again with my Ack on the MFD part and I'll take them
> as a set.
>
Hi Lee,

Can you pick this patch directly?  need resend it now?

Thanks.

Best Regards.
micky.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] mfd: rtsx: add dma transfer function
@ 2014-07-02  9:14           ` micky
  0 siblings, 0 replies; 40+ messages in thread
From: micky @ 2014-07-02  9:14 UTC (permalink / raw)
  To: Lee Jones, Ulf Hansson
  Cc: Samuel Ortiz, Greg Kroah-Hartman, linux-mmc, linux-kernel,
	Chris Ball, Wei WANG, Roger, devel, Dan Carpenter

On 06/18/2014 04:00 PM, Lee Jones wrote:
> On Mon, 16 Jun 2014, Ulf Hansson wrote:
>
>> On 16 June 2014 14:20, Lee Jones <lee.jones@linaro.org> wrote:
>>>> From: Micky Ching <micky_ching@realsil.com.cn>
>>>>
>>>> rtsx driver using a single function for transfer data, dma map/unmap are
>>>> placed in one fix function. We need map/unmap dma in different place(for
>>>> mmc async driver), so add three function for dma map, dma transfer and
>>>> dma unmap.
>>>>
>>>> Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
>>>> ---
>>>>   drivers/mfd/rtsx_pcr.c       |   76 ++++++++++++++++++++++++++----------------
>>>>   include/linux/mfd/rtsx_pci.h |    6 ++++
>>>>   2 files changed, 54 insertions(+), 28 deletions(-)
>>> I don't see any glaring issues with this patch.  Does it rely on the
>>> first patch, or vise versa, or can it just be applied?
>> The mmc part in patch2 relies on this one, but please go ahead and
>> apply the mfd patch if you see it good.
>>
>> I can later provide my ack for the mmc parts, in patch2, when it's a
>> reviewed properly and thus you can take it through your tree.
> There's no rush.  Once you're happy with the MMC patch, Micky can
> submit them both again with my Ack on the MFD part and I'll take them
> as a set.
>
Hi Lee,

Can you pick this patch directly?  need resend it now?

Thanks.

Best Regards.
micky.

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 1/2] mfd: rtsx: add dma transfer function
  2014-07-02  9:14           ` micky
  (?)
@ 2014-07-02 12:15           ` Lee Jones
  -1 siblings, 0 replies; 40+ messages in thread
From: Lee Jones @ 2014-07-02 12:15 UTC (permalink / raw)
  To: micky
  Cc: Ulf Hansson, Samuel Ortiz, Chris Ball, devel, linux-kernel,
	linux-mmc, Greg Kroah-Hartman, Dan Carpenter, Roger, Wei WANG

On Wed, 02 Jul 2014, micky wrote:

> On 06/18/2014 04:00 PM, Lee Jones wrote:
> >On Mon, 16 Jun 2014, Ulf Hansson wrote:
> >
> >>On 16 June 2014 14:20, Lee Jones <lee.jones@linaro.org> wrote:
> >>>>From: Micky Ching <micky_ching@realsil.com.cn>
> >>>>
> >>>>rtsx driver using a single function for transfer data, dma map/unmap are
> >>>>placed in one fix function. We need map/unmap dma in different place(for
> >>>>mmc async driver), so add three function for dma map, dma transfer and
> >>>>dma unmap.
> >>>>
> >>>>Signed-off-by: Micky Ching <micky_ching@realsil.com.cn>
> >>>>---
> >>>>  drivers/mfd/rtsx_pcr.c       |   76 ++++++++++++++++++++++++++----------------
> >>>>  include/linux/mfd/rtsx_pci.h |    6 ++++
> >>>>  2 files changed, 54 insertions(+), 28 deletions(-)
> >>>I don't see any glaring issues with this patch.  Does it rely on the
> >>>first patch, or vise versa, or can it just be applied?
> >>The mmc part in patch2 relies on this one, but please go ahead and
> >>apply the mfd patch if you see it good.
> >>
> >>I can later provide my ack for the mmc parts, in patch2, when it's a
> >>reviewed properly and thus you can take it through your tree.
> >There's no rush.  Once you're happy with the MMC patch, Micky can
> >submit them both again with my Ack on the MFD part and I'll take them
> >as a set.
> >
> Hi Lee,
> 
> Can you pick this patch directly?  need resend it now?

No need to resend, I'll get round to applying it from here.

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/2] mmc: rtsx: add support for async request
  2014-06-06  7:05 ` micky_ching
@ 2014-07-18  7:28   ` Lee Jones
  -1 siblings, 0 replies; 40+ messages in thread
From: Lee Jones @ 2014-07-18  7:28 UTC (permalink / raw)
  To: micky_ching
  Cc: sameo, chris, ulf.hansson, devel, linux-kernel, linux-mmc,
	gregkh, dan.carpenter, rogerable, wei_wang

Chris, Ulf,

Sorry for the delay, things have been hectic.

The following changes since commit cd3de83f147601356395b57a8673e9c5ff1e59d1:

  Linux 3.16-rc4 (2014-07-06 12:37:51 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git tags/mfd-mmc-v3.17

for you to fetch changes up to 6291e7153a173f85bbdb13bf0c72fa11b5d74bc0:

  mmc: rtsx: add support for async request (2014-07-09 14:17:15 +0100)

----------------------------------------------------------------
Immutable branch between MFD and MMC due for the v3.17 merge window.

----------------------------------------------------------------
Micky Ching (2):
      mfd: rtsx: Add dma transfer function
      mmc: rtsx: add support for async request

 drivers/mfd/rtsx_pcr.c            |  76 ++++++++++++++--------
 drivers/mmc/host/rtsx_pci_sdmmc.c | 133 ++++++++++++++++++++++++++++++++++++--
 include/linux/mfd/rtsx_pci.h      |   6 ++
 3 files changed, 181 insertions(+), 34 deletions(-)

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH 0/2] mmc: rtsx: add support for async request
@ 2014-07-18  7:28   ` Lee Jones
  0 siblings, 0 replies; 40+ messages in thread
From: Lee Jones @ 2014-07-18  7:28 UTC (permalink / raw)
  To: micky_ching
  Cc: ulf.hansson, sameo, gregkh, linux-mmc, linux-kernel, chris,
	wei_wang, rogerable, devel, dan.carpenter

Chris, Ulf,

Sorry for the delay, things have been hectic.

The following changes since commit cd3de83f147601356395b57a8673e9c5ff1e59d1:

  Linux 3.16-rc4 (2014-07-06 12:37:51 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/lee/mfd.git tags/mfd-mmc-v3.17

for you to fetch changes up to 6291e7153a173f85bbdb13bf0c72fa11b5d74bc0:

  mmc: rtsx: add support for async request (2014-07-09 14:17:15 +0100)

----------------------------------------------------------------
Immutable branch between MFD and MMC due for the v3.17 merge window.

----------------------------------------------------------------
Micky Ching (2):
      mfd: rtsx: Add dma transfer function
      mmc: rtsx: add support for async request

 drivers/mfd/rtsx_pcr.c            |  76 ++++++++++++++--------
 drivers/mmc/host/rtsx_pci_sdmmc.c | 133 ++++++++++++++++++++++++++++++++++++--
 include/linux/mfd/rtsx_pci.h      |   6 ++
 3 files changed, 181 insertions(+), 34 deletions(-)

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog
_______________________________________________
devel mailing list
devel@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2014-07-18  7:29 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-06-06  7:05 [PATCH 0/2] mmc: rtsx: add support for async request micky_ching
2014-06-06  7:05 ` micky_ching
2014-06-06  7:05 ` [PATCH 1/2] mfd: rtsx: add dma transfer function micky_ching
2014-06-06  7:05   ` micky_ching
2014-06-16 12:20   ` Lee Jones
2014-06-16 14:24     ` Ulf Hansson
2014-06-16 14:24       ` Ulf Hansson
2014-06-18  8:00       ` Lee Jones
2014-07-02  9:14         ` micky
2014-07-02  9:14           ` micky
2014-07-02 12:15           ` Lee Jones
2014-06-17  1:08     ` micky
2014-06-17  1:08       ` micky
2014-06-06  7:05 ` [PATCH 2/2] mmc: rtsx: add support for async request micky_ching
2014-06-06  7:05   ` micky_ching
2014-06-16  8:42   ` Ulf Hansson
2014-06-16  8:42     ` Ulf Hansson
2014-06-16  8:51     ` Arend van Spriel
2014-06-16  8:51       ` Arend van Spriel
2014-06-16  9:09     ` micky
2014-06-16  9:09       ` micky
2014-06-16 12:40       ` Ulf Hansson
2014-06-17  1:04         ` micky
2014-06-17  1:04           ` micky
2014-06-17  7:45           ` Ulf Hansson
2014-06-17  7:45             ` Ulf Hansson
2014-06-18  1:17             ` micky
2014-06-18  1:17               ` micky
2014-06-18  7:39               ` Ulf Hansson
2014-06-18  7:39                 ` Ulf Hansson
2014-06-18 10:08                 ` micky
2014-06-18 10:08                   ` micky
2014-06-18 11:03                   ` Ulf Hansson
2014-06-19  1:57                     ` micky
2014-06-19  1:57                       ` micky
2014-06-23  9:26                       ` micky
2014-06-23  9:26                         ` micky
2014-07-02  8:53   ` Ulf Hansson
2014-07-18  7:28 ` [PATCH 0/2] " Lee Jones
2014-07-18  7:28   ` Lee Jones

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.