* [PATCH v10 1/4] dma: Introduce dma_get_merge_boundary()
2019-08-28 12:35 [PATCH v10 0/4] treewide: improve R-Car SDHI performance Yoshihiro Shimoda
@ 2019-08-28 12:35 ` Yoshihiro Shimoda
2019-08-28 12:35 ` [PATCH v10 2/4] iommu/dma: Add a new dma_map_ops of get_merge_boundary() Yoshihiro Shimoda
` (2 subsequent siblings)
3 siblings, 0 replies; 9+ messages in thread
From: Yoshihiro Shimoda @ 2019-08-28 12:35 UTC (permalink / raw)
To: ulf.hansson, hch, m.szyprowski, robin.murphy, joro, axboe
Cc: wsa+renesas, linux-mmc, iommu, linux-block, linux-renesas-soc,
Yoshihiro Shimoda
This patch adds a new DMA API "dma_get_merge_boundary". This function
returns the DMA merge boundary if the DMA layer can merge the segments.
This patch also adds the implementation for a new dma_map_ops pointer.
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
---
Documentation/DMA-API.txt | 8 ++++++++
include/linux/dma-mapping.h | 6 ++++++
kernel/dma/mapping.c | 11 +++++++++++
3 files changed, 25 insertions(+)
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index e47c63b..9c4dd3d 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -204,6 +204,14 @@ Returns the maximum size of a mapping for the device. The size parameter
of the mapping functions like dma_map_single(), dma_map_page() and
others should not be larger than the returned value.
+::
+
+ unsigned long
+ dma_get_merge_boundary(struct device *dev);
+
+Returns the DMA merge boundary. If the device cannot merge any of the DMA
address segments, the function returns 0.
+
Part Id - Streaming DMA mappings
--------------------------------
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 14702e2..7072b78 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -131,6 +131,7 @@ struct dma_map_ops {
int (*dma_supported)(struct device *dev, u64 mask);
u64 (*get_required_mask)(struct device *dev);
size_t (*max_mapping_size)(struct device *dev);
+ unsigned long (*get_merge_boundary)(struct device *dev);
};
#define DMA_MAPPING_ERROR (~(dma_addr_t)0)
@@ -462,6 +463,7 @@ int dma_set_mask(struct device *dev, u64 mask);
int dma_set_coherent_mask(struct device *dev, u64 mask);
u64 dma_get_required_mask(struct device *dev);
size_t dma_max_mapping_size(struct device *dev);
+unsigned long dma_get_merge_boundary(struct device *dev);
#else /* CONFIG_HAS_DMA */
static inline dma_addr_t dma_map_page_attrs(struct device *dev,
struct page *page, size_t offset, size_t size,
@@ -567,6 +569,10 @@ static inline size_t dma_max_mapping_size(struct device *dev)
{
return 0;
}
+static inline unsigned long dma_get_merge_boundary(struct device *dev)
+{
+ return 0;
+}
#endif /* CONFIG_HAS_DMA */
static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index b0038ca..b3077b5 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -405,3 +405,14 @@ size_t dma_max_mapping_size(struct device *dev)
return size;
}
EXPORT_SYMBOL_GPL(dma_max_mapping_size);
+
+unsigned long dma_get_merge_boundary(struct device *dev)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (!ops || !ops->get_merge_boundary)
+ return 0; /* can't merge */
+
+ return ops->get_merge_boundary(dev);
+}
+EXPORT_SYMBOL_GPL(dma_get_merge_boundary);
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v10 2/4] iommu/dma: Add a new dma_map_ops of get_merge_boundary()
2019-08-28 12:35 [PATCH v10 0/4] treewide: improve R-Car SDHI performance Yoshihiro Shimoda
2019-08-28 12:35 ` [PATCH v10 1/4] dma: Introduce dma_get_merge_boundary() Yoshihiro Shimoda
@ 2019-08-28 12:35 ` Yoshihiro Shimoda
2019-08-28 12:35 ` [PATCH v10 3/4] block: add a helper function to merge the segments Yoshihiro Shimoda
2019-08-28 12:35 ` [PATCH v10 4/4] mmc: queue: Use bigger segments if DMA MAP layer can " Yoshihiro Shimoda
3 siblings, 0 replies; 9+ messages in thread
From: Yoshihiro Shimoda @ 2019-08-28 12:35 UTC (permalink / raw)
To: ulf.hansson, hch, m.szyprowski, robin.murphy, joro, axboe
Cc: wsa+renesas, linux-mmc, iommu, linux-block, linux-renesas-soc,
Yoshihiro Shimoda
This patch adds a new dma_map_ops of get_merge_boundary() to
expose the DMA merge boundary if the domain type is IOMMU_DOMAIN_DMA.
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Acked-by: Joerg Roedel <jroedel@suse.de>
---
drivers/iommu/dma-iommu.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index de68b4a..ad861bd 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1091,6 +1091,13 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
return ret;
}
+static unsigned long iommu_dma_get_merge_boundary(struct device *dev)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+
+ return (1UL << __ffs(domain->pgsize_bitmap)) - 1;
+}
+
static const struct dma_map_ops iommu_dma_ops = {
.alloc = iommu_dma_alloc,
.free = iommu_dma_free,
@@ -1106,6 +1113,7 @@ static const struct dma_map_ops iommu_dma_ops = {
.sync_sg_for_device = iommu_dma_sync_sg_for_device,
.map_resource = iommu_dma_map_resource,
.unmap_resource = iommu_dma_unmap_resource,
+ .get_merge_boundary = iommu_dma_get_merge_boundary,
};
/*
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH v10 3/4] block: add a helper function to merge the segments
2019-08-28 12:35 [PATCH v10 0/4] treewide: improve R-Car SDHI performance Yoshihiro Shimoda
2019-08-28 12:35 ` [PATCH v10 1/4] dma: Introduce dma_get_merge_boundary() Yoshihiro Shimoda
2019-08-28 12:35 ` [PATCH v10 2/4] iommu/dma: Add a new dma_map_ops of get_merge_boundary() Yoshihiro Shimoda
@ 2019-08-28 12:35 ` Yoshihiro Shimoda
2019-09-02 21:47 ` Jens Axboe
2019-08-28 12:35 ` [PATCH v10 4/4] mmc: queue: Use bigger segments if DMA MAP layer can " Yoshihiro Shimoda
3 siblings, 1 reply; 9+ messages in thread
From: Yoshihiro Shimoda @ 2019-08-28 12:35 UTC (permalink / raw)
To: ulf.hansson, hch, m.szyprowski, robin.murphy, joro, axboe
Cc: wsa+renesas, linux-mmc, iommu, linux-block, linux-renesas-soc,
Yoshihiro Shimoda
This patch adds a helper function to check whether a queue can merge
the segments by the DMA MAP layer (e.g. via IOMMU).
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
---
block/blk-settings.c | 23 +++++++++++++++++++++++
include/linux/blkdev.h | 2 ++
2 files changed, 25 insertions(+)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 2c18312..c3632fc 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -12,6 +12,7 @@
#include <linux/lcm.h>
#include <linux/jiffies.h>
#include <linux/gfp.h>
+#include <linux/dma-mapping.h>
#include "blk.h"
#include "blk-wbt.h"
@@ -832,6 +833,28 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
}
EXPORT_SYMBOL_GPL(blk_queue_write_cache);
+/**
+ * blk_queue_can_use_dma_map_merging - configure queue for merging segments.
+ * @q: the request queue for the device
+ * @dev: the device pointer for dma
+ *
+ * Tell the block layer about merging the segments by dma map of @q.
+ */
+bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
+ struct device *dev)
+{
+ unsigned long boundary = dma_get_merge_boundary(dev);
+
+ if (!boundary)
+ return false;
+
+ /* No need to update max_segment_size. see blk_queue_virt_boundary() */
+ blk_queue_virt_boundary(q, boundary);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
+
static int __init blk_settings_init(void)
{
blk_max_low_pfn = max_low_pfn - 1;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ac7901..d62d6e2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1086,6 +1086,8 @@ extern void blk_queue_dma_alignment(struct request_queue *, int);
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
+extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
+ struct device *dev);
/*
* Number of physical segments as sent to the device.
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v10 3/4] block: add a helper function to merge the segments
2019-08-28 12:35 ` [PATCH v10 3/4] block: add a helper function to merge the segments Yoshihiro Shimoda
@ 2019-09-02 21:47 ` Jens Axboe
2019-09-03 4:59 ` Yoshihiro Shimoda
0 siblings, 1 reply; 9+ messages in thread
From: Jens Axboe @ 2019-09-02 21:47 UTC (permalink / raw)
To: Yoshihiro Shimoda, ulf.hansson, hch, m.szyprowski, robin.murphy, joro
Cc: wsa+renesas, linux-mmc, iommu, linux-block, linux-renesas-soc
On 8/28/19 6:35 AM, Yoshihiro Shimoda wrote:
> This patch adds a helper function whether a queue can merge
> the segments by the DMA MAP layer (e.g. via IOMMU).
Reviewed-by: Jens Axboe <axboe@kernel.dk>
--
Jens Axboe
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v10 3/4] block: add a helper function to merge the segments
2019-09-02 21:47 ` Jens Axboe
@ 2019-09-03 4:59 ` Yoshihiro Shimoda
2019-09-03 6:33 ` hch
0 siblings, 1 reply; 9+ messages in thread
From: Yoshihiro Shimoda @ 2019-09-03 4:59 UTC (permalink / raw)
To: hch
Cc: wsa+renesas, linux-mmc, iommu, linux-block, linux-renesas-soc,
ulf.hansson, m.szyprowski, robin.murphy, joro, Jens Axboe
Hi Christoph,
Now this patch series got {Ack,Review}ed-by from each maintainer.
https://patchwork.kernel.org/project/linux-renesas-soc/list/?series=166501
So, would you pick this up through the dma-mapping tree as you said before?
> From: Jens Axboe, Sent: Tuesday, September 3, 2019 6:47 AM
>
> On 8/28/19 6:35 AM, Yoshihiro Shimoda wrote:
> > This patch adds a helper function whether a queue can merge
> > the segments by the DMA MAP layer (e.g. via IOMMU).
>
> Reviewed-by: Jens Axboe <axboe@kernel.dk>
Jens, thank you for your review!
Best regards,
Yoshihiro Shimoda
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v10 3/4] block: add a helper function to merge the segments
2019-09-03 4:59 ` Yoshihiro Shimoda
@ 2019-09-03 6:33 ` hch
2019-09-03 6:42 ` Yoshihiro Shimoda
0 siblings, 1 reply; 9+ messages in thread
From: hch @ 2019-09-03 6:33 UTC (permalink / raw)
To: Yoshihiro Shimoda
Cc: hch, wsa+renesas, linux-mmc, iommu, linux-block,
linux-renesas-soc, ulf.hansson, m.szyprowski, robin.murphy, joro,
Jens Axboe
On Tue, Sep 03, 2019 at 04:59:59AM +0000, Yoshihiro Shimoda wrote:
> Hi Christoph,
>
> Now this patch series got {Ack,Review}ed-by from each maintainer.
> https://patchwork.kernel.org/project/linux-renesas-soc/list/?series=166501
>
> So, would you pick this up through the dma-mapping tree as you said before?
I've applied it to the dma-mapping tree for 5.4 now, thanks a lot!
^ permalink raw reply [flat|nested] 9+ messages in thread
* RE: [PATCH v10 3/4] block: add a helper function to merge the segments
2019-09-03 6:33 ` hch
@ 2019-09-03 6:42 ` Yoshihiro Shimoda
0 siblings, 0 replies; 9+ messages in thread
From: Yoshihiro Shimoda @ 2019-09-03 6:42 UTC (permalink / raw)
To: hch
Cc: wsa+renesas, linux-mmc, iommu, linux-block, linux-renesas-soc,
ulf.hansson, m.szyprowski, robin.murphy, joro, Jens Axboe
Hi Christoph,
> > Now this patch series got {Ack,Review}ed-by from each maintainer.
> > https://patchwork.kernel.org/project/linux-renesas-soc/list/?series=166501
> >
> > So, would you pick this up through the dma-mapping tree as you said before?
>
> I've applied it to the dma-mapping tree for 5.4 now, thanks a lot!
Thank you very much for your support!
Best regards,
Yoshihiro Shimoda
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v10 4/4] mmc: queue: Use bigger segments if DMA MAP layer can merge the segments
2019-08-28 12:35 [PATCH v10 0/4] treewide: improve R-Car SDHI performance Yoshihiro Shimoda
` (2 preceding siblings ...)
2019-08-28 12:35 ` [PATCH v10 3/4] block: add a helper function to merge the segments Yoshihiro Shimoda
@ 2019-08-28 12:35 ` Yoshihiro Shimoda
3 siblings, 0 replies; 9+ messages in thread
From: Yoshihiro Shimoda @ 2019-08-28 12:35 UTC (permalink / raw)
To: ulf.hansson, hch, m.szyprowski, robin.murphy, joro, axboe
Cc: wsa+renesas, linux-mmc, iommu, linux-block, linux-renesas-soc,
Yoshihiro Shimoda
When the max_segs of a mmc host is smaller than 512, the mmc
subsystem tries to use 512 segments if DMA MAP layer can merge
the segments, and then the mmc subsystem exposes such information
to the block layer by using blk_queue_can_use_dma_map_merging().
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
---
drivers/mmc/core/queue.c | 35 ++++++++++++++++++++++++++++++++---
include/linux/mmc/host.h | 1 +
2 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 7102e2e..1e29b30 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -21,6 +21,8 @@
#include "card.h"
#include "host.h"
+#define MMC_DMA_MAP_MERGE_SEGMENTS 512
+
static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq)
{
/* Allow only 1 DCMD at a time */
@@ -193,6 +195,12 @@ static void mmc_queue_setup_discard(struct request_queue *q,
blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
}
+static unsigned int mmc_get_max_segments(struct mmc_host *host)
+{
+ return host->can_dma_map_merge ? MMC_DMA_MAP_MERGE_SEGMENTS :
+ host->max_segs;
+}
+
/**
* mmc_init_request() - initialize the MMC-specific per-request data
* @q: the request queue
@@ -206,7 +214,7 @@ static int __mmc_init_request(struct mmc_queue *mq, struct request *req,
struct mmc_card *card = mq->card;
struct mmc_host *host = card->host;
- mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp);
+ mq_rq->sg = mmc_alloc_sg(mmc_get_max_segments(host), gfp);
if (!mq_rq->sg)
return -ENOMEM;
@@ -362,13 +370,23 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
blk_queue_max_hw_sectors(mq->queue,
min(host->max_blk_count, host->max_req_size / 512));
- blk_queue_max_segments(mq->queue, host->max_segs);
+ if (host->can_dma_map_merge)
+ WARN(!blk_queue_can_use_dma_map_merging(mq->queue,
+ mmc_dev(host)),
+ "merging was advertised but not possible");
+ blk_queue_max_segments(mq->queue, mmc_get_max_segments(host));
if (mmc_card_mmc(card))
block_size = card->ext_csd.data_sector_size;
blk_queue_logical_block_size(mq->queue, block_size);
- blk_queue_max_segment_size(mq->queue,
+ /*
+ * After blk_queue_can_use_dma_map_merging() has succeeded (it calls
+ * blk_queue_virt_boundary() internally), the mmc should not also call
+ * blk_queue_max_segment_size().
+ */
+ if (!host->can_dma_map_merge)
+ blk_queue_max_segment_size(mq->queue,
round_down(host->max_seg_size, block_size));
dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
@@ -418,6 +436,17 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
mq->tag_set.cmd_size = sizeof(struct mmc_queue_req);
mq->tag_set.driver_data = mq;
+ /*
+ * Since blk_mq_alloc_tag_set() calls .init_request() of mmc_mq_ops,
+ * the host->can_dma_map_merge should be set before that call so that
+ * mmc_get_max_segments() returns the correct max_segs.
+ */
+ if (host->max_segs < MMC_DMA_MAP_MERGE_SEGMENTS &&
+ dma_get_merge_boundary(mmc_dev(host)))
+ host->can_dma_map_merge = 1;
+ else
+ host->can_dma_map_merge = 0;
+
ret = blk_mq_alloc_tag_set(&mq->tag_set);
if (ret)
return ret;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 4a351cb..c5662b3 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -396,6 +396,7 @@ struct mmc_host {
unsigned int retune_paused:1; /* re-tuning is temporarily disabled */
unsigned int use_blk_mq:1; /* use blk-mq */
unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */
+ unsigned int can_dma_map_merge:1; /* merging can be used */
int rescan_disable; /* disable card detection */
int rescan_entered; /* used with nonremovable devices */
--
2.7.4
^ permalink raw reply related [flat|nested] 9+ messages in thread