All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH V8 1/5] dma-mapping: Rework dma_get_cache_alignment()
@ 2017-10-17  8:05 ` Huacai Chen
  0 siblings, 0 replies; 26+ messages in thread
From: Huacai Chen @ 2017-10-17  8:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
	linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, Huacai Chen, stable, Michael S . Tsirkin,
	Pawel Osciak, Kyungmin Park, Michael Chan,
	Benjamin Herrenschmidt, Ivan Mikhaylov

Make dma_get_cache_alignment() to accept a 'dev' argument. As a result,
it can return different alignments due to different devices' I/O cache
coherency.

Currently, ARM/ARM64 and MIPS support coherent & noncoherent devices
co-exist. This may be extended in the future, so add a new function
pointer (i.e, get_cache_alignment) in 'struct dma_map_ops' as a generic
solution.

Cc: stable@vger.kernel.org
Cc: Michael S. Tsirkin <mst@mellanox.co.il>
Cc: Pawel Osciak <pawel@osciak.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Michael Chan <michael.chan@broadcom.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Ivan Mikhaylov <ivan@ru.ibm.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Cc: Andy Gross <agross@codeaurora.org>
Cc: Mark A. Greer <mgreer@animalcreek.com>
Cc: Robert Baldyga <r.baldyga@hackerion.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 drivers/infiniband/hw/mthca/mthca_main.c       |  2 +-
 drivers/media/v4l2-core/videobuf2-dma-contig.c |  2 +-
 drivers/net/ethernet/broadcom/b44.c            |  8 +++----
 drivers/net/ethernet/ibm/emac/core.c           | 22 ++++++++++-------
 drivers/net/ethernet/ibm/emac/core.h           |  6 ++---
 drivers/net/ethernet/mellanox/mlx4/main.c      |  2 +-
 drivers/spi/spi-qup.c                          |  4 ++--
 drivers/tty/serial/mpsc.c                      | 33 ++++++++++++++++++--------
 drivers/tty/serial/samsung.c                   | 14 +++++------
 include/linux/dma-mapping.h                    | 17 +++++++++----
 10 files changed, 68 insertions(+), 42 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index e36a9bc..078fe8d 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -416,7 +416,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
 
 	/* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
 	mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size,
-					   dma_get_cache_alignment()) / mdev->limits.mtt_seg_size;
+					   dma_get_cache_alignment(&mdev->pdev->dev)) / mdev->limits.mtt_seg_size;
 
 	mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
 							 mdev->limits.mtt_seg_size,
diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c
index 9f389f3..1f6a9b7 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -484,7 +484,7 @@ static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr,
 	int ret = 0;
 	struct sg_table *sgt;
 	unsigned long contig_size;
-	unsigned long dma_align = dma_get_cache_alignment();
+	unsigned long dma_align = dma_get_cache_alignment(dev);
 
 	/* Only cache aligned DMA transfers are reliable */
 	if (!IS_ALIGNED(vaddr | size, dma_align)) {
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index a1125d1..2f6ffe5 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2344,6 +2344,10 @@ static int b44_init_one(struct ssb_device *sdev,
 	struct net_device *dev;
 	struct b44 *bp;
 	int err;
+	unsigned int dma_desc_align_size = dma_get_cache_alignment(sdev->dma_dev);
+
+	/* Setup paramaters for syncing RX/TX DMA descriptors */
+	dma_desc_sync_size = max_t(unsigned int, dma_desc_align_size, sizeof(struct dma_desc));
 
 	instance++;
 
@@ -2587,12 +2591,8 @@ static inline void b44_pci_exit(void)
 
 static int __init b44_init(void)
 {
-	unsigned int dma_desc_align_size = dma_get_cache_alignment();
 	int err;
 
-	/* Setup paramaters for syncing RX/TX DMA descriptors */
-	dma_desc_sync_size = max_t(unsigned int, dma_desc_align_size, sizeof(struct dma_desc));
-
 	err = b44_pci_init();
 	if (err)
 		return err;
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 7feff24..b0c5319 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -1030,8 +1030,9 @@ static int emac_set_mac_address(struct net_device *ndev, void *sa)
 
 static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
 {
-	int rx_sync_size = emac_rx_sync_size(new_mtu);
-	int rx_skb_size = emac_rx_skb_size(new_mtu);
+	struct device *dma_dev = &dev->ofdev->dev;
+	int rx_skb_size = emac_rx_skb_size(dma_dev, new_mtu);
+	int rx_sync_size = emac_rx_sync_size(dma_dev, new_mtu);
 	int i, ret = 0;
 	int mr1_jumbo_bit_change = 0;
 
@@ -1115,20 +1116,21 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
 static int emac_change_mtu(struct net_device *ndev, int new_mtu)
 {
 	struct emac_instance *dev = netdev_priv(ndev);
+	struct device *dma_dev = &dev->ofdev->dev;
 	int ret = 0;
 
 	DBG(dev, "change_mtu(%d)" NL, new_mtu);
 
 	if (netif_running(ndev)) {
 		/* Check if we really need to reinitialize RX ring */
-		if (emac_rx_skb_size(ndev->mtu) != emac_rx_skb_size(new_mtu))
+		if (emac_rx_skb_size(dma_dev, ndev->mtu) != emac_rx_skb_size(dma_dev, new_mtu))
 			ret = emac_resize_rx_ring(dev, new_mtu);
 	}
 
 	if (!ret) {
 		ndev->mtu = new_mtu;
-		dev->rx_skb_size = emac_rx_skb_size(new_mtu);
-		dev->rx_sync_size = emac_rx_sync_size(new_mtu);
+		dev->rx_skb_size = emac_rx_skb_size(dma_dev, new_mtu);
+		dev->rx_sync_size = emac_rx_sync_size(dma_dev, new_mtu);
 	}
 
 	return ret;
@@ -1171,6 +1173,7 @@ static void emac_clean_rx_ring(struct emac_instance *dev)
 static inline int emac_alloc_rx_skb(struct emac_instance *dev, int slot,
 				    gfp_t flags)
 {
+	struct device *dma_dev = &dev->ofdev->dev;
 	struct sk_buff *skb = alloc_skb(dev->rx_skb_size, flags);
 	if (unlikely(!skb))
 		return -ENOMEM;
@@ -1649,11 +1652,12 @@ static inline void emac_recycle_rx_skb(struct emac_instance *dev, int slot,
 				       int len)
 {
 	struct sk_buff *skb = dev->rx_skb[slot];
+	struct device *dma_dev = &dev->ofdev->dev;
 
 	DBG2(dev, "recycle %d %d" NL, slot, len);
 
 	if (len)
-		dma_map_single(&dev->ofdev->dev, skb->data - 2,
+		dma_map_single(dma_dev, skb->data - 2,
 			       EMAC_DMA_ALIGN(len + 2), DMA_FROM_DEVICE);
 
 	dev->rx_desc[slot].data_len = 0;
@@ -1727,6 +1731,7 @@ static int emac_poll_rx(void *param, int budget)
 {
 	struct emac_instance *dev = param;
 	int slot = dev->rx_slot, received = 0;
+	struct device *dma_dev = &dev->ofdev->dev;
 
 	DBG2(dev, "poll_rx(%d)" NL, budget);
 
@@ -2998,6 +3003,7 @@ static int emac_probe(struct platform_device *ofdev)
 	struct emac_instance *dev;
 	struct device_node *np = ofdev->dev.of_node;
 	struct device_node **blist = NULL;
+	struct device *dma_dev = &ofdev->dev;
 	int err, i;
 
 	/* Skip unused/unwired EMACS.  We leave the check for an unused
@@ -3077,8 +3083,8 @@ static int emac_probe(struct platform_device *ofdev)
 		       np, dev->mal_dev->dev.of_node);
 		goto err_rel_deps;
 	}
-	dev->rx_skb_size = emac_rx_skb_size(ndev->mtu);
-	dev->rx_sync_size = emac_rx_sync_size(ndev->mtu);
+	dev->rx_skb_size = emac_rx_skb_size(dma_dev, ndev->mtu);
+	dev->rx_sync_size = emac_rx_sync_size(dma_dev, ndev->mtu);
 
 	/* Get pointers to BD rings */
 	dev->tx_desc =
diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h
index 369de2c..68548a0 100644
--- a/drivers/net/ethernet/ibm/emac/core.h
+++ b/drivers/net/ethernet/ibm/emac/core.h
@@ -68,20 +68,20 @@ static inline int emac_rx_size(int mtu)
 		return mal_rx_size(ETH_DATA_LEN + EMAC_MTU_OVERHEAD);
 }
 
-#define EMAC_DMA_ALIGN(x)		ALIGN((x), dma_get_cache_alignment())
+#define EMAC_DMA_ALIGN(x)		ALIGN((x), dma_get_cache_alignment(dma_dev))
 
 #define EMAC_RX_SKB_HEADROOM		\
 	EMAC_DMA_ALIGN(CONFIG_IBM_EMAC_RX_SKB_HEADROOM)
 
 /* Size of RX skb for the given MTU */
-static inline int emac_rx_skb_size(int mtu)
+static inline int emac_rx_skb_size(struct device *dma_dev, int mtu)
 {
 	int size = max(mtu + EMAC_MTU_OVERHEAD, emac_rx_size(mtu));
 	return EMAC_DMA_ALIGN(size + 2) + EMAC_RX_SKB_HEADROOM;
 }
 
 /* RX DMA sync size */
-static inline int emac_rx_sync_size(int mtu)
+static inline int emac_rx_sync_size(struct device *dma_dev, int mtu)
 {
 	return EMAC_DMA_ALIGN(emac_rx_size(mtu) + 2);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index e61c99e..bc146dd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1660,7 +1660,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 	 */
 	dev->caps.reserved_mtts =
 		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
-		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
+		      dma_get_cache_alignment(&dev->persist->pdev->dev)) / dev->caps.mtt_entry_sz;
 
 	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
 				  init_hca->mtt_base,
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 974a8ce..e6da66e 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -862,7 +862,7 @@ static bool spi_qup_can_dma(struct spi_master *master, struct spi_device *spi,
 			    struct spi_transfer *xfer)
 {
 	struct spi_qup *qup = spi_master_get_devdata(master);
-	size_t dma_align = dma_get_cache_alignment();
+	size_t dma_align = dma_get_cache_alignment(qup->dev);
 	int n_words;
 
 	if (xfer->rx_buf) {
@@ -1038,7 +1038,7 @@ static int spi_qup_probe(struct platform_device *pdev)
 	master->transfer_one = spi_qup_transfer_one;
 	master->dev.of_node = pdev->dev.of_node;
 	master->auto_runtime_pm = true;
-	master->dma_alignment = dma_get_cache_alignment();
+	master->dma_alignment = dma_get_cache_alignment(dev);
 	master->max_dma_len = SPI_MAX_XFER;
 
 	platform_set_drvdata(pdev, master);
diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c
index 67ffecc..c708457 100644
--- a/drivers/tty/serial/mpsc.c
+++ b/drivers/tty/serial/mpsc.c
@@ -81,19 +81,19 @@
  * Number of Tx & Rx descriptors must be powers of 2.
  */
 #define	MPSC_RXR_ENTRIES	32
-#define	MPSC_RXRE_SIZE		dma_get_cache_alignment()
+#define	MPSC_RXRE_SIZE		dma_get_cache_alignment(dma_dev)
 #define	MPSC_RXR_SIZE		(MPSC_RXR_ENTRIES * MPSC_RXRE_SIZE)
-#define	MPSC_RXBE_SIZE		dma_get_cache_alignment()
+#define	MPSC_RXBE_SIZE		dma_get_cache_alignment(dma_dev)
 #define	MPSC_RXB_SIZE		(MPSC_RXR_ENTRIES * MPSC_RXBE_SIZE)
 
 #define	MPSC_TXR_ENTRIES	32
-#define	MPSC_TXRE_SIZE		dma_get_cache_alignment()
+#define	MPSC_TXRE_SIZE		dma_get_cache_alignment(dma_dev)
 #define	MPSC_TXR_SIZE		(MPSC_TXR_ENTRIES * MPSC_TXRE_SIZE)
-#define	MPSC_TXBE_SIZE		dma_get_cache_alignment()
+#define	MPSC_TXBE_SIZE		dma_get_cache_alignment(dma_dev)
 #define	MPSC_TXB_SIZE		(MPSC_TXR_ENTRIES * MPSC_TXBE_SIZE)
 
 #define	MPSC_DMA_ALLOC_SIZE	(MPSC_RXR_SIZE + MPSC_RXB_SIZE + MPSC_TXR_SIZE \
-		+ MPSC_TXB_SIZE + dma_get_cache_alignment() /* for alignment */)
+		+ MPSC_TXB_SIZE + dma_get_cache_alignment(dma_dev) /* for alignment */)
 
 /* Rx and Tx Ring entry descriptors -- assume entry size is <= cacheline size */
 struct mpsc_rx_desc {
@@ -520,6 +520,7 @@ static uint mpsc_sdma_tx_active(struct mpsc_port_info *pi)
 static void mpsc_sdma_start_tx(struct mpsc_port_info *pi)
 {
 	struct mpsc_tx_desc *txre, *txre_p;
+	struct device *dma_dev = pi->port.dev;
 
 	/* If tx isn't running & there's a desc ready to go, start it */
 	if (!mpsc_sdma_tx_active(pi)) {
@@ -738,7 +739,7 @@ static void mpsc_init_hw(struct mpsc_port_info *pi)
 
 	mpsc_brg_init(pi, pi->brg_clk_src);
 	mpsc_brg_enable(pi);
-	mpsc_sdma_init(pi, dma_get_cache_alignment());	/* burst a cacheline */
+	mpsc_sdma_init(pi, dma_get_cache_alignment(pi->port.dev));	/* burst a cacheline */
 	mpsc_sdma_stop(pi);
 	mpsc_hw_init(pi);
 }
@@ -746,6 +747,7 @@ static void mpsc_init_hw(struct mpsc_port_info *pi)
 static int mpsc_alloc_ring_mem(struct mpsc_port_info *pi)
 {
 	int rc = 0;
+	struct device *dma_dev = pi->port.dev;
 
 	pr_debug("mpsc_alloc_ring_mem[%d]: Allocating ring mem\n",
 		pi->port.line);
@@ -769,6 +771,8 @@ static int mpsc_alloc_ring_mem(struct mpsc_port_info *pi)
 
 static void mpsc_free_ring_mem(struct mpsc_port_info *pi)
 {
+	struct device *dma_dev = pi->port.dev;
+
 	pr_debug("mpsc_free_ring_mem[%d]: Freeing ring mem\n", pi->port.line);
 
 	if (pi->dma_region) {
@@ -784,6 +788,7 @@ static void mpsc_init_rings(struct mpsc_port_info *pi)
 {
 	struct mpsc_rx_desc *rxre;
 	struct mpsc_tx_desc *txre;
+	struct device *dma_dev = pi->port.dev;
 	dma_addr_t dp, dp_p;
 	u8 *bp, *bp_p;
 	int i;
@@ -798,8 +803,8 @@ static void mpsc_init_rings(struct mpsc_port_info *pi)
 	 * Descriptors & buffers are multiples of cacheline size and must be
 	 * cacheline aligned.
 	 */
-	dp = ALIGN((u32)pi->dma_region, dma_get_cache_alignment());
-	dp_p = ALIGN((u32)pi->dma_region_p, dma_get_cache_alignment());
+	dp = ALIGN((u32)pi->dma_region, dma_get_cache_alignment(dma_dev));
+	dp_p = ALIGN((u32)pi->dma_region_p, dma_get_cache_alignment(dma_dev));
 
 	/*
 	 * Partition dma region into rx ring descriptor, rx buffers,
@@ -936,6 +941,7 @@ static int serial_polled;
 static int mpsc_rx_intr(struct mpsc_port_info *pi, unsigned long *flags)
 {
 	struct mpsc_rx_desc *rxre;
+	struct device *dma_dev = pi->port.dev;
 	struct tty_port *port = &pi->port.state->port;
 	u32	cmdstat, bytes_in, i;
 	int	rc = 0;
@@ -1091,6 +1097,7 @@ static int mpsc_rx_intr(struct mpsc_port_info *pi, unsigned long *flags)
 static void mpsc_setup_tx_desc(struct mpsc_port_info *pi, u32 count, u32 intr)
 {
 	struct mpsc_tx_desc *txre;
+	struct device *dma_dev = pi->port.dev;
 
 	txre = (struct mpsc_tx_desc *)(pi->txr
 			+ (pi->txr_head * MPSC_TXRE_SIZE));
@@ -1113,6 +1120,7 @@ static void mpsc_setup_tx_desc(struct mpsc_port_info *pi, u32 count, u32 intr)
 
 static void mpsc_copy_tx_data(struct mpsc_port_info *pi)
 {
+	struct device *dma_dev = pi->port.dev;
 	struct circ_buf *xmit = &pi->port.state->xmit;
 	u8 *bp;
 	u32 i;
@@ -1166,6 +1174,7 @@ static void mpsc_copy_tx_data(struct mpsc_port_info *pi)
 static int mpsc_tx_intr(struct mpsc_port_info *pi)
 {
 	struct mpsc_tx_desc *txre;
+	struct device *dma_dev = pi->port.dev;
 	int rc = 0;
 	unsigned long iflags;
 
@@ -1360,6 +1369,7 @@ static int mpsc_startup(struct uart_port *port)
 {
 	struct mpsc_port_info *pi =
 		container_of(port, struct mpsc_port_info, port);
+	struct device *dma_dev = pi->port.dev;
 	u32 flag = 0;
 	int rc;
 
@@ -1555,9 +1565,10 @@ static void mpsc_put_poll_char(struct uart_port *port,
 
 static int mpsc_get_poll_char(struct uart_port *port)
 {
+	struct mpsc_rx_desc *rxre;
 	struct mpsc_port_info *pi =
 		container_of(port, struct mpsc_port_info, port);
-	struct mpsc_rx_desc *rxre;
+	struct device *dma_dev = pi->port.dev;
 	u32	cmdstat, bytes_in, i;
 	u8	*bp;
 
@@ -1706,6 +1717,7 @@ static const struct uart_ops mpsc_pops = {
 static void mpsc_console_write(struct console *co, const char *s, uint count)
 {
 	struct mpsc_port_info *pi = &mpsc_ports[co->index];
+	struct device *dma_dev = pi->port.dev;
 	u8 *bp, *dp, add_cr = 0;
 	int i;
 	unsigned long iflags;
@@ -2024,7 +2036,8 @@ static void mpsc_drv_unmap_regs(struct mpsc_port_info *pi)
 static void mpsc_drv_get_platform_data(struct mpsc_port_info *pi,
 		struct platform_device *pd, int num)
 {
-	struct mpsc_pdata	*pdata;
+	struct mpsc_pdata *pdata;
+	struct device *dma_dev = pi->port.dev;
 
 	pdata = dev_get_platdata(&pd->dev);
 
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index 8aca18c..9df918e5 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -241,7 +241,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport)
 	/* Enable tx dma mode */
 	ucon = rd_regl(port, S3C2410_UCON);
 	ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK);
-	ucon |= (dma_get_cache_alignment() >= 16) ?
+	ucon |= (dma_get_cache_alignment(port->dev) >= 16) ?
 		S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1;
 	ucon |= S3C64XX_UCON_TXMODE_DMA;
 	wr_regl(port,  S3C2410_UCON, ucon);
@@ -292,7 +292,7 @@ static int s3c24xx_serial_start_tx_dma(struct s3c24xx_uart_port *ourport,
 	if (ourport->tx_mode != S3C24XX_TX_DMA)
 		enable_tx_dma(ourport);
 
-	dma->tx_size = count & ~(dma_get_cache_alignment() - 1);
+	dma->tx_size = count & ~(dma_get_cache_alignment(port->dev) - 1);
 	dma->tx_transfer_addr = dma->tx_addr + xmit->tail;
 
 	dma_sync_single_for_device(ourport->port.dev, dma->tx_transfer_addr,
@@ -332,7 +332,7 @@ static void s3c24xx_serial_start_next_tx(struct s3c24xx_uart_port *ourport)
 
 	if (!ourport->dma || !ourport->dma->tx_chan ||
 	    count < ourport->min_dma_size ||
-	    xmit->tail & (dma_get_cache_alignment() - 1))
+	    xmit->tail & (dma_get_cache_alignment(port->dev) - 1))
 		s3c24xx_serial_start_tx_pio(ourport);
 	else
 		s3c24xx_serial_start_tx_dma(ourport, count);
@@ -718,8 +718,8 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id)
 
 	if (ourport->dma && ourport->dma->tx_chan &&
 	    count >= ourport->min_dma_size) {
-		int align = dma_get_cache_alignment() -
-			(xmit->tail & (dma_get_cache_alignment() - 1));
+		int align = dma_get_cache_alignment(port->dev) -
+			(xmit->tail & (dma_get_cache_alignment(port->dev) - 1));
 		if (count-align >= ourport->min_dma_size) {
 			dma_count = count-align;
 			count = align;
@@ -870,7 +870,7 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p)
 	dma->tx_conf.direction		= DMA_MEM_TO_DEV;
 	dma->tx_conf.dst_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE;
 	dma->tx_conf.dst_addr		= p->port.mapbase + S3C2410_UTXH;
-	if (dma_get_cache_alignment() >= 16)
+	if (dma_get_cache_alignment(p->port.dev) >= 16)
 		dma->tx_conf.dst_maxburst = 16;
 	else
 		dma->tx_conf.dst_maxburst = 1;
@@ -1849,7 +1849,7 @@ static int s3c24xx_serial_probe(struct platform_device *pdev)
 	 * so find minimal transfer size suitable for DMA mode
 	 */
 	ourport->min_dma_size = max_t(int, ourport->port.fifosize,
-				    dma_get_cache_alignment());
+				    dma_get_cache_alignment(ourport->port.dev));
 
 	dbg("%s: initialising port %p...\n", __func__, ourport);
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 29ce981..1326023 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -131,6 +131,7 @@ struct dma_map_ops {
 #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
 	u64 (*get_required_mask)(struct device *dev);
 #endif
+	int (*get_cache_alignment)(struct device *dev);
 	int is_phys;
 };
 
@@ -697,12 +698,18 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
 }
 
 #ifdef CONFIG_HAS_DMA
-static inline int dma_get_cache_alignment(void)
-{
-#ifdef ARCH_DMA_MINALIGN
-	return ARCH_DMA_MINALIGN;
+
+#ifndef ARCH_DMA_MINALIGN
+#define ARCH_DMA_MINALIGN 1
 #endif
-	return 1;
+
+static inline int dma_get_cache_alignment(struct device *dev)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	if (dev && ops && ops->get_cache_alignment)
+		return ops->get_cache_alignment(dev);
+
+	return ARCH_DMA_MINALIGN; /* compatible behavior */
 }
 #endif
 
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V8 1/5] dma-mapping: Rework dma_get_cache_alignment()
@ 2017-10-17  8:05 ` Huacai Chen
  0 siblings, 0 replies; 26+ messages in thread
From: Huacai Chen @ 2017-10-17  8:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
	linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, Huacai Chen, stable, Michael S . Tsirkin,
	Pawel Osciak, Kyungmin Park, Michael Chan,
	Benjamin Herrenschmidt, Ivan Mikhaylov, Tariq Toukan, Andy Gross,
	Mark A . Greer, Robert Baldyga

Make dma_get_cache_alignment() to accept a 'dev' argument. As a result,
it can return different alignments due to different devices' I/O cache
coherency.

Currently, ARM/ARM64 and MIPS support coherent & noncoherent devices
co-exist. This may be extended in the future, so add a new function
pointer (i.e, get_cache_alignment) in 'struct dma_map_ops' as a generic
solution.

Cc: stable@vger.kernel.org
Cc: Michael S. Tsirkin <mst@mellanox.co.il>
Cc: Pawel Osciak <pawel@osciak.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Michael Chan <michael.chan@broadcom.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Ivan Mikhaylov <ivan@ru.ibm.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Cc: Andy Gross <agross@codeaurora.org>
Cc: Mark A. Greer <mgreer@animalcreek.com>
Cc: Robert Baldyga <r.baldyga@hackerion.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 drivers/infiniband/hw/mthca/mthca_main.c       |  2 +-
 drivers/media/v4l2-core/videobuf2-dma-contig.c |  2 +-
 drivers/net/ethernet/broadcom/b44.c            |  8 +++----
 drivers/net/ethernet/ibm/emac/core.c           | 22 ++++++++++-------
 drivers/net/ethernet/ibm/emac/core.h           |  6 ++---
 drivers/net/ethernet/mellanox/mlx4/main.c      |  2 +-
 drivers/spi/spi-qup.c                          |  4 ++--
 drivers/tty/serial/mpsc.c                      | 33 ++++++++++++++++++--------
 drivers/tty/serial/samsung.c                   | 14 +++++------
 include/linux/dma-mapping.h                    | 17 +++++++++----
 10 files changed, 68 insertions(+), 42 deletions(-)

diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index e36a9bc..078fe8d 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -416,7 +416,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
 
 	/* CPU writes to non-reserved MTTs, while HCA might DMA to reserved mtts */
 	mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * mdev->limits.mtt_seg_size,
-					   dma_get_cache_alignment()) / mdev->limits.mtt_seg_size;
+					   dma_get_cache_alignment(&mdev->pdev->dev)) / mdev->limits.mtt_seg_size;
 
 	mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, init_hca->mtt_base,
 							 mdev->limits.mtt_seg_size,
diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c
index 9f389f3..1f6a9b7 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -484,7 +484,7 @@ static void *vb2_dc_get_userptr(struct device *dev, unsigned long vaddr,
 	int ret = 0;
 	struct sg_table *sgt;
 	unsigned long contig_size;
-	unsigned long dma_align = dma_get_cache_alignment();
+	unsigned long dma_align = dma_get_cache_alignment(dev);
 
 	/* Only cache aligned DMA transfers are reliable */
 	if (!IS_ALIGNED(vaddr | size, dma_align)) {
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index a1125d1..2f6ffe5 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2344,6 +2344,10 @@ static int b44_init_one(struct ssb_device *sdev,
 	struct net_device *dev;
 	struct b44 *bp;
 	int err;
+	unsigned int dma_desc_align_size = dma_get_cache_alignment(sdev->dma_dev);
+
+	/* Setup paramaters for syncing RX/TX DMA descriptors */
+	dma_desc_sync_size = max_t(unsigned int, dma_desc_align_size, sizeof(struct dma_desc));
 
 	instance++;
 
@@ -2587,12 +2591,8 @@ static inline void b44_pci_exit(void)
 
 static int __init b44_init(void)
 {
-	unsigned int dma_desc_align_size = dma_get_cache_alignment();
 	int err;
 
-	/* Setup paramaters for syncing RX/TX DMA descriptors */
-	dma_desc_sync_size = max_t(unsigned int, dma_desc_align_size, sizeof(struct dma_desc));
-
 	err = b44_pci_init();
 	if (err)
 		return err;
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 7feff24..b0c5319 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -1030,8 +1030,9 @@ static int emac_set_mac_address(struct net_device *ndev, void *sa)
 
 static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
 {
-	int rx_sync_size = emac_rx_sync_size(new_mtu);
-	int rx_skb_size = emac_rx_skb_size(new_mtu);
+	struct device *dma_dev = &dev->ofdev->dev;
+	int rx_skb_size = emac_rx_skb_size(dma_dev, new_mtu);
+	int rx_sync_size = emac_rx_sync_size(dma_dev, new_mtu);
 	int i, ret = 0;
 	int mr1_jumbo_bit_change = 0;
 
@@ -1115,20 +1116,21 @@ static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
 static int emac_change_mtu(struct net_device *ndev, int new_mtu)
 {
 	struct emac_instance *dev = netdev_priv(ndev);
+	struct device *dma_dev = &dev->ofdev->dev;
 	int ret = 0;
 
 	DBG(dev, "change_mtu(%d)" NL, new_mtu);
 
 	if (netif_running(ndev)) {
 		/* Check if we really need to reinitialize RX ring */
-		if (emac_rx_skb_size(ndev->mtu) != emac_rx_skb_size(new_mtu))
+		if (emac_rx_skb_size(dma_dev, ndev->mtu) != emac_rx_skb_size(dma_dev, new_mtu))
 			ret = emac_resize_rx_ring(dev, new_mtu);
 	}
 
 	if (!ret) {
 		ndev->mtu = new_mtu;
-		dev->rx_skb_size = emac_rx_skb_size(new_mtu);
-		dev->rx_sync_size = emac_rx_sync_size(new_mtu);
+		dev->rx_skb_size = emac_rx_skb_size(dma_dev, new_mtu);
+		dev->rx_sync_size = emac_rx_sync_size(dma_dev, new_mtu);
 	}
 
 	return ret;
@@ -1171,6 +1173,7 @@ static void emac_clean_rx_ring(struct emac_instance *dev)
 static inline int emac_alloc_rx_skb(struct emac_instance *dev, int slot,
 				    gfp_t flags)
 {
+	struct device *dma_dev = &dev->ofdev->dev;
 	struct sk_buff *skb = alloc_skb(dev->rx_skb_size, flags);
 	if (unlikely(!skb))
 		return -ENOMEM;
@@ -1649,11 +1652,12 @@ static inline void emac_recycle_rx_skb(struct emac_instance *dev, int slot,
 				       int len)
 {
 	struct sk_buff *skb = dev->rx_skb[slot];
+	struct device *dma_dev = &dev->ofdev->dev;
 
 	DBG2(dev, "recycle %d %d" NL, slot, len);
 
 	if (len)
-		dma_map_single(&dev->ofdev->dev, skb->data - 2,
+		dma_map_single(dma_dev, skb->data - 2,
 			       EMAC_DMA_ALIGN(len + 2), DMA_FROM_DEVICE);
 
 	dev->rx_desc[slot].data_len = 0;
@@ -1727,6 +1731,7 @@ static int emac_poll_rx(void *param, int budget)
 {
 	struct emac_instance *dev = param;
 	int slot = dev->rx_slot, received = 0;
+	struct device *dma_dev = &dev->ofdev->dev;
 
 	DBG2(dev, "poll_rx(%d)" NL, budget);
 
@@ -2998,6 +3003,7 @@ static int emac_probe(struct platform_device *ofdev)
 	struct emac_instance *dev;
 	struct device_node *np = ofdev->dev.of_node;
 	struct device_node **blist = NULL;
+	struct device *dma_dev = &ofdev->dev;
 	int err, i;
 
 	/* Skip unused/unwired EMACS.  We leave the check for an unused
@@ -3077,8 +3083,8 @@ static int emac_probe(struct platform_device *ofdev)
 		       np, dev->mal_dev->dev.of_node);
 		goto err_rel_deps;
 	}
-	dev->rx_skb_size = emac_rx_skb_size(ndev->mtu);
-	dev->rx_sync_size = emac_rx_sync_size(ndev->mtu);
+	dev->rx_skb_size = emac_rx_skb_size(dma_dev, ndev->mtu);
+	dev->rx_sync_size = emac_rx_sync_size(dma_dev, ndev->mtu);
 
 	/* Get pointers to BD rings */
 	dev->tx_desc =
diff --git a/drivers/net/ethernet/ibm/emac/core.h b/drivers/net/ethernet/ibm/emac/core.h
index 369de2c..68548a0 100644
--- a/drivers/net/ethernet/ibm/emac/core.h
+++ b/drivers/net/ethernet/ibm/emac/core.h
@@ -68,20 +68,20 @@ static inline int emac_rx_size(int mtu)
 		return mal_rx_size(ETH_DATA_LEN + EMAC_MTU_OVERHEAD);
 }
 
-#define EMAC_DMA_ALIGN(x)		ALIGN((x), dma_get_cache_alignment())
+#define EMAC_DMA_ALIGN(x)		ALIGN((x), dma_get_cache_alignment(dma_dev))
 
 #define EMAC_RX_SKB_HEADROOM		\
 	EMAC_DMA_ALIGN(CONFIG_IBM_EMAC_RX_SKB_HEADROOM)
 
 /* Size of RX skb for the given MTU */
-static inline int emac_rx_skb_size(int mtu)
+static inline int emac_rx_skb_size(struct device *dma_dev, int mtu)
 {
 	int size = max(mtu + EMAC_MTU_OVERHEAD, emac_rx_size(mtu));
 	return EMAC_DMA_ALIGN(size + 2) + EMAC_RX_SKB_HEADROOM;
 }
 
 /* RX DMA sync size */
-static inline int emac_rx_sync_size(int mtu)
+static inline int emac_rx_sync_size(struct device *dma_dev, int mtu)
 {
 	return EMAC_DMA_ALIGN(emac_rx_size(mtu) + 2);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index e61c99e..bc146dd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1660,7 +1660,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 	 */
 	dev->caps.reserved_mtts =
 		ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
-		      dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
+		      dma_get_cache_alignment(&dev->persist->pdev->dev)) / dev->caps.mtt_entry_sz;
 
 	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
 				  init_hca->mtt_base,
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 974a8ce..e6da66e 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -862,7 +862,7 @@ static bool spi_qup_can_dma(struct spi_master *master, struct spi_device *spi,
 			    struct spi_transfer *xfer)
 {
 	struct spi_qup *qup = spi_master_get_devdata(master);
-	size_t dma_align = dma_get_cache_alignment();
+	size_t dma_align = dma_get_cache_alignment(qup->dev);
 	int n_words;
 
 	if (xfer->rx_buf) {
@@ -1038,7 +1038,7 @@ static int spi_qup_probe(struct platform_device *pdev)
 	master->transfer_one = spi_qup_transfer_one;
 	master->dev.of_node = pdev->dev.of_node;
 	master->auto_runtime_pm = true;
-	master->dma_alignment = dma_get_cache_alignment();
+	master->dma_alignment = dma_get_cache_alignment(dev);
 	master->max_dma_len = SPI_MAX_XFER;
 
 	platform_set_drvdata(pdev, master);
diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c
index 67ffecc..c708457 100644
--- a/drivers/tty/serial/mpsc.c
+++ b/drivers/tty/serial/mpsc.c
@@ -81,19 +81,19 @@
  * Number of Tx & Rx descriptors must be powers of 2.
  */
 #define	MPSC_RXR_ENTRIES	32
-#define	MPSC_RXRE_SIZE		dma_get_cache_alignment()
+#define	MPSC_RXRE_SIZE		dma_get_cache_alignment(dma_dev)
 #define	MPSC_RXR_SIZE		(MPSC_RXR_ENTRIES * MPSC_RXRE_SIZE)
-#define	MPSC_RXBE_SIZE		dma_get_cache_alignment()
+#define	MPSC_RXBE_SIZE		dma_get_cache_alignment(dma_dev)
 #define	MPSC_RXB_SIZE		(MPSC_RXR_ENTRIES * MPSC_RXBE_SIZE)
 
 #define	MPSC_TXR_ENTRIES	32
-#define	MPSC_TXRE_SIZE		dma_get_cache_alignment()
+#define	MPSC_TXRE_SIZE		dma_get_cache_alignment(dma_dev)
 #define	MPSC_TXR_SIZE		(MPSC_TXR_ENTRIES * MPSC_TXRE_SIZE)
-#define	MPSC_TXBE_SIZE		dma_get_cache_alignment()
+#define	MPSC_TXBE_SIZE		dma_get_cache_alignment(dma_dev)
 #define	MPSC_TXB_SIZE		(MPSC_TXR_ENTRIES * MPSC_TXBE_SIZE)
 
 #define	MPSC_DMA_ALLOC_SIZE	(MPSC_RXR_SIZE + MPSC_RXB_SIZE + MPSC_TXR_SIZE \
-		+ MPSC_TXB_SIZE + dma_get_cache_alignment() /* for alignment */)
+		+ MPSC_TXB_SIZE + dma_get_cache_alignment(dma_dev) /* for alignment */)
 
 /* Rx and Tx Ring entry descriptors -- assume entry size is <= cacheline size */
 struct mpsc_rx_desc {
@@ -520,6 +520,7 @@ static uint mpsc_sdma_tx_active(struct mpsc_port_info *pi)
 static void mpsc_sdma_start_tx(struct mpsc_port_info *pi)
 {
 	struct mpsc_tx_desc *txre, *txre_p;
+	struct device *dma_dev = pi->port.dev;
 
 	/* If tx isn't running & there's a desc ready to go, start it */
 	if (!mpsc_sdma_tx_active(pi)) {
@@ -738,7 +739,7 @@ static void mpsc_init_hw(struct mpsc_port_info *pi)
 
 	mpsc_brg_init(pi, pi->brg_clk_src);
 	mpsc_brg_enable(pi);
-	mpsc_sdma_init(pi, dma_get_cache_alignment());	/* burst a cacheline */
+	mpsc_sdma_init(pi, dma_get_cache_alignment(pi->port.dev));	/* burst a cacheline */
 	mpsc_sdma_stop(pi);
 	mpsc_hw_init(pi);
 }
@@ -746,6 +747,7 @@ static void mpsc_init_hw(struct mpsc_port_info *pi)
 static int mpsc_alloc_ring_mem(struct mpsc_port_info *pi)
 {
 	int rc = 0;
+	struct device *dma_dev = pi->port.dev;
 
 	pr_debug("mpsc_alloc_ring_mem[%d]: Allocating ring mem\n",
 		pi->port.line);
@@ -769,6 +771,8 @@ static int mpsc_alloc_ring_mem(struct mpsc_port_info *pi)
 
 static void mpsc_free_ring_mem(struct mpsc_port_info *pi)
 {
+	struct device *dma_dev = pi->port.dev;
+
 	pr_debug("mpsc_free_ring_mem[%d]: Freeing ring mem\n", pi->port.line);
 
 	if (pi->dma_region) {
@@ -784,6 +788,7 @@ static void mpsc_init_rings(struct mpsc_port_info *pi)
 {
 	struct mpsc_rx_desc *rxre;
 	struct mpsc_tx_desc *txre;
+	struct device *dma_dev = pi->port.dev;
 	dma_addr_t dp, dp_p;
 	u8 *bp, *bp_p;
 	int i;
@@ -798,8 +803,8 @@ static void mpsc_init_rings(struct mpsc_port_info *pi)
 	 * Descriptors & buffers are multiples of cacheline size and must be
 	 * cacheline aligned.
 	 */
-	dp = ALIGN((u32)pi->dma_region, dma_get_cache_alignment());
-	dp_p = ALIGN((u32)pi->dma_region_p, dma_get_cache_alignment());
+	dp = ALIGN((u32)pi->dma_region, dma_get_cache_alignment(dma_dev));
+	dp_p = ALIGN((u32)pi->dma_region_p, dma_get_cache_alignment(dma_dev));
 
 	/*
 	 * Partition dma region into rx ring descriptor, rx buffers,
@@ -936,6 +941,7 @@ static int serial_polled;
 static int mpsc_rx_intr(struct mpsc_port_info *pi, unsigned long *flags)
 {
 	struct mpsc_rx_desc *rxre;
+	struct device *dma_dev = pi->port.dev;
 	struct tty_port *port = &pi->port.state->port;
 	u32	cmdstat, bytes_in, i;
 	int	rc = 0;
@@ -1091,6 +1097,7 @@ static int mpsc_rx_intr(struct mpsc_port_info *pi, unsigned long *flags)
 static void mpsc_setup_tx_desc(struct mpsc_port_info *pi, u32 count, u32 intr)
 {
 	struct mpsc_tx_desc *txre;
+	struct device *dma_dev = pi->port.dev;
 
 	txre = (struct mpsc_tx_desc *)(pi->txr
 			+ (pi->txr_head * MPSC_TXRE_SIZE));
@@ -1113,6 +1120,7 @@ static void mpsc_setup_tx_desc(struct mpsc_port_info *pi, u32 count, u32 intr)
 
 static void mpsc_copy_tx_data(struct mpsc_port_info *pi)
 {
+	struct device *dma_dev = pi->port.dev;
 	struct circ_buf *xmit = &pi->port.state->xmit;
 	u8 *bp;
 	u32 i;
@@ -1166,6 +1174,7 @@ static void mpsc_copy_tx_data(struct mpsc_port_info *pi)
 static int mpsc_tx_intr(struct mpsc_port_info *pi)
 {
 	struct mpsc_tx_desc *txre;
+	struct device *dma_dev = pi->port.dev;
 	int rc = 0;
 	unsigned long iflags;
 
@@ -1360,6 +1369,7 @@ static int mpsc_startup(struct uart_port *port)
 {
 	struct mpsc_port_info *pi =
 		container_of(port, struct mpsc_port_info, port);
+	struct device *dma_dev = pi->port.dev;
 	u32 flag = 0;
 	int rc;
 
@@ -1555,9 +1565,10 @@ static void mpsc_put_poll_char(struct uart_port *port,
 
 static int mpsc_get_poll_char(struct uart_port *port)
 {
+	struct mpsc_rx_desc *rxre;
 	struct mpsc_port_info *pi =
 		container_of(port, struct mpsc_port_info, port);
-	struct mpsc_rx_desc *rxre;
+	struct device *dma_dev = pi->port.dev;
 	u32	cmdstat, bytes_in, i;
 	u8	*bp;
 
@@ -1706,6 +1717,7 @@ static const struct uart_ops mpsc_pops = {
 static void mpsc_console_write(struct console *co, const char *s, uint count)
 {
 	struct mpsc_port_info *pi = &mpsc_ports[co->index];
+	struct device *dma_dev = pi->port.dev;
 	u8 *bp, *dp, add_cr = 0;
 	int i;
 	unsigned long iflags;
@@ -2024,7 +2036,8 @@ static void mpsc_drv_unmap_regs(struct mpsc_port_info *pi)
 static void mpsc_drv_get_platform_data(struct mpsc_port_info *pi,
 		struct platform_device *pd, int num)
 {
-	struct mpsc_pdata	*pdata;
+	struct mpsc_pdata *pdata;
+	struct device *dma_dev = pi->port.dev;
 
 	pdata = dev_get_platdata(&pd->dev);
 
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index 8aca18c..9df918e5 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -241,7 +241,7 @@ static void enable_tx_dma(struct s3c24xx_uart_port *ourport)
 	/* Enable tx dma mode */
 	ucon = rd_regl(port, S3C2410_UCON);
 	ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK);
-	ucon |= (dma_get_cache_alignment() >= 16) ?
+	ucon |= (dma_get_cache_alignment(port->dev) >= 16) ?
 		S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1;
 	ucon |= S3C64XX_UCON_TXMODE_DMA;
 	wr_regl(port,  S3C2410_UCON, ucon);
@@ -292,7 +292,7 @@ static int s3c24xx_serial_start_tx_dma(struct s3c24xx_uart_port *ourport,
 	if (ourport->tx_mode != S3C24XX_TX_DMA)
 		enable_tx_dma(ourport);
 
-	dma->tx_size = count & ~(dma_get_cache_alignment() - 1);
+	dma->tx_size = count & ~(dma_get_cache_alignment(port->dev) - 1);
 	dma->tx_transfer_addr = dma->tx_addr + xmit->tail;
 
 	dma_sync_single_for_device(ourport->port.dev, dma->tx_transfer_addr,
@@ -332,7 +332,7 @@ static void s3c24xx_serial_start_next_tx(struct s3c24xx_uart_port *ourport)
 
 	if (!ourport->dma || !ourport->dma->tx_chan ||
 	    count < ourport->min_dma_size ||
-	    xmit->tail & (dma_get_cache_alignment() - 1))
+	    xmit->tail & (dma_get_cache_alignment(port->dev) - 1))
 		s3c24xx_serial_start_tx_pio(ourport);
 	else
 		s3c24xx_serial_start_tx_dma(ourport, count);
@@ -718,8 +718,8 @@ static irqreturn_t s3c24xx_serial_tx_chars(int irq, void *id)
 
 	if (ourport->dma && ourport->dma->tx_chan &&
 	    count >= ourport->min_dma_size) {
-		int align = dma_get_cache_alignment() -
-			(xmit->tail & (dma_get_cache_alignment() - 1));
+		int align = dma_get_cache_alignment(port->dev) -
+			(xmit->tail & (dma_get_cache_alignment(port->dev) - 1));
 		if (count-align >= ourport->min_dma_size) {
 			dma_count = count-align;
 			count = align;
@@ -870,7 +870,7 @@ static int s3c24xx_serial_request_dma(struct s3c24xx_uart_port *p)
 	dma->tx_conf.direction		= DMA_MEM_TO_DEV;
 	dma->tx_conf.dst_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE;
 	dma->tx_conf.dst_addr		= p->port.mapbase + S3C2410_UTXH;
-	if (dma_get_cache_alignment() >= 16)
+	if (dma_get_cache_alignment(p->port.dev) >= 16)
 		dma->tx_conf.dst_maxburst = 16;
 	else
 		dma->tx_conf.dst_maxburst = 1;
@@ -1849,7 +1849,7 @@ static int s3c24xx_serial_probe(struct platform_device *pdev)
 	 * so find minimal transfer size suitable for DMA mode
 	 */
 	ourport->min_dma_size = max_t(int, ourport->port.fifosize,
-				    dma_get_cache_alignment());
+				    dma_get_cache_alignment(ourport->port.dev));
 
 	dbg("%s: initialising port %p...\n", __func__, ourport);
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 29ce981..1326023 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -131,6 +131,7 @@ struct dma_map_ops {
 #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
 	u64 (*get_required_mask)(struct device *dev);
 #endif
+	int (*get_cache_alignment)(struct device *dev);
 	int is_phys;
 };
 
@@ -697,12 +698,18 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size,
 }
 
 #ifdef CONFIG_HAS_DMA
-static inline int dma_get_cache_alignment(void)
-{
-#ifdef ARCH_DMA_MINALIGN
-	return ARCH_DMA_MINALIGN;
+
+#ifndef ARCH_DMA_MINALIGN
+#define ARCH_DMA_MINALIGN 1
 #endif
-	return 1;
+
+static inline int dma_get_cache_alignment(struct device *dev)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	if (dev && ops && ops->get_cache_alignment)
+		return ops->get_cache_alignment(dev);
+
+	return ARCH_DMA_MINALIGN; /* compatible behavior */
 }
 #endif
 
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V8 2/5] MIPS: Implement dma_map_ops::get_cache_alignment()
  2017-10-17  8:05 ` Huacai Chen
  (?)
@ 2017-10-17  8:05 ` Huacai Chen
  -1 siblings, 0 replies; 26+ messages in thread
From: Huacai Chen @ 2017-10-17  8:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
	linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, Huacai Chen, stable

Currently, MIPS is an architecture which support coherent & noncoherent
devices co-exist. So implement get_cache_alignment() function pointer
in 'struct dma_map_ops' to return different dma alignments.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 arch/mips/cavium-octeon/dma-octeon.c      |  3 ++-
 arch/mips/include/asm/dma-coherence.h     |  2 ++
 arch/mips/loongson64/common/dma-swiotlb.c |  1 +
 arch/mips/mm/dma-default.c                | 11 ++++++++++-
 arch/mips/netlogic/common/nlm-dma.c       |  3 ++-
 5 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index c64bd87..41c29a85 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -324,7 +324,8 @@ static struct octeon_dma_map_ops _octeon_pci_dma_map_ops = {
 		.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
 		.sync_sg_for_device = octeon_dma_sync_sg_for_device,
 		.mapping_error = swiotlb_dma_mapping_error,
-		.dma_supported = swiotlb_dma_supported
+		.dma_supported = swiotlb_dma_supported,
+		.get_cache_alignment = mips_dma_get_cache_alignment
 	},
 };
 
diff --git a/arch/mips/include/asm/dma-coherence.h b/arch/mips/include/asm/dma-coherence.h
index 72d0eab..5f7a9fc 100644
--- a/arch/mips/include/asm/dma-coherence.h
+++ b/arch/mips/include/asm/dma-coherence.h
@@ -29,4 +29,6 @@ extern int hw_coherentio;
 #define hw_coherentio	0
 #endif /* CONFIG_DMA_MAYBE_COHERENT */
 
+int mips_dma_get_cache_alignment(struct device *dev);
+
 #endif
diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c
index 34486c1..17b9897 100644
--- a/arch/mips/loongson64/common/dma-swiotlb.c
+++ b/arch/mips/loongson64/common/dma-swiotlb.c
@@ -119,6 +119,7 @@ static const struct dma_map_ops loongson_dma_map_ops = {
 	.sync_sg_for_device = loongson_dma_sync_sg_for_device,
 	.mapping_error = swiotlb_dma_mapping_error,
 	.dma_supported = loongson_dma_supported,
+	.get_cache_alignment = mips_dma_get_cache_alignment
 };
 
 void __init plat_swiotlb_setup(void)
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index c01bd20..e8f0659 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -394,6 +394,14 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 
 EXPORT_SYMBOL(dma_cache_sync);
 
+int mips_dma_get_cache_alignment(struct device *dev)
+{
+	if (plat_device_is_coherent(dev))
+		return 1;
+	else
+		return ARCH_DMA_MINALIGN;
+}
+
 static const struct dma_map_ops mips_default_dma_map_ops = {
 	.alloc = mips_dma_alloc_coherent,
 	.free = mips_dma_free_coherent,
@@ -407,7 +415,8 @@ static const struct dma_map_ops mips_default_dma_map_ops = {
 	.sync_sg_for_cpu = mips_dma_sync_sg_for_cpu,
 	.sync_sg_for_device = mips_dma_sync_sg_for_device,
 	.mapping_error = mips_dma_mapping_error,
-	.dma_supported = mips_dma_supported
+	.dma_supported = mips_dma_supported,
+	.get_cache_alignment = mips_dma_get_cache_alignment
 };
 
 const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops;
diff --git a/arch/mips/netlogic/common/nlm-dma.c b/arch/mips/netlogic/common/nlm-dma.c
index 0ec9d9d..e9a9ddc 100644
--- a/arch/mips/netlogic/common/nlm-dma.c
+++ b/arch/mips/netlogic/common/nlm-dma.c
@@ -79,7 +79,8 @@ const struct dma_map_ops nlm_swiotlb_dma_ops = {
 	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = swiotlb_sync_sg_for_device,
 	.mapping_error = swiotlb_dma_mapping_error,
-	.dma_supported = swiotlb_dma_supported
+	.dma_supported = swiotlb_dma_supported,
+	.get_cache_alignment = mips_dma_get_cache_alignment
 };
 
 void __init plat_swiotlb_setup(void)
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V8 3/5] scsi: Align block queue to dma_get_cache_alignment()
  2017-10-17  8:05 ` Huacai Chen
  (?)
  (?)
@ 2017-10-17  8:05 ` Huacai Chen
  2017-10-19 15:10   ` Christoph Hellwig
  -1 siblings, 1 reply; 26+ messages in thread
From: Huacai Chen @ 2017-10-17  8:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
	linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, Huacai Chen, stable

In non-coherent DMA mode, kernel uses cache flushing operations to
maintain I/O coherency, so scsi's block queue should be aligned to
ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer and a kernel structure
share a same cache line, and if the kernel structure has dirty data,
cache_invalidate (no writeback) will cause data corruption.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 drivers/scsi/scsi_lib.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9cf6a80..19abc2e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2132,11 +2132,11 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 		q->limits.cluster = 0;
 
 	/*
-	 * set a reasonable default alignment on word boundaries: the
-	 * host and device may alter it using
+	 * set a reasonable default alignment on word/cacheline boundaries:
+	 * the host and device may alter it using
 	 * blk_queue_update_dma_alignment() later.
 	 */
-	blk_queue_dma_alignment(q, 0x03);
+	blk_queue_dma_alignment(q, max(4, dma_get_cache_alignment(dev)) - 1);
 }
 EXPORT_SYMBOL_GPL(__scsi_init_queue);
 
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V8 4/5] libsas: Align SMP req/resp to dma_get_cache_alignment()
  2017-10-17  8:05 ` Huacai Chen
                   ` (2 preceding siblings ...)
  (?)
@ 2017-10-17  8:05 ` Huacai Chen
  2017-10-17 11:55   ` Marek Szyprowski
  2017-10-20  0:04     ` kbuild test robot
  -1 siblings, 2 replies; 26+ messages in thread
From: Huacai Chen @ 2017-10-17  8:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
	linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, Huacai Chen, stable

In non-coherent DMA mode, kernel uses cache flushing operations to
maintain I/O coherency, so libsas's SMP request/response should be
aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer and a kernel
structure share a same cache line, and if the kernel structure has
dirty data, cache_invalidate (no writeback) will cause data corruption.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 drivers/scsi/libsas/sas_expander.c | 93 +++++++++++++++++++++++---------------
 1 file changed, 57 insertions(+), 36 deletions(-)

diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 6b4fd23..124a44b 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -164,17 +164,17 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
 
 /* ---------- Allocations ---------- */
 
-static inline void *alloc_smp_req(int size)
+static inline void *alloc_smp_req(int size, int align)
 {
-	u8 *p = kzalloc(size, GFP_KERNEL);
+	u8 *p = kzalloc(ALIGN(size, align), GFP_KERNEL);
 	if (p)
 		p[0] = SMP_REQUEST;
 	return p;
 }
 
-static inline void *alloc_smp_resp(int size)
+static inline void *alloc_smp_resp(int size, int align)
 {
-	return kzalloc(size, GFP_KERNEL);
+	return kzalloc(ALIGN(size, align), GFP_KERNEL);
 }
 
 static char sas_route_char(struct domain_device *dev, struct ex_phy *phy)
@@ -403,15 +403,17 @@ static int sas_ex_phy_discover_helper(struct domain_device *dev, u8 *disc_req,
 int sas_ex_phy_discover(struct domain_device *dev, int single)
 {
 	struct expander_device *ex = &dev->ex_dev;
-	int  res = 0;
+	int  res = 0, align;
 	u8   *disc_req;
 	u8   *disc_resp;
 
-	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
+	align = dma_get_cache_alignment(&dev->phy->dev);
+
+	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align);
 	if (!disc_req)
 		return -ENOMEM;
 
-	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
 	if (!disc_resp) {
 		kfree(disc_req);
 		return -ENOMEM;
@@ -480,14 +482,15 @@ static int sas_ex_general(struct domain_device *dev)
 {
 	u8 *rg_req;
 	struct smp_resp *rg_resp;
-	int res;
-	int i;
+	int i, res, align;
 
-	rg_req = alloc_smp_req(RG_REQ_SIZE);
+	align = dma_get_cache_alignment(&dev->phy->dev);
+
+	rg_req = alloc_smp_req(RG_REQ_SIZE, align);
 	if (!rg_req)
 		return -ENOMEM;
 
-	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
+	rg_resp = alloc_smp_resp(RG_RESP_SIZE, align);
 	if (!rg_resp) {
 		kfree(rg_req);
 		return -ENOMEM;
@@ -552,13 +555,15 @@ static int sas_ex_manuf_info(struct domain_device *dev)
 {
 	u8 *mi_req;
 	u8 *mi_resp;
-	int res;
+	int res, align;
 
-	mi_req = alloc_smp_req(MI_REQ_SIZE);
+	align = dma_get_cache_alignment(&dev->phy->dev);
+
+	mi_req = alloc_smp_req(MI_REQ_SIZE, align);
 	if (!mi_req)
 		return -ENOMEM;
 
-	mi_resp = alloc_smp_resp(MI_RESP_SIZE);
+	mi_resp = alloc_smp_resp(MI_RESP_SIZE, align);
 	if (!mi_resp) {
 		kfree(mi_req);
 		return -ENOMEM;
@@ -593,13 +598,15 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id,
 {
 	u8 *pc_req;
 	u8 *pc_resp;
-	int res;
+	int res, align;
+
+	align = dma_get_cache_alignment(&dev->phy->dev);
 
-	pc_req = alloc_smp_req(PC_REQ_SIZE);
+	pc_req = alloc_smp_req(PC_REQ_SIZE, align);
 	if (!pc_req)
 		return -ENOMEM;
 
-	pc_resp = alloc_smp_resp(PC_RESP_SIZE);
+	pc_resp = alloc_smp_resp(PC_RESP_SIZE, align);
 	if (!pc_resp) {
 		kfree(pc_req);
 		return -ENOMEM;
@@ -664,17 +671,19 @@ static int sas_dev_present_in_domain(struct asd_sas_port *port,
 #define RPEL_RESP_SIZE	32
 int sas_smp_get_phy_events(struct sas_phy *phy)
 {
-	int res;
+	int res, align;
 	u8 *req;
 	u8 *resp;
 	struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
 	struct domain_device *dev = sas_find_dev_by_rphy(rphy);
 
-	req = alloc_smp_req(RPEL_REQ_SIZE);
+	align = dma_get_cache_alignment(&phy->dev);
+
+	req = alloc_smp_req(RPEL_REQ_SIZE, align);
 	if (!req)
 		return -ENOMEM;
 
-	resp = alloc_smp_resp(RPEL_RESP_SIZE);
+	resp = alloc_smp_resp(RPEL_RESP_SIZE, align);
 	if (!resp) {
 		kfree(req);
 		return -ENOMEM;
@@ -709,7 +718,8 @@ int sas_get_report_phy_sata(struct domain_device *dev, int phy_id,
 			    struct smp_resp *rps_resp)
 {
 	int res;
-	u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE);
+	int align = dma_get_cache_alignment(&dev->phy->dev);
+	u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE, align);
 	u8 *resp = (u8 *)rps_resp;
 
 	if (!rps_req)
@@ -1398,7 +1408,7 @@ static int sas_check_parent_topology(struct domain_device *child)
 static int sas_configure_present(struct domain_device *dev, int phy_id,
 				 u8 *sas_addr, int *index, int *present)
 {
-	int i, res = 0;
+	int i, res = 0, align;
 	struct expander_device *ex = &dev->ex_dev;
 	struct ex_phy *phy = &ex->ex_phy[phy_id];
 	u8 *rri_req;
@@ -1406,12 +1416,13 @@ static int sas_configure_present(struct domain_device *dev, int phy_id,
 
 	*present = 0;
 	*index = 0;
+	align = dma_get_cache_alignment(&dev->phy->dev);
 
-	rri_req = alloc_smp_req(RRI_REQ_SIZE);
+	rri_req = alloc_smp_req(RRI_REQ_SIZE, align);
 	if (!rri_req)
 		return -ENOMEM;
 
-	rri_resp = alloc_smp_resp(RRI_RESP_SIZE);
+	rri_resp = alloc_smp_resp(RRI_RESP_SIZE, align);
 	if (!rri_resp) {
 		kfree(rri_req);
 		return -ENOMEM;
@@ -1472,15 +1483,17 @@ static int sas_configure_present(struct domain_device *dev, int phy_id,
 static int sas_configure_set(struct domain_device *dev, int phy_id,
 			     u8 *sas_addr, int index, int include)
 {
-	int res;
+	int res, align;
 	u8 *cri_req;
 	u8 *cri_resp;
 
-	cri_req = alloc_smp_req(CRI_REQ_SIZE);
+	align = dma_get_cache_alignment(&dev->phy->dev);
+
+	cri_req = alloc_smp_req(CRI_REQ_SIZE, align);
 	if (!cri_req)
 		return -ENOMEM;
 
-	cri_resp = alloc_smp_resp(CRI_RESP_SIZE);
+	cri_resp = alloc_smp_resp(CRI_RESP_SIZE, align);
 	if (!cri_resp) {
 		kfree(cri_req);
 		return -ENOMEM;
@@ -1689,10 +1702,12 @@ int sas_discover_root_expander(struct domain_device *dev)
 static int sas_get_phy_discover(struct domain_device *dev,
 				int phy_id, struct smp_resp *disc_resp)
 {
-	int res;
+	int res, align;
 	u8 *disc_req;
 
-	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
+	align = dma_get_cache_alignment(&dev->phy->dev);
+
+	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align);
 	if (!disc_req)
 		return -ENOMEM;
 
@@ -1715,10 +1730,12 @@ static int sas_get_phy_discover(struct domain_device *dev,
 static int sas_get_phy_change_count(struct domain_device *dev,
 				    int phy_id, int *pcc)
 {
-	int res;
+	int res, align;
 	struct smp_resp *disc_resp;
 
-	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	align = dma_get_cache_alignment(&dev->phy->dev);
+
+	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
 	if (!disc_resp)
 		return -ENOMEM;
 
@@ -1733,11 +1750,13 @@ static int sas_get_phy_change_count(struct domain_device *dev,
 static int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id,
 				    u8 *sas_addr, enum sas_device_type *type)
 {
-	int res;
+	int res, align;
 	struct smp_resp *disc_resp;
 	struct discover_resp *dr;
 
-	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	align = dma_get_cache_alignment(&dev->phy->dev);
+
+	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
 	if (!disc_resp)
 		return -ENOMEM;
 	dr = &disc_resp->disc;
@@ -1787,15 +1806,17 @@ static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id,
 
 static int sas_get_ex_change_count(struct domain_device *dev, int *ecc)
 {
-	int res;
+	int res, align;
 	u8  *rg_req;
 	struct smp_resp  *rg_resp;
 
-	rg_req = alloc_smp_req(RG_REQ_SIZE);
+	align = dma_get_cache_alignment(&dev->phy->dev);
+
+	rg_req = alloc_smp_req(RG_REQ_SIZE, align);
 	if (!rg_req)
 		return -ENOMEM;
 
-	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
+	rg_resp = alloc_smp_resp(RG_RESP_SIZE, align);
 	if (!rg_resp) {
 		kfree(rg_req);
 		return -ENOMEM;
-- 
2.7.0




^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH V8 5/5] libata: Align DMA buffer to dma_get_cache_alignment()
  2017-10-17  8:05 ` Huacai Chen
                   ` (3 preceding siblings ...)
  (?)
@ 2017-10-17  8:05 ` Huacai Chen
  2017-10-17  9:43   ` Sergei Shtylyov
  2017-10-18 13:03   ` Tejun Heo
  -1 siblings, 2 replies; 26+ messages in thread
From: Huacai Chen @ 2017-10-17  8:05 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
	linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, Huacai Chen, stable

In non-coherent DMA mode, kernel uses cache flushing operations to
maintain I/O coherency, so in ata_do_dev_read_id() the DMA buffer
should be aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer
and a kernel structure share a same cache line, and if the kernel
structure has dirty data, cache_invalidate (no writeback) will cause
data corruption.

Cc: stable@vger.kernel.org
Signed-off-by: Huacai Chen <chenhc@lemote.com>
---
 drivers/ata/libata-core.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index ee4c1ec..e134955 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1833,8 +1833,19 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
 unsigned int ata_do_dev_read_id(struct ata_device *dev,
 					struct ata_taskfile *tf, u16 *id)
 {
-	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
-				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
+	u16 *devid;
+	int res, size = sizeof(u16) * ATA_ID_WORDS;
+
+	if (IS_ALIGNED((unsigned long)id, dma_get_cache_alignment(&dev->tdev)))
+		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, size, 0);
+	else {
+		devid = kmalloc(size, GFP_KERNEL);
+		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, devid, size, 0);
+		memcpy(id, devid, size);
+		kfree(devid);
+	}
+
+	return res;
 }
 
 /**
-- 
2.7.0

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 5/5] libata: Align DMA buffer to dma_get_cache_alignment()
  2017-10-17  8:05 ` [PATCH V8 5/5] libata: Align DMA buffer " Huacai Chen
@ 2017-10-17  9:43   ` Sergei Shtylyov
  2017-10-18  1:06       ` 陈华才
  2017-10-18 19:54     ` [PATCH V8 5/5] libata: Align DMA buffer to dma_get_cache_alignment() Alan Cox
  2017-10-18 13:03   ` Tejun Heo
  1 sibling, 2 replies; 26+ messages in thread
From: Sergei Shtylyov @ 2017-10-17  9:43 UTC (permalink / raw)
  To: Huacai Chen, Christoph Hellwig
  Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
	linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, stable

On 10/17/2017 11:05 AM, Huacai Chen wrote:

> In non-coherent DMA mode, kernel uses cache flushing operations to
> maintain I/O coherency, so in ata_do_dev_read_id() the DMA buffer
> should be aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer
> and a kernel structure share a same cache line, and if the kernel
> structure has dirty data, cache_invalidate (no writeback) will cause
> data corruption.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> ---
>   drivers/ata/libata-core.c | 15 +++++++++++++--
>   1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index ee4c1ec..e134955 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -1833,8 +1833,19 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
>   unsigned int ata_do_dev_read_id(struct ata_device *dev,
>   					struct ata_taskfile *tf, u16 *id)
>   {
> -	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
> -				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
> +	u16 *devid;
> +	int res, size = sizeof(u16) * ATA_ID_WORDS;
> +
> +	if (IS_ALIGNED((unsigned long)id, dma_get_cache_alignment(&dev->tdev)))
> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, size, 0);
> +	else {
> +		devid = kmalloc(size, GFP_KERNEL);
> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, devid, size, 0);
> +		memcpy(id, devid, size);
> +		kfree(devid);
> +	}
> +
> +	return res;

    This function is called only for the PIO mode commands, so I doubt this is 
necessary...

MBR, Sergei

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 4/5] libsas: Align SMP req/resp to dma_get_cache_alignment()
  2017-10-17  8:05 ` [PATCH V8 4/5] libsas: Align SMP req/resp " Huacai Chen
@ 2017-10-17 11:55   ` Marek Szyprowski
  2017-10-18  1:12     ` [PATCH V8 4/5] libsas: Align SMP req/resp todma_get_cache_alignment() 陈华才
  2017-10-19 15:12     ` [PATCH V8 4/5] libsas: Align SMP req/resp to dma_get_cache_alignment() Christoph Hellwig
  2017-10-20  0:04     ` kbuild test robot
  1 sibling, 2 replies; 26+ messages in thread
From: Marek Szyprowski @ 2017-10-17 11:55 UTC (permalink / raw)
  To: Huacai Chen, Christoph Hellwig
  Cc: Robin Murphy, Andrew Morton, Fuxin Zhang, linux-kernel,
	Ralf Baechle, James Hogan, linux-mips, James E . J . Bottomley,
	Martin K . Petersen, linux-scsi, Tejun Heo, linux-ide, stable

Hi Huacai,

On 2017-10-17 10:05, Huacai Chen wrote:
> In non-coherent DMA mode, kernel uses cache flushing operations to
> maintain I/O coherency, so libsas's SMP request/response should be
> aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer and a kernel
> structure share a same cache line, and if the kernel structure has
> dirty data, cache_invalidate (no writeback) will cause data corruption.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> ---
>   drivers/scsi/libsas/sas_expander.c | 93 +++++++++++++++++++++++---------------
>   1 file changed, 57 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
> index 6b4fd23..124a44b 100644
> --- a/drivers/scsi/libsas/sas_expander.c
> +++ b/drivers/scsi/libsas/sas_expander.c
> @@ -164,17 +164,17 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
>   
>   /* ---------- Allocations ---------- */
>   
> -static inline void *alloc_smp_req(int size)
> +static inline void *alloc_smp_req(int size, int align)
>   {
> -	u8 *p = kzalloc(size, GFP_KERNEL);
> +	u8 *p = kzalloc(ALIGN(size, align), GFP_KERNEL);

If I remember correctly, kernel guarantees that each kmalloced buffer is
always at least aligned to the CPU cache line, so CPU cache can be
invalidated on the allocated buffer without corrupting anything else.
Taking this into account, I wonder if the above change make sense.

Have you experienced any problems without this change?

>   	if (p)
>   		p[0] = SMP_REQUEST;
>   	return p;
>   }
>   
> -static inline void *alloc_smp_resp(int size)
> +static inline void *alloc_smp_resp(int size, int align)
>   {
> -	return kzalloc(size, GFP_KERNEL);
> +	return kzalloc(ALIGN(size, align), GFP_KERNEL);

Save a above.

>   }
>   
>   static char sas_route_char(struct domain_device *dev, struct ex_phy *phy)
> @@ -403,15 +403,17 @@ static int sas_ex_phy_discover_helper(struct domain_device *dev, u8 *disc_req,
>   int sas_ex_phy_discover(struct domain_device *dev, int single)
>   {
>   	struct expander_device *ex = &dev->ex_dev;
> -	int  res = 0;
> +	int  res = 0, align;
>   	u8   *disc_req;
>   	u8   *disc_resp;
>   
> -	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align);
>   	if (!disc_req)
>   		return -ENOMEM;
>   
> -	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
> +	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
>   	if (!disc_resp) {
>   		kfree(disc_req);
>   		return -ENOMEM;
> @@ -480,14 +482,15 @@ static int sas_ex_general(struct domain_device *dev)
>   {
>   	u8 *rg_req;
>   	struct smp_resp *rg_resp;
> -	int res;
> -	int i;
> +	int i, res, align;
>   
> -	rg_req = alloc_smp_req(RG_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	rg_req = alloc_smp_req(RG_REQ_SIZE, align);
>   	if (!rg_req)
>   		return -ENOMEM;
>   
> -	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
> +	rg_resp = alloc_smp_resp(RG_RESP_SIZE, align);
>   	if (!rg_resp) {
>   		kfree(rg_req);
>   		return -ENOMEM;
> @@ -552,13 +555,15 @@ static int sas_ex_manuf_info(struct domain_device *dev)
>   {
>   	u8 *mi_req;
>   	u8 *mi_resp;
> -	int res;
> +	int res, align;
>   
> -	mi_req = alloc_smp_req(MI_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	mi_req = alloc_smp_req(MI_REQ_SIZE, align);
>   	if (!mi_req)
>   		return -ENOMEM;
>   
> -	mi_resp = alloc_smp_resp(MI_RESP_SIZE);
> +	mi_resp = alloc_smp_resp(MI_RESP_SIZE, align);
>   	if (!mi_resp) {
>   		kfree(mi_req);
>   		return -ENOMEM;
> @@ -593,13 +598,15 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id,
>   {
>   	u8 *pc_req;
>   	u8 *pc_resp;
> -	int res;
> +	int res, align;
> +
> +	align = dma_get_cache_alignment(&dev->phy->dev);
>   
> -	pc_req = alloc_smp_req(PC_REQ_SIZE);
> +	pc_req = alloc_smp_req(PC_REQ_SIZE, align);
>   	if (!pc_req)
>   		return -ENOMEM;
>   
> -	pc_resp = alloc_smp_resp(PC_RESP_SIZE);
> +	pc_resp = alloc_smp_resp(PC_RESP_SIZE, align);
>   	if (!pc_resp) {
>   		kfree(pc_req);
>   		return -ENOMEM;
> @@ -664,17 +671,19 @@ static int sas_dev_present_in_domain(struct asd_sas_port *port,
>   #define RPEL_RESP_SIZE	32
>   int sas_smp_get_phy_events(struct sas_phy *phy)
>   {
> -	int res;
> +	int res, align;
>   	u8 *req;
>   	u8 *resp;
>   	struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
>   	struct domain_device *dev = sas_find_dev_by_rphy(rphy);
>   
> -	req = alloc_smp_req(RPEL_REQ_SIZE);
> +	align = dma_get_cache_alignment(&phy->dev);
> +
> +	req = alloc_smp_req(RPEL_REQ_SIZE, align);
>   	if (!req)
>   		return -ENOMEM;
>   
> -	resp = alloc_smp_resp(RPEL_RESP_SIZE);
> +	resp = alloc_smp_resp(RPEL_RESP_SIZE, align);
>   	if (!resp) {
>   		kfree(req);
>   		return -ENOMEM;
> @@ -709,7 +718,8 @@ int sas_get_report_phy_sata(struct domain_device *dev, int phy_id,
>   			    struct smp_resp *rps_resp)
>   {
>   	int res;
> -	u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE);
> +	int align = dma_get_cache_alignment(&dev->phy->dev);
> +	u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE, align);
>   	u8 *resp = (u8 *)rps_resp;
>   
>   	if (!rps_req)
> @@ -1398,7 +1408,7 @@ static int sas_check_parent_topology(struct domain_device *child)
>   static int sas_configure_present(struct domain_device *dev, int phy_id,
>   				 u8 *sas_addr, int *index, int *present)
>   {
> -	int i, res = 0;
> +	int i, res = 0, align;
>   	struct expander_device *ex = &dev->ex_dev;
>   	struct ex_phy *phy = &ex->ex_phy[phy_id];
>   	u8 *rri_req;
> @@ -1406,12 +1416,13 @@ static int sas_configure_present(struct domain_device *dev, int phy_id,
>   
>   	*present = 0;
>   	*index = 0;
> +	align = dma_get_cache_alignment(&dev->phy->dev);
>   
> -	rri_req = alloc_smp_req(RRI_REQ_SIZE);
> +	rri_req = alloc_smp_req(RRI_REQ_SIZE, align);
>   	if (!rri_req)
>   		return -ENOMEM;
>   
> -	rri_resp = alloc_smp_resp(RRI_RESP_SIZE);
> +	rri_resp = alloc_smp_resp(RRI_RESP_SIZE, align);
>   	if (!rri_resp) {
>   		kfree(rri_req);
>   		return -ENOMEM;
> @@ -1472,15 +1483,17 @@ static int sas_configure_present(struct domain_device *dev, int phy_id,
>   static int sas_configure_set(struct domain_device *dev, int phy_id,
>   			     u8 *sas_addr, int index, int include)
>   {
> -	int res;
> +	int res, align;
>   	u8 *cri_req;
>   	u8 *cri_resp;
>   
> -	cri_req = alloc_smp_req(CRI_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	cri_req = alloc_smp_req(CRI_REQ_SIZE, align);
>   	if (!cri_req)
>   		return -ENOMEM;
>   
> -	cri_resp = alloc_smp_resp(CRI_RESP_SIZE);
> +	cri_resp = alloc_smp_resp(CRI_RESP_SIZE, align);
>   	if (!cri_resp) {
>   		kfree(cri_req);
>   		return -ENOMEM;
> @@ -1689,10 +1702,12 @@ int sas_discover_root_expander(struct domain_device *dev)
>   static int sas_get_phy_discover(struct domain_device *dev,
>   				int phy_id, struct smp_resp *disc_resp)
>   {
> -	int res;
> +	int res, align;
>   	u8 *disc_req;
>   
> -	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align);
>   	if (!disc_req)
>   		return -ENOMEM;
>   
> @@ -1715,10 +1730,12 @@ static int sas_get_phy_discover(struct domain_device *dev,
>   static int sas_get_phy_change_count(struct domain_device *dev,
>   				    int phy_id, int *pcc)
>   {
> -	int res;
> +	int res, align;
>   	struct smp_resp *disc_resp;
>   
> -	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
>   	if (!disc_resp)
>   		return -ENOMEM;
>   
> @@ -1733,11 +1750,13 @@ static int sas_get_phy_change_count(struct domain_device *dev,
>   static int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id,
>   				    u8 *sas_addr, enum sas_device_type *type)
>   {
> -	int res;
> +	int res, align;
>   	struct smp_resp *disc_resp;
>   	struct discover_resp *dr;
>   
> -	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
>   	if (!disc_resp)
>   		return -ENOMEM;
>   	dr = &disc_resp->disc;
> @@ -1787,15 +1806,17 @@ static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id,
>   
>   static int sas_get_ex_change_count(struct domain_device *dev, int *ecc)
>   {
> -	int res;
> +	int res, align;
>   	u8  *rg_req;
>   	struct smp_resp  *rg_resp;
>   
> -	rg_req = alloc_smp_req(RG_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	rg_req = alloc_smp_req(RG_REQ_SIZE, align);
>   	if (!rg_req)
>   		return -ENOMEM;
>   
> -	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
> +	rg_resp = alloc_smp_resp(RG_RESP_SIZE, align);
>   	if (!rg_resp) {
>   		kfree(rg_req);
>   		return -ENOMEM;

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment()
  2017-10-17  9:43   ` Sergei Shtylyov
@ 2017-10-18  1:06       ` 陈华才
  2017-10-18 19:54     ` [PATCH V8 5/5] libata: Align DMA buffer to dma_get_cache_alignment() Alan Cox
  1 sibling, 0 replies; 26+ messages in thread
From: 陈华才 @ 2017-10-18  1:06 UTC (permalink / raw)
  To: Sergei Shtylyov, Christoph Hellwig
  Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
	linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide

Hi, Sergei,

Without this patch, in non-coherent mode, ata_do_set_mode() return with "no PIO support", and disk detection fails.

Huacai
 
 
------------------ Original ------------------
From:  "Sergei Shtylyov"<sergei.shtylyov@cogentembedded.com>;
Date:  Tue, Oct 17, 2017 05:43 PM
To:  "Huacai Chen"<chenhc@lemote.com>; "Christoph Hellwig"<hch@lst.de>; 
Cc:  "Marek Szyprowski"<m.szyprowski@samsung.com>; "Robin Murphy"<robin.murphy@arm.com>; "Andrew Morton"<akpm@linux-foundation.org>; "Fuxin Zhang"<zhangfx@lemote.com>; "linux-kernel"<linux-kernel@vger.kernel.org>; "Ralf Baechle"<ralf@linux-mips.org>; "James Hogan"<james.hogan@imgtec.com>; "linux-mips"<linux-mips@linux-mips.org>; "James E . J . Bottomley"<jejb@linux.vnet.ibm.com>; "Martin K . Petersen"<martin.petersen@oracle.com>; "linux-scsi"<linux-scsi@vger.kernel.org>; "Tejun Heo"<tj@kernel.org>; "linux-ide"<linux-ide@vger.kernel.org>; "stable"<stable@vger.kernel.org>; 
Subject:  Re: [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment()

 
On 10/17/2017 11:05 AM, Huacai Chen wrote:

> In non-coherent DMA mode, kernel uses cache flushing operations to
> maintain I/O coherency, so in ata_do_dev_read_id() the DMA buffer
> should be aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer
> and a kernel structure share a same cache line, and if the kernel
> structure has dirty data, cache_invalidate (no writeback) will cause
> data corruption.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> ---
>   drivers/ata/libata-core.c | 15 +++++++++++++--
>   1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index ee4c1ec..e134955 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -1833,8 +1833,19 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
>   unsigned int ata_do_dev_read_id(struct ata_device *dev,
>   					struct ata_taskfile *tf, u16 *id)
>   {
> -	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
> -				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
> +	u16 *devid;
> +	int res, size = sizeof(u16) * ATA_ID_WORDS;
> +
> +	if (IS_ALIGNED((unsigned long)id, dma_get_cache_alignment(&dev->tdev)))
> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, size, 0);
> +	else {
> +		devid = kmalloc(size, GFP_KERNEL);
> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, devid, size, 0);
> +		memcpy(id, devid, size);
> +		kfree(devid);
> +	}
> +
> +	return res;

    This function is called only for the PIO mode commands, so I doubt this is 
necessary...

MBR, Sergei

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment()
@ 2017-10-18  1:06       ` 陈华才
  0 siblings, 0 replies; 26+ messages in thread
From: 陈华才 @ 2017-10-18  1:06 UTC (permalink / raw)
  To: Sergei Shtylyov, Christoph Hellwig
  Cc: Marek Szyprowski, Robin Murphy, Andrew Morton, Fuxin Zhang,
	linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, stable

Hi, Sergei,

Without this patch, in non-coherent mode, ata_do_set_mode() return with "no PIO support", and disk detection fails.

Huacai
 
 
------------------ Original ------------------
From:  "Sergei Shtylyov"<sergei.shtylyov@cogentembedded.com>;
Date:  Tue, Oct 17, 2017 05:43 PM
To:  "Huacai Chen"<chenhc@lemote.com>; "Christoph Hellwig"<hch@lst.de>; 
Cc:  "Marek Szyprowski"<m.szyprowski@samsung.com>; "Robin Murphy"<robin.murphy@arm.com>; "Andrew Morton"<akpm@linux-foundation.org>; "Fuxin Zhang"<zhangfx@lemote.com>; "linux-kernel"<linux-kernel@vger.kernel.org>; "Ralf Baechle"<ralf@linux-mips.org>; "James Hogan"<james.hogan@imgtec.com>; "linux-mips"<linux-mips@linux-mips.org>; "James E . J . Bottomley"<jejb@linux.vnet.ibm.com>; "Martin K . Petersen"<martin.petersen@oracle.com>; "linux-scsi"<linux-scsi@vger.kernel.org>; "Tejun Heo"<tj@kernel.org>; "linux-ide"<linux-ide@vger.kernel.org>; "stable"<stable@vger.kernel.org>; 
Subject:  Re: [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment()

 
On 10/17/2017 11:05 AM, Huacai Chen wrote:

> In non-coherent DMA mode, kernel uses cache flushing operations to
> maintain I/O coherency, so in ata_do_dev_read_id() the DMA buffer
> should be aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer
> and a kernel structure share a same cache line, and if the kernel
> structure has dirty data, cache_invalidate (no writeback) will cause
> data corruption.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> ---
>   drivers/ata/libata-core.c | 15 +++++++++++++--
>   1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index ee4c1ec..e134955 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -1833,8 +1833,19 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
>   unsigned int ata_do_dev_read_id(struct ata_device *dev,
>   					struct ata_taskfile *tf, u16 *id)
>   {
> -	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
> -				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
> +	u16 *devid;
> +	int res, size = sizeof(u16) * ATA_ID_WORDS;
> +
> +	if (IS_ALIGNED((unsigned long)id, dma_get_cache_alignment(&dev->tdev)))
> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, size, 0);
> +	else {
> +		devid = kmalloc(size, GFP_KERNEL);
> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, devid, size, 0);
> +		memcpy(id, devid, size);
> +		kfree(devid);
> +	}
> +
> +	return res;

    This function is called only for the PIO mode commands, so I doubt this is 
necessary...

MBR, Sergei

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 4/5] libsas: Align SMP req/resp todma_get_cache_alignment()
  2017-10-17 11:55   ` Marek Szyprowski
@ 2017-10-18  1:12     ` 陈华才
  2017-10-19 15:12     ` [PATCH V8 4/5] libsas: Align SMP req/resp to dma_get_cache_alignment() Christoph Hellwig
  1 sibling, 0 replies; 26+ messages in thread
From: 陈华才 @ 2017-10-18  1:12 UTC (permalink / raw)
  To: Marek Szyprowski, Christoph Hellwig
  Cc: Robin Murphy, Andrew Morton, Fuxin Zhang, linux-kernel,
	Ralf Baechle, JamesHogan, linux-mips, James E . J .Bottomley,
	Martin K . Petersen, linux-scsi, Tejun Heo, linux-ide, stable

Hi, Marek,

Yes, I know in include/linux/slab.h, there is
#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN

But I observed that the req/resp isn't aligned to ARCH_DMA_MINALIGN and I don't know why.

Problems I experienced is: In non-coherent mode, mvsas with an expander cannot detect any disks.

Huacai
 
 
------------------ Original ------------------
From:  "Marek Szyprowski"<m.szyprowski@samsung.com>;
Date:  Tue, Oct 17, 2017 07:55 PM
To:  "Huacai Chen"<chenhc@lemote.com>; "Christoph Hellwig"<hch@lst.de>; 
Cc:  "Robin Murphy"<robin.murphy@arm.com>; "Andrew Morton"<akpm@linux-foundation.org>; "Fuxin Zhang"<zhangfx@lemote.com>; "linux-kernel"<linux-kernel@vger.kernel.org>; "Ralf Baechle"<ralf@linux-mips.org>; "JamesHogan"<james.hogan@imgtec.com>; "linux-mips"<linux-mips@linux-mips.org>; "James E . J .Bottomley"<jejb@linux.vnet.ibm.com>; "Martin K . Petersen"<martin.petersen@oracle.com>; "linux-scsi"<linux-scsi@vger.kernel.org>; "Tejun Heo"<tj@kernel.org>; "linux-ide"<linux-ide@vger.kernel.org>; "stable"<stable@vger.kernel.org>; 
Subject:  Re: [PATCH V8 4/5] libsas: Align SMP req/resp todma_get_cache_alignment()

 
Hi Huacai,

On 2017-10-17 10:05, Huacai Chen wrote:
> In non-coherent DMA mode, kernel uses cache flushing operations to
> maintain I/O coherency, so libsas's SMP request/response should be
> aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer and a kernel
> structure share a same cache line, and if the kernel structure has
> dirty data, cache_invalidate (no writeback) will cause data corruption.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> ---
>   drivers/scsi/libsas/sas_expander.c | 93 +++++++++++++++++++++++---------------
>   1 file changed, 57 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
> index 6b4fd23..124a44b 100644
> --- a/drivers/scsi/libsas/sas_expander.c
> +++ b/drivers/scsi/libsas/sas_expander.c
> @@ -164,17 +164,17 @@ static int smp_execute_task(struct domain_device *dev, void *req, int req_size,
>   
>   /* ---------- Allocations ---------- */
>   
> -static inline void *alloc_smp_req(int size)
> +static inline void *alloc_smp_req(int size, int align)
>   {
> -	u8 *p = kzalloc(size, GFP_KERNEL);
> +	u8 *p = kzalloc(ALIGN(size, align), GFP_KERNEL);

If I remember correctly, kernel guarantees that each kmalloced buffer is
always at least aligned to the CPU cache line, so CPU cache can be
invalidated on the allocated buffer without corrupting anything else.
Taking this into account, I wonder if the above change make sense.

Have you experienced any problems without this change?

>   	if (p)
>   		p[0] = SMP_REQUEST;
>   	return p;
>   }
>   
> -static inline void *alloc_smp_resp(int size)
> +static inline void *alloc_smp_resp(int size, int align)
>   {
> -	return kzalloc(size, GFP_KERNEL);
> +	return kzalloc(ALIGN(size, align), GFP_KERNEL);

Save a above.

>   }
>   
>   static char sas_route_char(struct domain_device *dev, struct ex_phy *phy)
> @@ -403,15 +403,17 @@ static int sas_ex_phy_discover_helper(struct domain_device *dev, u8 *disc_req,
>   int sas_ex_phy_discover(struct domain_device *dev, int single)
>   {
>   	struct expander_device *ex = &dev->ex_dev;
> -	int  res = 0;
> +	int  res = 0, align;
>   	u8   *disc_req;
>   	u8   *disc_resp;
>   
> -	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align);
>   	if (!disc_req)
>   		return -ENOMEM;
>   
> -	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
> +	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
>   	if (!disc_resp) {
>   		kfree(disc_req);
>   		return -ENOMEM;
> @@ -480,14 +482,15 @@ static int sas_ex_general(struct domain_device *dev)
>   {
>   	u8 *rg_req;
>   	struct smp_resp *rg_resp;
> -	int res;
> -	int i;
> +	int i, res, align;
>   
> -	rg_req = alloc_smp_req(RG_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	rg_req = alloc_smp_req(RG_REQ_SIZE, align);
>   	if (!rg_req)
>   		return -ENOMEM;
>   
> -	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
> +	rg_resp = alloc_smp_resp(RG_RESP_SIZE, align);
>   	if (!rg_resp) {
>   		kfree(rg_req);
>   		return -ENOMEM;
> @@ -552,13 +555,15 @@ static int sas_ex_manuf_info(struct domain_device *dev)
>   {
>   	u8 *mi_req;
>   	u8 *mi_resp;
> -	int res;
> +	int res, align;
>   
> -	mi_req = alloc_smp_req(MI_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	mi_req = alloc_smp_req(MI_REQ_SIZE, align);
>   	if (!mi_req)
>   		return -ENOMEM;
>   
> -	mi_resp = alloc_smp_resp(MI_RESP_SIZE);
> +	mi_resp = alloc_smp_resp(MI_RESP_SIZE, align);
>   	if (!mi_resp) {
>   		kfree(mi_req);
>   		return -ENOMEM;
> @@ -593,13 +598,15 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id,
>   {
>   	u8 *pc_req;
>   	u8 *pc_resp;
> -	int res;
> +	int res, align;
> +
> +	align = dma_get_cache_alignment(&dev->phy->dev);
>   
> -	pc_req = alloc_smp_req(PC_REQ_SIZE);
> +	pc_req = alloc_smp_req(PC_REQ_SIZE, align);
>   	if (!pc_req)
>   		return -ENOMEM;
>   
> -	pc_resp = alloc_smp_resp(PC_RESP_SIZE);
> +	pc_resp = alloc_smp_resp(PC_RESP_SIZE, align);
>   	if (!pc_resp) {
>   		kfree(pc_req);
>   		return -ENOMEM;
> @@ -664,17 +671,19 @@ static int sas_dev_present_in_domain(struct asd_sas_port *port,
>   #define RPEL_RESP_SIZE	32
>   int sas_smp_get_phy_events(struct sas_phy *phy)
>   {
> -	int res;
> +	int res, align;
>   	u8 *req;
>   	u8 *resp;
>   	struct sas_rphy *rphy = dev_to_rphy(phy->dev.parent);
>   	struct domain_device *dev = sas_find_dev_by_rphy(rphy);
>   
> -	req = alloc_smp_req(RPEL_REQ_SIZE);
> +	align = dma_get_cache_alignment(&phy->dev);
> +
> +	req = alloc_smp_req(RPEL_REQ_SIZE, align);
>   	if (!req)
>   		return -ENOMEM;
>   
> -	resp = alloc_smp_resp(RPEL_RESP_SIZE);
> +	resp = alloc_smp_resp(RPEL_RESP_SIZE, align);
>   	if (!resp) {
>   		kfree(req);
>   		return -ENOMEM;
> @@ -709,7 +718,8 @@ int sas_get_report_phy_sata(struct domain_device *dev, int phy_id,
>   			    struct smp_resp *rps_resp)
>   {
>   	int res;
> -	u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE);
> +	int align = dma_get_cache_alignment(&dev->phy->dev);
> +	u8 *rps_req = alloc_smp_req(RPS_REQ_SIZE, align);
>   	u8 *resp = (u8 *)rps_resp;
>   
>   	if (!rps_req)
> @@ -1398,7 +1408,7 @@ static int sas_check_parent_topology(struct domain_device *child)
>   static int sas_configure_present(struct domain_device *dev, int phy_id,
>   				 u8 *sas_addr, int *index, int *present)
>   {
> -	int i, res = 0;
> +	int i, res = 0, align;
>   	struct expander_device *ex = &dev->ex_dev;
>   	struct ex_phy *phy = &ex->ex_phy[phy_id];
>   	u8 *rri_req;
> @@ -1406,12 +1416,13 @@ static int sas_configure_present(struct domain_device *dev, int phy_id,
>   
>   	*present = 0;
>   	*index = 0;
> +	align = dma_get_cache_alignment(&dev->phy->dev);
>   
> -	rri_req = alloc_smp_req(RRI_REQ_SIZE);
> +	rri_req = alloc_smp_req(RRI_REQ_SIZE, align);
>   	if (!rri_req)
>   		return -ENOMEM;
>   
> -	rri_resp = alloc_smp_resp(RRI_RESP_SIZE);
> +	rri_resp = alloc_smp_resp(RRI_RESP_SIZE, align);
>   	if (!rri_resp) {
>   		kfree(rri_req);
>   		return -ENOMEM;
> @@ -1472,15 +1483,17 @@ static int sas_configure_present(struct domain_device *dev, int phy_id,
>   static int sas_configure_set(struct domain_device *dev, int phy_id,
>   			     u8 *sas_addr, int index, int include)
>   {
> -	int res;
> +	int res, align;
>   	u8 *cri_req;
>   	u8 *cri_resp;
>   
> -	cri_req = alloc_smp_req(CRI_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	cri_req = alloc_smp_req(CRI_REQ_SIZE, align);
>   	if (!cri_req)
>   		return -ENOMEM;
>   
> -	cri_resp = alloc_smp_resp(CRI_RESP_SIZE);
> +	cri_resp = alloc_smp_resp(CRI_RESP_SIZE, align);
>   	if (!cri_resp) {
>   		kfree(cri_req);
>   		return -ENOMEM;
> @@ -1689,10 +1702,12 @@ int sas_discover_root_expander(struct domain_device *dev)
>   static int sas_get_phy_discover(struct domain_device *dev,
>   				int phy_id, struct smp_resp *disc_resp)
>   {
> -	int res;
> +	int res, align;
>   	u8 *disc_req;
>   
> -	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align);
>   	if (!disc_req)
>   		return -ENOMEM;
>   
> @@ -1715,10 +1730,12 @@ static int sas_get_phy_discover(struct domain_device *dev,
>   static int sas_get_phy_change_count(struct domain_device *dev,
>   				    int phy_id, int *pcc)
>   {
> -	int res;
> +	int res, align;
>   	struct smp_resp *disc_resp;
>   
> -	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
>   	if (!disc_resp)
>   		return -ENOMEM;
>   
> @@ -1733,11 +1750,13 @@ static int sas_get_phy_change_count(struct domain_device *dev,
>   static int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id,
>   				    u8 *sas_addr, enum sas_device_type *type)
>   {
> -	int res;
> +	int res, align;
>   	struct smp_resp *disc_resp;
>   	struct discover_resp *dr;
>   
> -	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
>   	if (!disc_resp)
>   		return -ENOMEM;
>   	dr = &disc_resp->disc;
> @@ -1787,15 +1806,17 @@ static int sas_find_bcast_phy(struct domain_device *dev, int *phy_id,
>   
>   static int sas_get_ex_change_count(struct domain_device *dev, int *ecc)
>   {
> -	int res;
> +	int res, align;
>   	u8  *rg_req;
>   	struct smp_resp  *rg_resp;
>   
> -	rg_req = alloc_smp_req(RG_REQ_SIZE);
> +	align = dma_get_cache_alignment(&dev->phy->dev);
> +
> +	rg_req = alloc_smp_req(RG_REQ_SIZE, align);
>   	if (!rg_req)
>   		return -ENOMEM;
>   
> -	rg_resp = alloc_smp_resp(RG_RESP_SIZE);
> +	rg_resp = alloc_smp_resp(RG_RESP_SIZE, align);
>   	if (!rg_resp) {
>   		kfree(rg_req);
>   		return -ENOMEM;

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 5/5] libata: Align DMA buffer to dma_get_cache_alignment()
  2017-10-17  8:05 ` [PATCH V8 5/5] libata: Align DMA buffer " Huacai Chen
  2017-10-17  9:43   ` Sergei Shtylyov
@ 2017-10-18 13:03   ` Tejun Heo
  2017-10-19  7:52       ` Matt Redfearn
  1 sibling, 1 reply; 26+ messages in thread
From: Tejun Heo @ 2017-10-18 13:03 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, Andrew Morton,
	Fuxin Zhang, linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	linux-ide, stable

On Tue, Oct 17, 2017 at 04:05:42PM +0800, Huacai Chen wrote:
> In non-coherent DMA mode, kernel uses cache flushing operations to
> maintain I/O coherency, so in ata_do_dev_read_id() the DMA buffer
> should be aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer
> and a kernel structure share a same cache line, and if the kernel
> structure has dirty data, cache_invalidate (no writeback) will cause
> data corruption.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Huacai Chen <chenhc@lemote.com>
> ---
>  drivers/ata/libata-core.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
> index ee4c1ec..e134955 100644
> --- a/drivers/ata/libata-core.c
> +++ b/drivers/ata/libata-core.c
> @@ -1833,8 +1833,19 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
>  unsigned int ata_do_dev_read_id(struct ata_device *dev,
>  					struct ata_taskfile *tf, u16 *id)
>  {
> -	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
> -				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
> +	u16 *devid;
> +	int res, size = sizeof(u16) * ATA_ID_WORDS;
> +
> +	if (IS_ALIGNED((unsigned long)id, dma_get_cache_alignment(&dev->tdev)))
> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, size, 0);
> +	else {
> +		devid = kmalloc(size, GFP_KERNEL);
> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, devid, size, 0);
> +		memcpy(id, devid, size);
> +		kfree(devid);
> +	}
> +
> +	return res;

Hmm... I think it'd be a lot better to ensure that the buffers are
aligned properly to begin with.  There are only two buffers which are
used for id reading - ata_port->sector_buf and ata_device->id.  Both
are embedded arrays but making them separately allocated aligned
buffers shouldn't be difficult.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 1/5] dma-mapping: Rework dma_get_cache_alignment()
  2017-10-17  8:05 ` Huacai Chen
@ 2017-10-18 17:23   ` Mark Greer
  -1 siblings, 0 replies; 26+ messages in thread
From: Mark Greer @ 2017-10-18 17:23 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, Andrew Morton,
	Fuxin Zhang, linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, stable, Michael S . Tsirkin, Pawel Osciak,
	Kyungmin Park, Michael Chan, Benjamin Herrenschmidt, Ivan

On Tue, Oct 17, 2017 at 04:05:38PM +0800, Huacai Chen wrote:
> Make dma_get_cache_alignment() to accept a 'dev' argument. As a result,
> it can return different alignments due to different devices' I/O cache
> coherency.
> 
> Currently, ARM/ARM64 and MIPS support coherent & noncoherent devices
> co-exist. This may be extended in the future, so add a new function
> pointer (i.e, get_cache_alignment) in 'struct dma_map_ops' as a generic
> solution.

Hi Huacai.

> diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c
> index 67ffecc..c708457 100644
> --- a/drivers/tty/serial/mpsc.c
> +++ b/drivers/tty/serial/mpsc.c
> @@ -81,19 +81,19 @@
>   * Number of Tx & Rx descriptors must be powers of 2.
>   */
>  #define	MPSC_RXR_ENTRIES	32
> -#define	MPSC_RXRE_SIZE		dma_get_cache_alignment()
> +#define	MPSC_RXRE_SIZE		dma_get_cache_alignment(dma_dev)

I would much prefer that you add a parameter to the macro to avoid forcing
a non-flexible and non-obvious variable definition wherever it is used.
What I mean is something like:

#define MPSC_RXRE_SIZE(d)		dma_get_cache_alignment(d)

Similarly for all of the other macros and where they're used.

Thanks,

Mark
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 1/5] dma-mapping: Rework dma_get_cache_alignment()
@ 2017-10-18 17:23   ` Mark Greer
  0 siblings, 0 replies; 26+ messages in thread
From: Mark Greer @ 2017-10-18 17:23 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, Andrew Morton,
	Fuxin Zhang, linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, stable, Michael S . Tsirkin, Pawel Osciak,
	Kyungmin Park, Michael Chan, Benjamin Herrenschmidt,
	Ivan Mikhaylov, Tariq Toukan, Andy Gross, Mark A . Greer,
	Robert Baldyga

On Tue, Oct 17, 2017 at 04:05:38PM +0800, Huacai Chen wrote:
> Make dma_get_cache_alignment() to accept a 'dev' argument. As a result,
> it can return different alignments due to different devices' I/O cache
> coherency.
> 
> Currently, ARM/ARM64 and MIPS support coherent & noncoherent devices
> co-exist. This may be extended in the future, so add a new function
> pointer (i.e, get_cache_alignment) in 'struct dma_map_ops' as a generic
> solution.

Hi Huacai.

> diff --git a/drivers/tty/serial/mpsc.c b/drivers/tty/serial/mpsc.c
> index 67ffecc..c708457 100644
> --- a/drivers/tty/serial/mpsc.c
> +++ b/drivers/tty/serial/mpsc.c
> @@ -81,19 +81,19 @@
>   * Number of Tx & Rx descriptors must be powers of 2.
>   */
>  #define	MPSC_RXR_ENTRIES	32
> -#define	MPSC_RXRE_SIZE		dma_get_cache_alignment()
> +#define	MPSC_RXRE_SIZE		dma_get_cache_alignment(dma_dev)

I would much prefer that you add a parameter to the macro to avoid forcing
a non-flexible and non-obvious variable definition wherever it is used.
What I mean is something like:

#define MPSC_RXRE_SIZE(d)		dma_get_cache_alignment(d)

Similarly for all of the other macros and where they're used.

Thanks,

Mark
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 5/5] libata: Align DMA buffer to dma_get_cache_alignment()
  2017-10-17  9:43   ` Sergei Shtylyov
  2017-10-18  1:06       ` 陈华才
@ 2017-10-18 19:54     ` Alan Cox
  1 sibling, 0 replies; 26+ messages in thread
From: Alan Cox @ 2017-10-18 19:54 UTC (permalink / raw)
  To: Sergei Shtylyov
  Cc: Huacai Chen, Christoph Hellwig, Marek Szyprowski, Robin Murphy,
	Andrew Morton, Fuxin Zhang, linux-kernel, Ralf Baechle,
	James Hogan, linux-mips, James E . J . Bottomley,
	Martin K . Petersen, linux-scsi, Tejun Heo, linux-ide, stable

>     This function is called only for the PIO mode commands, so I doubt this is 
> necessary...

That is true but there are platforms out there that issue disk level PIO
commands via DMA (or can do so). Indeed the Cyrix MediaGX could do that
in the 1990s but I never add support 8)

So I think it makes sense to allocate the buffers DMA aligned, but it
doesn't seem to explain the situation in this case and I think it would
be helpful to know what platform and ATA driver is tripping this and wny
they are the only people in the universe to have the problem.

Alan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 5/5] libata: Align DMA buffer to dma_get_cache_alignment()
  2017-10-18 13:03   ` Tejun Heo
@ 2017-10-19  7:52       ` Matt Redfearn
  0 siblings, 0 replies; 26+ messages in thread
From: Matt Redfearn @ 2017-10-19  7:52 UTC (permalink / raw)
  To: Tejun Heo, Huacai Chen
  Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, Andrew Morton,
	Fuxin Zhang, linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	linux-ide, stable



On 18/10/17 14:03, Tejun Heo wrote:
> On Tue, Oct 17, 2017 at 04:05:42PM +0800, Huacai Chen wrote:
>> In non-coherent DMA mode, kernel uses cache flushing operations to
>> maintain I/O coherency, so in ata_do_dev_read_id() the DMA buffer
>> should be aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer
>> and a kernel structure share a same cache line, and if the kernel
>> structure has dirty data, cache_invalidate (no writeback) will cause
>> data corruption.
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
>> ---
>>   drivers/ata/libata-core.c | 15 +++++++++++++--
>>   1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
>> index ee4c1ec..e134955 100644
>> --- a/drivers/ata/libata-core.c
>> +++ b/drivers/ata/libata-core.c
>> @@ -1833,8 +1833,19 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
>>   unsigned int ata_do_dev_read_id(struct ata_device *dev,
>>   					struct ata_taskfile *tf, u16 *id)
>>   {
>> -	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
>> -				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
>> +	u16 *devid;
>> +	int res, size = sizeof(u16) * ATA_ID_WORDS;
>> +
>> +	if (IS_ALIGNED((unsigned long)id, dma_get_cache_alignment(&dev->tdev)))
>> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, size, 0);
>> +	else {
>> +		devid = kmalloc(size, GFP_KERNEL);
>> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, devid, size, 0);
>> +		memcpy(id, devid, size);
>> +		kfree(devid);
>> +	}
>> +
>> +	return res;
> Hmm... I think it'd be a lot better to ensure that the buffers are
> aligned properly to begin with.  There are only two buffers which are
> used for id reading - ata_port->sector_buf and ata_device->id.  Both
> are embedded arrays but making them separately allocated aligned
> buffers shouldn't be difficult.
>
> Thanks.

FWIW, I agree that the buffers used for DMA should be split out from the 
structure. We ran into this problem on MIPS last year, 
4ee34ea3a12396f35b26d90a094c75db95080baa ("libata: Align ata_device's id 
on a cacheline") partially fixed it, but likely should have also 
cacheline aligned the following devslp_timing in the struct such that we 
guarantee that members of the struct not used for DMA do not share the 
same cacheline as the DMA buffer. Not having this means that 
architectures, such as MIPS, which in some cases have to perform manual 
invalidation of DMA buffer can clobber valid adjacent data if it is in 
the same cacheline.

Thanks,
Matt

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 5/5] libata: Align DMA buffer to dma_get_cache_alignment()
@ 2017-10-19  7:52       ` Matt Redfearn
  0 siblings, 0 replies; 26+ messages in thread
From: Matt Redfearn @ 2017-10-19  7:52 UTC (permalink / raw)
  To: Tejun Heo, Huacai Chen
  Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, Andrew Morton,
	Fuxin Zhang, linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	linux-ide, stable



On 18/10/17 14:03, Tejun Heo wrote:
> On Tue, Oct 17, 2017 at 04:05:42PM +0800, Huacai Chen wrote:
>> In non-coherent DMA mode, kernel uses cache flushing operations to
>> maintain I/O coherency, so in ata_do_dev_read_id() the DMA buffer
>> should be aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer
>> and a kernel structure share a same cache line, and if the kernel
>> structure has dirty data, cache_invalidate (no writeback) will cause
>> data corruption.
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
>> ---
>>   drivers/ata/libata-core.c | 15 +++++++++++++--
>>   1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
>> index ee4c1ec..e134955 100644
>> --- a/drivers/ata/libata-core.c
>> +++ b/drivers/ata/libata-core.c
>> @@ -1833,8 +1833,19 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
>>   unsigned int ata_do_dev_read_id(struct ata_device *dev,
>>   					struct ata_taskfile *tf, u16 *id)
>>   {
>> -	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
>> -				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
>> +	u16 *devid;
>> +	int res, size = sizeof(u16) * ATA_ID_WORDS;
>> +
>> +	if (IS_ALIGNED((unsigned long)id, dma_get_cache_alignment(&dev->tdev)))
>> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, size, 0);
>> +	else {
>> +		devid = kmalloc(size, GFP_KERNEL);
>> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, devid, size, 0);
>> +		memcpy(id, devid, size);
>> +		kfree(devid);
>> +	}
>> +
>> +	return res;
> Hmm... I think it'd be a lot better to ensure that the buffers are
> aligned properly to begin with.  There are only two buffers which are
> used for id reading - ata_port->sector_buf and ata_device->id.  Both
> are embedded arrays but making them separately allocated aligned
> buffers shouldn't be difficult.
>
> Thanks.

FWIW, I agree that the buffers used for DMA should be split out from the 
structure. We ran into this problem on MIPS last year, 
4ee34ea3a12396f35b26d90a094c75db95080baa ("libata: Align ata_device's id 
on a cacheline") partially fixed it, but likely should have also 
cacheline aligned the following devslp_timing in the struct such that we 
guarantee that members of the struct not used for DMA do not share the 
same cacheline as the DMA buffer. Not having this means that 
architectures, such as MIPS, which in some cases have to perform manual 
invalidation of DMA buffer can clobber valid adjacent data if it is in 
the same cacheline.

Thanks,
Matt

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 1/5] dma-mapping: Rework dma_get_cache_alignment()
  2017-10-18 17:23   ` Mark Greer
@ 2017-10-19 15:09     ` Christoph Hellwig
  -1 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2017-10-19 15:09 UTC (permalink / raw)
  To: Mark Greer
  Cc: Huacai Chen, Christoph Hellwig, Marek Szyprowski, Robin Murphy,
	Andrew Morton, Fuxin Zhang, linux-kernel, Ralf Baechle,
	James Hogan, linux-mips, James E . J . Bottomley,
	Martin K . Petersen, linux-scsi, Tejun Heo, linux-ide, stable,
	Michael S . Tsirkin, Pawel Osciak, Kyungmin Park, Michael Chan,
	Benjamin Herrenschmidt

On Wed, Oct 18, 2017 at 10:23:36AM -0700, Mark Greer wrote:
> >  #define	MPSC_RXR_ENTRIES	32
> > -#define	MPSC_RXRE_SIZE		dma_get_cache_alignment()
> > +#define	MPSC_RXRE_SIZE		dma_get_cache_alignment(dma_dev)
> 
> I would much prefer that you add a parameter to the macro to avoid forcing
> a non-flexible and non-obvious variable definition wherever it is used.
> What I mean is something like:
> 
> #define MPSC_RXRE_SIZE(d)		dma_get_cache_alignment(d)
> 
> Similarly for all of the other macros and where they're used.

Agreed.  Except for that the patch looks fine to me, though.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 1/5] dma-mapping: Rework dma_get_cache_alignment()
@ 2017-10-19 15:09     ` Christoph Hellwig
  0 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2017-10-19 15:09 UTC (permalink / raw)
  To: Mark Greer
  Cc: Huacai Chen, Christoph Hellwig, Marek Szyprowski, Robin Murphy,
	Andrew Morton, Fuxin Zhang, linux-kernel, Ralf Baechle,
	James Hogan, linux-mips, James E . J . Bottomley,
	Martin K . Petersen, linux-scsi, Tejun Heo, linux-ide, stable,
	Michael S . Tsirkin, Pawel Osciak, Kyungmin Park, Michael Chan,
	Benjamin Herrenschmidt, Ivan Mikhaylov, Tariq Toukan, Andy Gross,
	Robert Baldyga

On Wed, Oct 18, 2017 at 10:23:36AM -0700, Mark Greer wrote:
> >  #define	MPSC_RXR_ENTRIES	32
> > -#define	MPSC_RXRE_SIZE		dma_get_cache_alignment()
> > +#define	MPSC_RXRE_SIZE		dma_get_cache_alignment(dma_dev)
> 
> I would much prefer that you add a parameter to the macro to avoid forcing
> a non-flexible and non-obvious variable definition wherever it is used.
> What I mean is something like:
> 
> #define MPSC_RXRE_SIZE(d)		dma_get_cache_alignment(d)
> 
> Similarly for all of the other macros and where they're used.

Agreed.  Except for that the patch looks fine to me, though.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 3/5] scsi: Align block queue to dma_get_cache_alignment()
  2017-10-17  8:05 ` [PATCH V8 3/5] scsi: Align block queue to dma_get_cache_alignment() Huacai Chen
@ 2017-10-19 15:10   ` Christoph Hellwig
  0 siblings, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2017-10-19 15:10 UTC (permalink / raw)
  To: Huacai Chen
  Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, Andrew Morton,
	Fuxin Zhang, linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, stable

On Tue, Oct 17, 2017 at 04:05:40PM +0800, Huacai Chen wrote:
> In non-coherent DMA mode, kernel uses cache flushing operations to
> maintain I/O coherency, so scsi's block queue should be aligned to
> ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer and a kernel structure
> share a same cache line, and if the kernel structure has dirty data,
> cache_invalidate (no writeback) will cause data corruption.

Looks fine to, and I like cleaning up the arcane 0x03 as wel.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 4/5] libsas: Align SMP req/resp to dma_get_cache_alignment()
  2017-10-17 11:55   ` Marek Szyprowski
  2017-10-18  1:12     ` [PATCH V8 4/5] libsas: Align SMP req/resp todma_get_cache_alignment() 陈华才
@ 2017-10-19 15:12     ` Christoph Hellwig
  1 sibling, 0 replies; 26+ messages in thread
From: Christoph Hellwig @ 2017-10-19 15:12 UTC (permalink / raw)
  To: Marek Szyprowski
  Cc: Huacai Chen, Christoph Hellwig, Robin Murphy, Andrew Morton,
	Fuxin Zhang, linux-kernel, Ralf Baechle, James Hogan, linux-mips,
	James E . J . Bottomley, Martin K . Petersen, linux-scsi,
	Tejun Heo, linux-ide, stable

On Tue, Oct 17, 2017 at 01:55:43PM +0200, Marek Szyprowski wrote:
> If I remember correctly, kernel guarantees that each kmalloced buffer is
> always at least aligned to the CPU cache line, so CPU cache can be
> invalidated on the allocated buffer without corrupting anything else.

Yes, from slab.h:

/*
 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
 * alignment larger than the alignment of a 64-bit integer.
 * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
 */
#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
#define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN)
#else
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif

Mips sets this for a few subarchitectures, but it seems like you need
to set it for yours as well.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 4/5] libsas: Align SMP req/resp to dma_get_cache_alignment()
  2017-10-17  8:05 ` [PATCH V8 4/5] libsas: Align SMP req/resp " Huacai Chen
  2017-10-17 11:55   ` Marek Szyprowski
@ 2017-10-20  0:04     ` kbuild test robot
  1 sibling, 0 replies; 26+ messages in thread
From: kbuild test robot @ 2017-10-20  0:04 UTC (permalink / raw)
  Cc: kbuild-all, Christoph Hellwig, Marek Szyprowski, Robin Murphy,
	Andrew Morton, Fuxin Zhang, linux-kernel, Ralf Baechle,
	James Hogan, linux-mips, James E . J . Bottomley,
	Martin K . Petersen, linux-scsi, Tejun Heo, linux-ide,
	Huacai Chen, stable

[-- Attachment #1: Type: text/plain, Size: 9547 bytes --]

Hi Huacai,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.14-rc5 next-20171018]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Huacai-Chen/dma-mapping-Rework-dma_get_cache_alignment/20171020-050317
config: um-allyesconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=um 

All errors (new ones prefixed by >>):

   drivers/scsi/libsas/sas_expander.c: In function 'sas_ex_phy_discover':
>> drivers/scsi/libsas/sas_expander.c:410:10: error: implicit declaration of function 'dma_get_cache_alignment' [-Werror=implicit-function-declaration]
     align = dma_get_cache_alignment(&dev->phy->dev);
             ^~~~~~~~~~~~~~~~~~~~~~~
   Cyclomatic Complexity 5 include/linux/compiler.h:__write_once_size
   Cyclomatic Complexity 1 arch/x86/include/asm/atomic.h:atomic_set
   Cyclomatic Complexity 2 arch/x86/include/asm/bitops.h:set_bit
   Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:constant_test_bit
   Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:fls64
   Cyclomatic Complexity 1 arch/x86/include/uapi/asm/swab.h:__arch_swab64
   Cyclomatic Complexity 1 include/uapi/linux/swab.h:__fswab16
   Cyclomatic Complexity 1 include/uapi/linux/swab.h:__fswab64
   Cyclomatic Complexity 1 include/linux/log2.h:__ilog2_u64
   Cyclomatic Complexity 1 include/linux/list.h:INIT_LIST_HEAD
   Cyclomatic Complexity 2 include/linux/list.h:__list_add
   Cyclomatic Complexity 1 include/linux/list.h:list_add_tail
   Cyclomatic Complexity 1 include/linux/list.h:__list_del
   Cyclomatic Complexity 2 include/linux/list.h:__list_del_entry
   Cyclomatic Complexity 1 include/linux/list.h:list_del
   Cyclomatic Complexity 1 include/asm-generic/getorder.h:__get_order
   Cyclomatic Complexity 1 include/linux/spinlock.h:spinlock_check
   Cyclomatic Complexity 1 include/linux/spinlock.h:spin_lock_irq
   Cyclomatic Complexity 1 include/linux/spinlock.h:spin_unlock_irq
   Cyclomatic Complexity 1 include/linux/spinlock.h:spin_unlock_irqrestore
   Cyclomatic Complexity 1 include/linux/refcount.h:refcount_set
   Cyclomatic Complexity 28 include/linux/slab.h:kmalloc_index
   Cyclomatic Complexity 68 include/linux/slab.h:kmalloc_large
   Cyclomatic Complexity 5 include/linux/slab.h:kmalloc
   Cyclomatic Complexity 1 include/linux/slab.h:kzalloc
   Cyclomatic Complexity 1 include/linux/kref.h:kref_init
   Cyclomatic Complexity 1 include/linux/kref.h:kref_get
   Cyclomatic Complexity 2 include/linux/kref.h:kref_put
   Cyclomatic Complexity 1 include/scsi/scsi.h:scsi_to_u32
   Cyclomatic Complexity 1 include/scsi/sas_ata.h:dev_is_sata
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_internal.h:sas_fill_in_rphy
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_internal.h:sas_add_parent_port
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_internal.h:sas_alloc_device
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_internal.h:sas_put_device
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:alloc_smp_req
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:alloc_smp_resp
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_route_char
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:to_dev_type
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:dev_type_flutter
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_print_parent_topology_bug
   Cyclomatic Complexity 17 drivers/scsi/libsas/sas_expander.c:smp_execute_task_sg
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:smp_execute_task
   Cyclomatic Complexity 21 drivers/scsi/libsas/sas_expander.c:sas_configure_present
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_get_phy_discover
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_get_phy_change_count
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_find_bcast_phy
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_get_ex_change_count
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:smp_task_timedout
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:smp_task_done
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:ex_assign_report_general
   Cyclomatic Complexity 22 drivers/scsi/libsas/sas_expander.c:sas_check_eeds
   Cyclomatic Complexity 23 drivers/scsi/libsas/sas_expander.c:sas_check_parent_topology
   Cyclomatic Complexity 11 drivers/scsi/libsas/sas_expander.c:sas_configure_set
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_configure_phy
   Cyclomatic Complexity 11 drivers/scsi/libsas/sas_expander.c:sas_configure_parent
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:sas_configure_routing
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:sas_disable_routing
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_find_sub_addr
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_get_phy_attached_dev
   Cyclomatic Complexity 35 drivers/scsi/libsas/sas_expander.c:sas_set_ex_phy
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_ex_phy_discover_helper
   Cyclomatic Complexity 10 drivers/scsi/libsas/sas_expander.c:sas_ex_general
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:ex_assign_manuf_info
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_ex_manuf_info
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_ex_get_linkrate
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_ex_join_wide_port
   Cyclomatic Complexity 10 drivers/scsi/libsas/sas_expander.c:sas_dev_present_in_domain
   Cyclomatic Complexity 12 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_end_dev
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_unregister_ex_tree
   Cyclomatic Complexity 12 drivers/scsi/libsas/sas_expander.c:sas_unregister_devs_sas_addr
   Cyclomatic Complexity 10 drivers/scsi/libsas/sas_expander.c:sas_find_bcast_dev
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_ex_to_ata
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_ex_phy_discover
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_expander_discover
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_smp_phy_control
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:sas_ex_disable_phy
   Cyclomatic Complexity 13 drivers/scsi/libsas/sas_expander.c:sas_check_ex_subtractive_boundary
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_discover_expander
   Cyclomatic Complexity 9 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_expander
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_ex_disable_port
   Cyclomatic Complexity 14 drivers/scsi/libsas/sas_expander.c:sas_check_level_subtractive_boundary
   Cyclomatic Complexity 40 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_dev
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_devices
   Cyclomatic Complexity 8 drivers/scsi/libsas/sas_expander.c:sas_ex_level_discovery
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:sas_ex_bfs_disc
   Cyclomatic Complexity 8 drivers/scsi/libsas/sas_expander.c:sas_discover_bfs_by_root_level
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_discover_bfs_by_root
   Cyclomatic Complexity 12 drivers/scsi/libsas/sas_expander.c:sas_discover_new
   Cyclomatic Complexity 22 drivers/scsi/libsas/sas_expander.c:sas_rediscover_dev
   Cyclomatic Complexity 9 drivers/scsi/libsas/sas_expander.c:sas_rediscover
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_smp_get_phy_events
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_discover_root_expander
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_ex_revalidate_domain
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_smp_handler
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:_GLOBAL__sub_I_65535_0_sas_ex_to_ata
   cc1: some warnings being treated as errors

vim +/dma_get_cache_alignment +410 drivers/scsi/libsas/sas_expander.c

   402	
   403	int sas_ex_phy_discover(struct domain_device *dev, int single)
   404	{
   405		struct expander_device *ex = &dev->ex_dev;
   406		int  res = 0, align;
   407		u8   *disc_req;
   408		u8   *disc_resp;
   409	
 > 410		align = dma_get_cache_alignment(&dev->phy->dev);
   411	
   412		disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align);
   413		if (!disc_req)
   414			return -ENOMEM;
   415	
   416		disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
   417		if (!disc_resp) {
   418			kfree(disc_req);
   419			return -ENOMEM;
   420		}
   421	
   422		disc_req[1] = SMP_DISCOVER;
   423	
   424		if (0 <= single && single < ex->num_phys) {
   425			res = sas_ex_phy_discover_helper(dev, disc_req, disc_resp, single);
   426		} else {
   427			int i;
   428	
   429			for (i = 0; i < ex->num_phys; i++) {
   430				res = sas_ex_phy_discover_helper(dev, disc_req,
   431								 disc_resp, i);
   432				if (res)
   433					goto out_err;
   434			}
   435		}
   436	out_err:
   437		kfree(disc_resp);
   438		kfree(disc_req);
   439		return res;
   440	}
   441	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 19622 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 4/5] libsas: Align SMP req/resp to dma_get_cache_alignment()
@ 2017-10-20  0:04     ` kbuild test robot
  0 siblings, 0 replies; 26+ messages in thread
From: kbuild test robot @ 2017-10-20  0:04 UTC (permalink / raw)
  To: Huacai Chen
  Cc: kbuild-all, Christoph Hellwig, Marek Szyprowski, Robin Murphy,
	Andrew Morton, Fuxin Zhang, linux-kernel, Ralf Baechle,
	James Hogan, linux-mips, James E . J . Bottomley,
	Martin K . Petersen, linux-scsi, Tejun Heo, linux-ide,
	Huacai Chen, stable

[-- Attachment #1: Type: text/plain, Size: 9547 bytes --]

Hi Huacai,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.14-rc5 next-20171018]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Huacai-Chen/dma-mapping-Rework-dma_get_cache_alignment/20171020-050317
config: um-allyesconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=um 

All errors (new ones prefixed by >>):

   drivers/scsi/libsas/sas_expander.c: In function 'sas_ex_phy_discover':
>> drivers/scsi/libsas/sas_expander.c:410:10: error: implicit declaration of function 'dma_get_cache_alignment' [-Werror=implicit-function-declaration]
     align = dma_get_cache_alignment(&dev->phy->dev);
             ^~~~~~~~~~~~~~~~~~~~~~~
   Cyclomatic Complexity 5 include/linux/compiler.h:__write_once_size
   Cyclomatic Complexity 1 arch/x86/include/asm/atomic.h:atomic_set
   Cyclomatic Complexity 2 arch/x86/include/asm/bitops.h:set_bit
   Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:constant_test_bit
   Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:fls64
   Cyclomatic Complexity 1 arch/x86/include/uapi/asm/swab.h:__arch_swab64
   Cyclomatic Complexity 1 include/uapi/linux/swab.h:__fswab16
   Cyclomatic Complexity 1 include/uapi/linux/swab.h:__fswab64
   Cyclomatic Complexity 1 include/linux/log2.h:__ilog2_u64
   Cyclomatic Complexity 1 include/linux/list.h:INIT_LIST_HEAD
   Cyclomatic Complexity 2 include/linux/list.h:__list_add
   Cyclomatic Complexity 1 include/linux/list.h:list_add_tail
   Cyclomatic Complexity 1 include/linux/list.h:__list_del
   Cyclomatic Complexity 2 include/linux/list.h:__list_del_entry
   Cyclomatic Complexity 1 include/linux/list.h:list_del
   Cyclomatic Complexity 1 include/asm-generic/getorder.h:__get_order
   Cyclomatic Complexity 1 include/linux/spinlock.h:spinlock_check
   Cyclomatic Complexity 1 include/linux/spinlock.h:spin_lock_irq
   Cyclomatic Complexity 1 include/linux/spinlock.h:spin_unlock_irq
   Cyclomatic Complexity 1 include/linux/spinlock.h:spin_unlock_irqrestore
   Cyclomatic Complexity 1 include/linux/refcount.h:refcount_set
   Cyclomatic Complexity 28 include/linux/slab.h:kmalloc_index
   Cyclomatic Complexity 68 include/linux/slab.h:kmalloc_large
   Cyclomatic Complexity 5 include/linux/slab.h:kmalloc
   Cyclomatic Complexity 1 include/linux/slab.h:kzalloc
   Cyclomatic Complexity 1 include/linux/kref.h:kref_init
   Cyclomatic Complexity 1 include/linux/kref.h:kref_get
   Cyclomatic Complexity 2 include/linux/kref.h:kref_put
   Cyclomatic Complexity 1 include/scsi/scsi.h:scsi_to_u32
   Cyclomatic Complexity 1 include/scsi/sas_ata.h:dev_is_sata
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_internal.h:sas_fill_in_rphy
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_internal.h:sas_add_parent_port
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_internal.h:sas_alloc_device
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_internal.h:sas_put_device
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:alloc_smp_req
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:alloc_smp_resp
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_route_char
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:to_dev_type
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:dev_type_flutter
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_print_parent_topology_bug
   Cyclomatic Complexity 17 drivers/scsi/libsas/sas_expander.c:smp_execute_task_sg
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:smp_execute_task
   Cyclomatic Complexity 21 drivers/scsi/libsas/sas_expander.c:sas_configure_present
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_get_phy_discover
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_get_phy_change_count
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_find_bcast_phy
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_get_ex_change_count
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:smp_task_timedout
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:smp_task_done
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:ex_assign_report_general
   Cyclomatic Complexity 22 drivers/scsi/libsas/sas_expander.c:sas_check_eeds
   Cyclomatic Complexity 23 drivers/scsi/libsas/sas_expander.c:sas_check_parent_topology
   Cyclomatic Complexity 11 drivers/scsi/libsas/sas_expander.c:sas_configure_set
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_configure_phy
   Cyclomatic Complexity 11 drivers/scsi/libsas/sas_expander.c:sas_configure_parent
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:sas_configure_routing
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:sas_disable_routing
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_find_sub_addr
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_get_phy_attached_dev
   Cyclomatic Complexity 35 drivers/scsi/libsas/sas_expander.c:sas_set_ex_phy
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_ex_phy_discover_helper
   Cyclomatic Complexity 10 drivers/scsi/libsas/sas_expander.c:sas_ex_general
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:ex_assign_manuf_info
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_ex_manuf_info
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_ex_get_linkrate
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_ex_join_wide_port
   Cyclomatic Complexity 10 drivers/scsi/libsas/sas_expander.c:sas_dev_present_in_domain
   Cyclomatic Complexity 12 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_end_dev
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_unregister_ex_tree
   Cyclomatic Complexity 12 drivers/scsi/libsas/sas_expander.c:sas_unregister_devs_sas_addr
   Cyclomatic Complexity 10 drivers/scsi/libsas/sas_expander.c:sas_find_bcast_dev
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_ex_to_ata
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_ex_phy_discover
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_expander_discover
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_smp_phy_control
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:sas_ex_disable_phy
   Cyclomatic Complexity 13 drivers/scsi/libsas/sas_expander.c:sas_check_ex_subtractive_boundary
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_discover_expander
   Cyclomatic Complexity 9 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_expander
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_ex_disable_port
   Cyclomatic Complexity 14 drivers/scsi/libsas/sas_expander.c:sas_check_level_subtractive_boundary
   Cyclomatic Complexity 40 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_dev
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_devices
   Cyclomatic Complexity 8 drivers/scsi/libsas/sas_expander.c:sas_ex_level_discovery
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:sas_ex_bfs_disc
   Cyclomatic Complexity 8 drivers/scsi/libsas/sas_expander.c:sas_discover_bfs_by_root_level
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_discover_bfs_by_root
   Cyclomatic Complexity 12 drivers/scsi/libsas/sas_expander.c:sas_discover_new
   Cyclomatic Complexity 22 drivers/scsi/libsas/sas_expander.c:sas_rediscover_dev
   Cyclomatic Complexity 9 drivers/scsi/libsas/sas_expander.c:sas_rediscover
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_smp_get_phy_events
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_discover_root_expander
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_ex_revalidate_domain
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_smp_handler
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:_GLOBAL__sub_I_65535_0_sas_ex_to_ata
   cc1: some warnings being treated as errors

vim +/dma_get_cache_alignment +410 drivers/scsi/libsas/sas_expander.c

   402	
   403	int sas_ex_phy_discover(struct domain_device *dev, int single)
   404	{
   405		struct expander_device *ex = &dev->ex_dev;
   406		int  res = 0, align;
   407		u8   *disc_req;
   408		u8   *disc_resp;
   409	
 > 410		align = dma_get_cache_alignment(&dev->phy->dev);
   411	
   412		disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align);
   413		if (!disc_req)
   414			return -ENOMEM;
   415	
   416		disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
   417		if (!disc_resp) {
   418			kfree(disc_req);
   419			return -ENOMEM;
   420		}
   421	
   422		disc_req[1] = SMP_DISCOVER;
   423	
   424		if (0 <= single && single < ex->num_phys) {
   425			res = sas_ex_phy_discover_helper(dev, disc_req, disc_resp, single);
   426		} else {
   427			int i;
   428	
   429			for (i = 0; i < ex->num_phys; i++) {
   430				res = sas_ex_phy_discover_helper(dev, disc_req,
   431								 disc_resp, i);
   432				if (res)
   433					goto out_err;
   434			}
   435		}
   436	out_err:
   437		kfree(disc_resp);
   438		kfree(disc_req);
   439		return res;
   440	}
   441	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 19622 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 4/5] libsas: Align SMP req/resp to dma_get_cache_alignment()
@ 2017-10-20  0:04     ` kbuild test robot
  0 siblings, 0 replies; 26+ messages in thread
From: kbuild test robot @ 2017-10-20  0:04 UTC (permalink / raw)
  To: Huacai Chen
  Cc: kbuild-all, Christoph Hellwig, Marek Szyprowski, Robin Murphy,
	Andrew Morton, Fuxin Zhang, linux-kernel, Ralf Baechle,
	James Hogan, linux-mips, James E . J . Bottomley,
	Martin K . Petersen, linux-scsi, Tejun Heo, linux-ide, stable

[-- Attachment #1: Type: text/plain, Size: 9547 bytes --]

Hi Huacai,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.14-rc5 next-20171018]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Huacai-Chen/dma-mapping-Rework-dma_get_cache_alignment/20171020-050317
config: um-allyesconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=um 

All errors (new ones prefixed by >>):

   drivers/scsi/libsas/sas_expander.c: In function 'sas_ex_phy_discover':
>> drivers/scsi/libsas/sas_expander.c:410:10: error: implicit declaration of function 'dma_get_cache_alignment' [-Werror=implicit-function-declaration]
     align = dma_get_cache_alignment(&dev->phy->dev);
             ^~~~~~~~~~~~~~~~~~~~~~~
   Cyclomatic Complexity 5 include/linux/compiler.h:__write_once_size
   Cyclomatic Complexity 1 arch/x86/include/asm/atomic.h:atomic_set
   Cyclomatic Complexity 2 arch/x86/include/asm/bitops.h:set_bit
   Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:constant_test_bit
   Cyclomatic Complexity 1 arch/x86/include/asm/bitops.h:fls64
   Cyclomatic Complexity 1 arch/x86/include/uapi/asm/swab.h:__arch_swab64
   Cyclomatic Complexity 1 include/uapi/linux/swab.h:__fswab16
   Cyclomatic Complexity 1 include/uapi/linux/swab.h:__fswab64
   Cyclomatic Complexity 1 include/linux/log2.h:__ilog2_u64
   Cyclomatic Complexity 1 include/linux/list.h:INIT_LIST_HEAD
   Cyclomatic Complexity 2 include/linux/list.h:__list_add
   Cyclomatic Complexity 1 include/linux/list.h:list_add_tail
   Cyclomatic Complexity 1 include/linux/list.h:__list_del
   Cyclomatic Complexity 2 include/linux/list.h:__list_del_entry
   Cyclomatic Complexity 1 include/linux/list.h:list_del
   Cyclomatic Complexity 1 include/asm-generic/getorder.h:__get_order
   Cyclomatic Complexity 1 include/linux/spinlock.h:spinlock_check
   Cyclomatic Complexity 1 include/linux/spinlock.h:spin_lock_irq
   Cyclomatic Complexity 1 include/linux/spinlock.h:spin_unlock_irq
   Cyclomatic Complexity 1 include/linux/spinlock.h:spin_unlock_irqrestore
   Cyclomatic Complexity 1 include/linux/refcount.h:refcount_set
   Cyclomatic Complexity 28 include/linux/slab.h:kmalloc_index
   Cyclomatic Complexity 68 include/linux/slab.h:kmalloc_large
   Cyclomatic Complexity 5 include/linux/slab.h:kmalloc
   Cyclomatic Complexity 1 include/linux/slab.h:kzalloc
   Cyclomatic Complexity 1 include/linux/kref.h:kref_init
   Cyclomatic Complexity 1 include/linux/kref.h:kref_get
   Cyclomatic Complexity 2 include/linux/kref.h:kref_put
   Cyclomatic Complexity 1 include/scsi/scsi.h:scsi_to_u32
   Cyclomatic Complexity 1 include/scsi/sas_ata.h:dev_is_sata
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_internal.h:sas_fill_in_rphy
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_internal.h:sas_add_parent_port
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_internal.h:sas_alloc_device
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_internal.h:sas_put_device
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:alloc_smp_req
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:alloc_smp_resp
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_route_char
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:to_dev_type
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:dev_type_flutter
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_print_parent_topology_bug
   Cyclomatic Complexity 17 drivers/scsi/libsas/sas_expander.c:smp_execute_task_sg
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:smp_execute_task
   Cyclomatic Complexity 21 drivers/scsi/libsas/sas_expander.c:sas_configure_present
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_get_phy_discover
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_get_phy_change_count
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_find_bcast_phy
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_get_ex_change_count
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:smp_task_timedout
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:smp_task_done
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:ex_assign_report_general
   Cyclomatic Complexity 22 drivers/scsi/libsas/sas_expander.c:sas_check_eeds
   Cyclomatic Complexity 23 drivers/scsi/libsas/sas_expander.c:sas_check_parent_topology
   Cyclomatic Complexity 11 drivers/scsi/libsas/sas_expander.c:sas_configure_set
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_configure_phy
   Cyclomatic Complexity 11 drivers/scsi/libsas/sas_expander.c:sas_configure_parent
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:sas_configure_routing
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:sas_disable_routing
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_find_sub_addr
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_get_phy_attached_dev
   Cyclomatic Complexity 35 drivers/scsi/libsas/sas_expander.c:sas_set_ex_phy
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_ex_phy_discover_helper
   Cyclomatic Complexity 10 drivers/scsi/libsas/sas_expander.c:sas_ex_general
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:ex_assign_manuf_info
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_ex_manuf_info
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_ex_get_linkrate
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_ex_join_wide_port
   Cyclomatic Complexity 10 drivers/scsi/libsas/sas_expander.c:sas_dev_present_in_domain
   Cyclomatic Complexity 12 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_end_dev
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_unregister_ex_tree
   Cyclomatic Complexity 12 drivers/scsi/libsas/sas_expander.c:sas_unregister_devs_sas_addr
   Cyclomatic Complexity 10 drivers/scsi/libsas/sas_expander.c:sas_find_bcast_dev
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_ex_to_ata
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_ex_phy_discover
   Cyclomatic Complexity 3 drivers/scsi/libsas/sas_expander.c:sas_expander_discover
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_smp_phy_control
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:sas_ex_disable_phy
   Cyclomatic Complexity 13 drivers/scsi/libsas/sas_expander.c:sas_check_ex_subtractive_boundary
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_discover_expander
   Cyclomatic Complexity 9 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_expander
   Cyclomatic Complexity 6 drivers/scsi/libsas/sas_expander.c:sas_ex_disable_port
   Cyclomatic Complexity 14 drivers/scsi/libsas/sas_expander.c:sas_check_level_subtractive_boundary
   Cyclomatic Complexity 40 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_dev
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_ex_discover_devices
   Cyclomatic Complexity 8 drivers/scsi/libsas/sas_expander.c:sas_ex_level_discovery
   Cyclomatic Complexity 2 drivers/scsi/libsas/sas_expander.c:sas_ex_bfs_disc
   Cyclomatic Complexity 8 drivers/scsi/libsas/sas_expander.c:sas_discover_bfs_by_root_level
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_discover_bfs_by_root
   Cyclomatic Complexity 12 drivers/scsi/libsas/sas_expander.c:sas_discover_new
   Cyclomatic Complexity 22 drivers/scsi/libsas/sas_expander.c:sas_rediscover_dev
   Cyclomatic Complexity 9 drivers/scsi/libsas/sas_expander.c:sas_rediscover
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_smp_get_phy_events
   Cyclomatic Complexity 4 drivers/scsi/libsas/sas_expander.c:sas_discover_root_expander
   Cyclomatic Complexity 5 drivers/scsi/libsas/sas_expander.c:sas_ex_revalidate_domain
   Cyclomatic Complexity 7 drivers/scsi/libsas/sas_expander.c:sas_smp_handler
   Cyclomatic Complexity 1 drivers/scsi/libsas/sas_expander.c:_GLOBAL__sub_I_65535_0_sas_ex_to_ata
   cc1: some warnings being treated as errors

vim +/dma_get_cache_alignment +410 drivers/scsi/libsas/sas_expander.c

   402	
   403	int sas_ex_phy_discover(struct domain_device *dev, int single)
   404	{
   405		struct expander_device *ex = &dev->ex_dev;
   406		int  res = 0, align;
   407		u8   *disc_req;
   408		u8   *disc_resp;
   409	
 > 410		align = dma_get_cache_alignment(&dev->phy->dev);
   411	
   412		disc_req = alloc_smp_req(DISCOVER_REQ_SIZE, align);
   413		if (!disc_req)
   414			return -ENOMEM;
   415	
   416		disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE, align);
   417		if (!disc_resp) {
   418			kfree(disc_req);
   419			return -ENOMEM;
   420		}
   421	
   422		disc_req[1] = SMP_DISCOVER;
   423	
   424		if (0 <= single && single < ex->num_phys) {
   425			res = sas_ex_phy_discover_helper(dev, disc_req, disc_resp, single);
   426		} else {
   427			int i;
   428	
   429			for (i = 0; i < ex->num_phys; i++) {
   430				res = sas_ex_phy_discover_helper(dev, disc_req,
   431								 disc_resp, i);
   432				if (res)
   433					goto out_err;
   434			}
   435		}
   436	out_err:
   437		kfree(disc_resp);
   438		kfree(disc_req);
   439		return res;
   440	}
   441	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 19622 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment()
  2017-10-19  7:52       ` Matt Redfearn
@ 2017-10-20  4:25         ` 陈华才
  -1 siblings, 0 replies; 26+ messages in thread
From: 陈华才 @ 2017-10-20  4:25 UTC (permalink / raw)
  To: Matt Redfearn, Tejun Heo
  Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, AndrewMorton,
	Fuxin Zhang, linux-kernel, Ralf Baechle, JamesHogan, linux-mips,
	James E . J .Bottomley, Martin K . Petersen, linux-scsi,
	linux-ide

Hi, Matt,

I found that 4ee34ea3a12396f35b26d90a094c75db ("libata: Align ata_device's id on a cacheline") can resolve everything. Because the size of id[ATA_ID_WORDS] is already aligned and devslp_timing needn't to be aligned. So, In V9 of this series I will drop this patch. Why I had problems before? because I used linux-4.4.

Huacai
 
 
------------------ Original ------------------
From:  "Matt Redfearn"<matt.redfearn@mips.com>;
Date:  Thu, Oct 19, 2017 03:52 PM
To:  "Tejun Heo"<tj@kernel.org>; "Huacai Chen"<chenhc@lemote.com>; 
Cc:  "Christoph Hellwig"<hch@lst.de>; "Marek Szyprowski"<m.szyprowski@samsung.com>; "Robin Murphy"<robin.murphy@arm.com>; "AndrewMorton"<akpm@linux-foundation.org>; "Fuxin Zhang"<zhangfx@lemote.com>; "linux-kernel"<linux-kernel@vger.kernel.org>; "Ralf Baechle"<ralf@linux-mips.org>; "JamesHogan"<james.hogan@imgtec.com>; "linux-mips"<linux-mips@linux-mips.org>; "James E . J .Bottomley"<jejb@linux.vnet.ibm.com>; "Martin K . Petersen"<martin.petersen@oracle.com>; "linux-scsi"<linux-scsi@vger.kernel.org>; "linux-ide"<linux-ide@vger.kernel.org>; "stable"<stable@vger.kernel.org>; 
Subject:  Re: [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment()

 


On 18/10/17 14:03, Tejun Heo wrote:
> On Tue, Oct 17, 2017 at 04:05:42PM +0800, Huacai Chen wrote:
>> In non-coherent DMA mode, kernel uses cache flushing operations to
>> maintain I/O coherency, so in ata_do_dev_read_id() the DMA buffer
>> should be aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer
>> and a kernel structure share a same cache line, and if the kernel
>> structure has dirty data, cache_invalidate (no writeback) will cause
>> data corruption.
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
>> ---
>>   drivers/ata/libata-core.c | 15 +++++++++++++--
>>   1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
>> index ee4c1ec..e134955 100644
>> --- a/drivers/ata/libata-core.c
>> +++ b/drivers/ata/libata-core.c
>> @@ -1833,8 +1833,19 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
>>   unsigned int ata_do_dev_read_id(struct ata_device *dev,
>>   					struct ata_taskfile *tf, u16 *id)
>>   {
>> -	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
>> -				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
>> +	u16 *devid;
>> +	int res, size = sizeof(u16) * ATA_ID_WORDS;
>> +
>> +	if (IS_ALIGNED((unsigned long)id, dma_get_cache_alignment(&dev->tdev)))
>> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, size, 0);
>> +	else {
>> +		devid = kmalloc(size, GFP_KERNEL);
>> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, devid, size, 0);
>> +		memcpy(id, devid, size);
>> +		kfree(devid);
>> +	}
>> +
>> +	return res;
> Hmm... I think it'd be a lot better to ensure that the buffers are
> aligned properly to begin with.  There are only two buffers which are
> used for id reading - ata_port->sector_buf and ata_device->id.  Both
> are embedded arrays but making them separately allocated aligned
> buffers shouldn't be difficult.
>
> Thanks.

FWIW, I agree that the buffers used for DMA should be split out from the 
structure. We ran into this problem on MIPS last year, 
4ee34ea3a12396f35b26d90a094c75db95080baa ("libata: Align ata_device's id 
on a cacheline") partially fixed it, but likely should have also 
cacheline aligned the following devslp_timing in the struct such that we 
guarantee that members of the struct not used for DMA do not share the 
same cacheline as the DMA buffer. Not having this means that 
architectures, such as MIPS, which in some cases have to perform manual 
invalidation of DMA buffer can clobber valid adjacent data if it is in 
the same cacheline.

Thanks,
Matt

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment()
@ 2017-10-20  4:25         ` 陈华才
  0 siblings, 0 replies; 26+ messages in thread
From: 陈华才 @ 2017-10-20  4:25 UTC (permalink / raw)
  To: Matt Redfearn, Tejun Heo
  Cc: Christoph Hellwig, Marek Szyprowski, Robin Murphy, AndrewMorton,
	Fuxin Zhang, linux-kernel, Ralf Baechle, JamesHogan, linux-mips,
	James E . J .Bottomley, Martin K . Petersen, linux-scsi,
	linux-ide, stable

Hi, Matt,

I found that 4ee34ea3a12396f35b26d90a094c75db ("libata: Align ata_device's id on a cacheline") can resolve everything. Because the size of id[ATA_ID_WORDS] is already aligned and devslp_timing needn't to be aligned. So, In V9 of this series I will drop this patch. Why I had problems before? because I used linux-4.4.

Huacai
 
 
------------------ Original ------------------
From:  "Matt Redfearn"<matt.redfearn@mips.com>;
Date:  Thu, Oct 19, 2017 03:52 PM
To:  "Tejun Heo"<tj@kernel.org>; "Huacai Chen"<chenhc@lemote.com>; 
Cc:  "Christoph Hellwig"<hch@lst.de>; "Marek Szyprowski"<m.szyprowski@samsung.com>; "Robin Murphy"<robin.murphy@arm.com>; "AndrewMorton"<akpm@linux-foundation.org>; "Fuxin Zhang"<zhangfx@lemote.com>; "linux-kernel"<linux-kernel@vger.kernel.org>; "Ralf Baechle"<ralf@linux-mips.org>; "JamesHogan"<james.hogan@imgtec.com>; "linux-mips"<linux-mips@linux-mips.org>; "James E . J .Bottomley"<jejb@linux.vnet.ibm.com>; "Martin K . Petersen"<martin.petersen@oracle.com>; "linux-scsi"<linux-scsi@vger.kernel.org>; "linux-ide"<linux-ide@vger.kernel.org>; "stable"<stable@vger.kernel.org>; 
Subject:  Re: [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment()

 


On 18/10/17 14:03, Tejun Heo wrote:
> On Tue, Oct 17, 2017 at 04:05:42PM +0800, Huacai Chen wrote:
>> In non-coherent DMA mode, kernel uses cache flushing operations to
>> maintain I/O coherency, so in ata_do_dev_read_id() the DMA buffer
>> should be aligned to ARCH_DMA_MINALIGN. Otherwise, If a DMA buffer
>> and a kernel structure share a same cache line, and if the kernel
>> structure has dirty data, cache_invalidate (no writeback) will cause
>> data corruption.
>>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Huacai Chen <chenhc@lemote.com>
>> ---
>>   drivers/ata/libata-core.c | 15 +++++++++++++--
>>   1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
>> index ee4c1ec..e134955 100644
>> --- a/drivers/ata/libata-core.c
>> +++ b/drivers/ata/libata-core.c
>> @@ -1833,8 +1833,19 @@ static u32 ata_pio_mask_no_iordy(const struct ata_device *adev)
>>   unsigned int ata_do_dev_read_id(struct ata_device *dev,
>>   					struct ata_taskfile *tf, u16 *id)
>>   {
>> -	return ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE,
>> -				     id, sizeof(id[0]) * ATA_ID_WORDS, 0);
>> +	u16 *devid;
>> +	int res, size = sizeof(u16) * ATA_ID_WORDS;
>> +
>> +	if (IS_ALIGNED((unsigned long)id, dma_get_cache_alignment(&dev->tdev)))
>> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, id, size, 0);
>> +	else {
>> +		devid = kmalloc(size, GFP_KERNEL);
>> +		res = ata_exec_internal(dev, tf, NULL, DMA_FROM_DEVICE, devid, size, 0);
>> +		memcpy(id, devid, size);
>> +		kfree(devid);
>> +	}
>> +
>> +	return res;
> Hmm... I think it'd be a lot better to ensure that the buffers are
> aligned properly to begin with.  There are only two buffers which are
> used for id reading - ata_port->sector_buf and ata_device->id.  Both
> are embedded arrays but making them separately allocated aligned
> buffers shouldn't be difficult.
>
> Thanks.

FWIW, I agree that the buffers used for DMA should be split out from the 
structure. We ran into this problem on MIPS last year, 
4ee34ea3a12396f35b26d90a094c75db95080baa ("libata: Align ata_device's id 
on a cacheline") partially fixed it, but likely should have also 
cacheline aligned the following devslp_timing in the struct such that we 
guarantee that members of the struct not used for DMA do not share the 
same cacheline as the DMA buffer. Not having this means that 
architectures, such as MIPS, which in some cases have to perform manual 
invalidation of DMA buffer can clobber valid adjacent data if it is in 
the same cacheline.

Thanks,
Matt

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2017-10-20  4:25 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-17  8:05 [PATCH V8 1/5] dma-mapping: Rework dma_get_cache_alignment() Huacai Chen
2017-10-17  8:05 ` Huacai Chen
2017-10-17  8:05 ` [PATCH V8 2/5] MIPS: Implement dma_map_ops::get_cache_alignment() Huacai Chen
2017-10-17  8:05 ` [PATCH V8 3/5] scsi: Align block queue to dma_get_cache_alignment() Huacai Chen
2017-10-19 15:10   ` Christoph Hellwig
2017-10-17  8:05 ` [PATCH V8 4/5] libsas: Align SMP req/resp " Huacai Chen
2017-10-17 11:55   ` Marek Szyprowski
2017-10-18  1:12     ` [PATCH V8 4/5] libsas: Align SMP req/resp todma_get_cache_alignment() 陈华才
2017-10-19 15:12     ` [PATCH V8 4/5] libsas: Align SMP req/resp to dma_get_cache_alignment() Christoph Hellwig
2017-10-20  0:04   ` kbuild test robot
2017-10-20  0:04     ` kbuild test robot
2017-10-20  0:04     ` kbuild test robot
2017-10-17  8:05 ` [PATCH V8 5/5] libata: Align DMA buffer " Huacai Chen
2017-10-17  9:43   ` Sergei Shtylyov
2017-10-18  1:06     ` [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment() 陈华才
2017-10-18  1:06       ` 陈华才
2017-10-18 19:54     ` [PATCH V8 5/5] libata: Align DMA buffer to dma_get_cache_alignment() Alan Cox
2017-10-18 13:03   ` Tejun Heo
2017-10-19  7:52     ` Matt Redfearn
2017-10-19  7:52       ` Matt Redfearn
2017-10-20  4:25       ` [PATCH V8 5/5] libata: Align DMA buffer todma_get_cache_alignment() 陈华才
2017-10-20  4:25         ` 陈华才
2017-10-18 17:23 ` [PATCH V8 1/5] dma-mapping: Rework dma_get_cache_alignment() Mark Greer
2017-10-18 17:23   ` Mark Greer
2017-10-19 15:09   ` Christoph Hellwig
2017-10-19 15:09     ` Christoph Hellwig

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.