linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] spi: Add FSI-attached SPI controller driver
@ 2020-01-29 20:08 Eddie James
  2020-01-30 14:46 ` Mark Brown
  2020-01-30 16:37 ` Andy Shevchenko
  0 siblings, 2 replies; 17+ messages in thread
From: Eddie James @ 2020-01-29 20:08 UTC (permalink / raw)
  To: linux-spi; +Cc: linux-kernel, broonie, joel, andrew, Eddie James

There exists a set of SPI controllers on some POWER processors that may
be accessed through the FSI bus. Add a driver to traverse the FSI CFAM
engine that can access and drive the SPI controllers. This driver would
typically be used by a baseboard management controller (BMC).

Signed-off-by: Eddie James <eajames@linux.ibm.com>
---
 MAINTAINERS           |   6 +
 drivers/spi/Kconfig   |   7 +
 drivers/spi/Makefile  |   1 +
 drivers/spi/spi-fsi.c | 547 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 561 insertions(+)
 create mode 100644 drivers/spi/spi-fsi.c

diff --git a/MAINTAINERS b/MAINTAINERS
index fc1e24c..56a50c3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6862,6 +6862,12 @@ S:	Maintained
 F:	drivers/i2c/busses/i2c-fsi.c
 F:	Documentation/devicetree/bindings/i2c/i2c-fsi.txt
 
+FSI-ATTACHED SPI DRIVER
+M:	Eddie James <eajames@linux.ibm.com>
+L:	linux-spi@vger.kernel.org
+S:	Maintained
+F:	drivers/spi/spi-fsi.c
+
 FSNOTIFY: FILESYSTEM NOTIFICATION INFRASTRUCTURE
 M:	Jan Kara <jack@suse.cz>
 R:	Amir Goldstein <amir73il@gmail.com>
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index d6ed0c3..c6fc9f1 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -264,6 +264,13 @@ config SPI_FALCON
 	  has only been tested with m25p80 type chips. The hardware has no
 	  support for other types of SPI peripherals.
 
+config SPI_FSI
+	tristate "FSI SPI driver"
+	depends on FSI
+	help
+	  This enables support for the driver for FSI bus attached SPI
+	  controllers.
+
 config SPI_FSL_LPSPI
 	tristate "Freescale i.MX LPSPI controller"
 	depends on ARCH_MXC || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 9b65ec5..d4ea24e 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -40,6 +40,7 @@ spi-dw-midpci-objs			:= spi-dw-pci.o spi-dw-mid.o
 obj-$(CONFIG_SPI_EFM32)			+= spi-efm32.o
 obj-$(CONFIG_SPI_EP93XX)		+= spi-ep93xx.o
 obj-$(CONFIG_SPI_FALCON)		+= spi-falcon.o
+obj-$(CONFIG_SPI_FSI)			+= spi-fsi.o
 obj-$(CONFIG_SPI_FSL_CPM)		+= spi-fsl-cpm.o
 obj-$(CONFIG_SPI_FSL_DSPI)		+= spi-fsl-dspi.o
 obj-$(CONFIG_SPI_FSL_LIB)		+= spi-fsl-lib.o
diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c
new file mode 100644
index 0000000..4be0a0f
--- /dev/null
+++ b/drivers/spi/spi-fsi.c
@@ -0,0 +1,547 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) IBM Corporation 2020
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/fsi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/spi/spi.h>
+
+#define FSI_ENGID_SPI			0x23
+
+#define FSI2SPI_DATA0			0x00
+#define FSI2SPI_DATA1			0x04
+#define FSI2SPI_CMD			0x08
+#define  FSI2SPI_CMD_WRITE		 BIT(31)
+#define FSI2SPI_RESET			0x18
+#define FSI2SPI_STATUS			0x1c
+#define  FSI2SPI_STATUS_ANY_ERROR	 BIT(31)
+#define FSI2SPI_IRQ			0x20
+
+#define SPI_FSI_BASE			0x70000
+#define SPI_FSI_MAX_TRANSFER_SIZE	2048
+
+#define SPI_FSI_ERROR			0x0
+#define SPI_FSI_COUNTER_CFG		0x1
+#define  SPI_FSI_COUNTER_CFG_LOOPS(x)	 (((u64)(x) & 0xffULL) << 32)
+#define SPI_FSI_CFG1			0x2
+#define SPI_FSI_CLOCK_CFG		0x3
+#define  SPI_FSI_CLOCK_CFG_MM_ENABLE	 BIT_ULL(32)
+#define  SPI_FSI_CLOCK_CFG_ECC_DISABLE	 (BIT_ULL(35) | BIT_ULL(33))
+#define  SPI_FSI_CLOCK_CFG_RESET1	 (BIT_ULL(36) | BIT_ULL(38))
+#define  SPI_FSI_CLOCK_CFG_RESET2	 (BIT_ULL(37) | BIT_ULL(39))
+#define  SPI_FSI_CLOCK_CFG_MODE		 (BIT_ULL(41) | BIT_ULL(42))
+#define  SPI_FSI_CLOCK_CFG_SCK_RECV_DEL	 GENMASK_ULL(51, 44)
+#define   SPI_FSI_CLOCK_CFG_SCK_NO_DEL	  BIT_ULL(51)
+#define  SPI_FSI_CLOCK_CFG_SCK_DIV	 GENMASK_ULL(63, 52)
+#define SPI_FSI_MMAP			0x4
+#define SPI_FSI_DATA_TX			0x5
+#define SPI_FSI_DATA_RX			0x6
+#define SPI_FSI_SEQUENCE		0x7
+#define  SPI_FSI_SEQUENCE_STOP		 0x00
+#define  SPI_FSI_SEQUENCE_SEL_SLAVE(x)	 (0x10 | ((x) & 0xf))
+#define  SPI_FSI_SEQUENCE_SHIFT_OUT(x)	 (0x30 | ((x) & 0xf))
+#define  SPI_FSI_SEQUENCE_SHIFT_IN(x)	 (0x40 | ((x) & 0xf))
+#define  SPI_FSI_SEQUENCE_COPY_DATA_TX	 0xc0
+#define  SPI_FSI_SEQUENCE_BRANCH(x)	 (0xe0 | ((x) & 0xf))
+#define SPI_FSI_STATUS			0x8
+#define  SPI_FSI_STATUS_ERROR		 \
+	(GENMASK_ULL(31, 21) | GENMASK_ULL(15, 12))
+#define  SPI_FSI_STATUS_SEQ_STATE	 GENMASK_ULL(55, 48)
+#define   SPI_FSI_STATUS_SEQ_STATE_IDLE	  BIT_ULL(48)
+#define  SPI_FSI_STATUS_TDR_UNDERRUN	 BIT_ULL(57)
+#define  SPI_FSI_STATUS_TDR_OVERRUN	 BIT_ULL(58)
+#define  SPI_FSI_STATUS_TDR_FULL	 BIT_ULL(59)
+#define  SPI_FSI_STATUS_RDR_UNDERRUN	 BIT_ULL(61)
+#define  SPI_FSI_STATUS_RDR_OVERRUN	 BIT_ULL(62)
+#define  SPI_FSI_STATUS_RDR_FULL	 BIT_ULL(63)
+#define  SPI_FSI_STATUS_ANY_ERROR	 \
+	(SPI_FSI_STATUS_ERROR | SPI_FSI_STATUS_TDR_UNDERRUN | \
+	 SPI_FSI_STATUS_TDR_OVERRUN | SPI_FSI_STATUS_RDR_UNDERRUN | \
+	 SPI_FSI_STATUS_RDR_OVERRUN)
+#define SPI_FSI_PORT_CTRL		0x9
+
+struct fsi_spi {
+	struct device *dev;
+	struct fsi_device *fsi;
+	u32 base;
+};
+
+struct fsi_spi_sequence {
+	int bit;
+	u64 data;
+};
+
+static int fsi_spi_check_status(struct fsi_spi *ctx)
+{
+	int rc;
+	u32 sts;
+	__be32 sts_be;
+
+	rc = fsi_device_read(ctx->fsi, FSI2SPI_STATUS, &sts_be,
+			     sizeof(sts_be));
+	if (rc)
+		return rc;
+
+	sts = be32_to_cpu(sts_be);
+	if (sts & FSI2SPI_STATUS_ANY_ERROR) {
+		dev_err(ctx->dev, "Error with FSI2SPI interface: %08x.\n", sts);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int fsi_spi_read_reg(struct fsi_spi *ctx, u32 offset, u64 *value)
+{
+	int rc;
+	__be32 cmd_be;
+	__be32 data_be;
+
+	*value = 0ULL;
+
+	cmd_be = cpu_to_be32(offset + ctx->base);
+	rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
+	if (rc)
+		return rc;
+
+	rc = fsi_spi_check_status(ctx);
+	if (rc)
+		return rc;
+
+	rc = fsi_device_read(ctx->fsi, FSI2SPI_DATA0, &data_be,
+			     sizeof(data_be));
+	if (rc)
+		return rc;
+
+	*value |= (u64)be32_to_cpu(data_be) << 32;
+
+	rc = fsi_device_read(ctx->fsi, FSI2SPI_DATA1, &data_be,
+			     sizeof(data_be));
+	if (rc)
+		return rc;
+
+	*value |= (u64)be32_to_cpu(data_be);
+	dev_dbg(ctx->dev, "Read %02x[%016llx].\n", offset, *value);
+
+	return 0;
+}
+
+static int fsi_spi_write_reg(struct fsi_spi *ctx, u32 offset, u64 value)
+{
+	int rc;
+	__be32 cmd_be;
+	__be32 data_be;
+
+	dev_dbg(ctx->dev, "Write %02x[%016llx].\n", offset, value);
+
+	data_be = cpu_to_be32((value >> 32) & 0xFFFFFFFF);
+	rc = fsi_device_write(ctx->fsi, FSI2SPI_DATA0, &data_be,
+			      sizeof(data_be));
+	if (rc)
+		return rc;
+
+	data_be = cpu_to_be32(value & 0xFFFFFFFF);
+	rc = fsi_device_write(ctx->fsi, FSI2SPI_DATA1, &data_be,
+			      sizeof(data_be));
+	if (rc)
+		return rc;
+
+	cmd_be = cpu_to_be32((offset + ctx->base) | FSI2SPI_CMD_WRITE);
+	rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
+	if (rc)
+		return rc;
+
+	return fsi_spi_check_status(ctx);
+}
+
+static int fsi_spi_data_in(u64 in, u8 *rx, int len)
+{
+	int i;
+	int num_bytes = len > 8 ? 8 : len;
+
+	for (i = 0; i < num_bytes; ++i)
+		rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
+
+	return num_bytes;
+}
+
+static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
+{
+	int i;
+	int num_bytes = len > 8 ? 8 : len;
+
+	*out = 0ULL;
+
+	for (i = 0; i < num_bytes; ++i)
+		*out |= (u64)tx[i] << (8 * (8 - (i + 1)));
+
+	return num_bytes;
+}
+
+static int fsi_spi_reset(struct fsi_spi *ctx)
+{
+	int rc;
+
+	dev_info(ctx->dev, "Resetting SPI controller.\n");
+
+	rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+			       SPI_FSI_CLOCK_CFG_RESET1);
+	if (rc)
+		return rc;
+
+	rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+			       SPI_FSI_CLOCK_CFG_RESET2);
+	return rc;
+}
+
+static int fsi_spi_sequence_add(struct fsi_spi_sequence *seq, u8 val)
+{
+	seq->data |= (u64)val << seq->bit;
+	seq->bit -= 8;
+
+	return ((64 - seq->bit) / 8) - 2;
+}
+
+static void fsi_spi_sequence_init(struct fsi_spi_sequence *seq)
+{
+	seq->bit = 56;
+	seq->data = 0ULL;
+}
+
+static int fsi_spi_sequence_transfer(struct fsi_spi *ctx,
+				     struct fsi_spi_sequence *seq,
+				     struct spi_transfer *transfer)
+{
+	int loops = 1;
+	int idx = 0;
+	int rc;
+	u8 len;
+	u8 rem = 0;
+
+	if (transfer->len > 8) {
+		loops = transfer->len / 8;
+		rem = transfer->len - (loops * 8);
+		len = 8;
+	} else {
+		len = transfer->len;
+	}
+
+	if (transfer->tx_buf) {
+		idx = fsi_spi_sequence_add(seq,
+					   SPI_FSI_SEQUENCE_SHIFT_OUT(len));
+		if (rem)
+			rem = SPI_FSI_SEQUENCE_SHIFT_OUT(rem);
+	} else if (transfer->rx_buf) {
+		idx = fsi_spi_sequence_add(seq,
+					   SPI_FSI_SEQUENCE_SHIFT_IN(len));
+		if (rem)
+			rem = SPI_FSI_SEQUENCE_SHIFT_IN(rem);
+	} else {
+		return -EINVAL;
+	}
+
+	if (loops > 1) {
+		fsi_spi_sequence_add(seq, SPI_FSI_SEQUENCE_BRANCH(idx));
+
+		if (rem)
+			fsi_spi_sequence_add(seq, rem);
+
+		rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG,
+				       SPI_FSI_COUNTER_CFG_LOOPS(loops - 1));
+		if (rc) {
+			/* Ensure error returns < 0 in this case. */
+			if (rc > 0)
+				rc = -rc;
+
+			return rc;
+		}
+
+		return loops;
+	}
+
+	return 0;
+}
+
+static int fsi_spi_transfer_data(struct fsi_spi *ctx,
+				 struct spi_transfer *transfer)
+{
+	int rc = 0;
+	u64 status = 0ULL;
+
+	if (transfer->tx_buf) {
+		int nb;
+		int sent = 0;
+		u64 out = 0ULL;
+		const u8 *tx = transfer->tx_buf;
+
+		while (transfer->len > sent) {
+			nb = fsi_spi_data_out(&out, &tx[sent],
+					      (int)transfer->len - sent);
+
+			rc = fsi_spi_write_reg(ctx, SPI_FSI_DATA_TX, out);
+			if (rc)
+				return rc;
+
+			do {
+				rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS,
+						      &status);
+				if (rc)
+					return rc;
+
+				if (status & SPI_FSI_STATUS_ANY_ERROR) {
+					rc = fsi_spi_reset(ctx);
+					if (rc)
+						return rc;
+
+					return -EREMOTEIO;
+				}
+			} while (status & SPI_FSI_STATUS_TDR_FULL);
+
+			sent += nb;
+		}
+	} else if (transfer->rx_buf) {
+		int recv = 0;
+		u64 in = 0ULL;
+		u8 *rx = transfer->rx_buf;
+
+		while (transfer->len > recv) {
+			do {
+				rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS,
+						      &status);
+				if (rc)
+					return rc;
+
+				if (status & SPI_FSI_STATUS_ANY_ERROR) {
+					rc = fsi_spi_reset(ctx);
+					if (rc)
+						return rc;
+
+					return -EREMOTEIO;
+				}
+			} while (!(status & SPI_FSI_STATUS_RDR_FULL));
+
+			rc = fsi_spi_read_reg(ctx, SPI_FSI_DATA_RX, &in);
+			if (rc)
+				return rc;
+
+			recv += fsi_spi_data_in(in, &rx[recv],
+						(int)transfer->len - recv);
+		}
+	}
+
+	return 0;
+}
+
+static int fsi_spi_transfer_init(struct fsi_spi *ctx)
+{
+	int rc;
+	u64 seq_state;
+	u64 clock_cfg = 0ULL;
+	u64 status = 0ULL;
+	u64 wanted_clock_cfg = SPI_FSI_CLOCK_CFG_ECC_DISABLE |
+		SPI_FSI_CLOCK_CFG_SCK_NO_DEL |
+		FIELD_PREP(SPI_FSI_CLOCK_CFG_SCK_DIV, 4);
+
+	do {
+		rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS, &status);
+		if (rc)
+			return rc;
+
+		if (status & (SPI_FSI_STATUS_ANY_ERROR |
+			      SPI_FSI_STATUS_TDR_FULL |
+			      SPI_FSI_STATUS_RDR_FULL)) {
+			rc = fsi_spi_reset(ctx);
+			if (rc)
+				return rc;
+
+			continue;
+		}
+
+		seq_state = status & SPI_FSI_STATUS_SEQ_STATE;
+	} while (seq_state && (seq_state != SPI_FSI_STATUS_SEQ_STATE_IDLE));
+
+	rc = fsi_spi_read_reg(ctx, SPI_FSI_CLOCK_CFG, &clock_cfg);
+	if (rc)
+		return rc;
+
+	if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
+			  SPI_FSI_CLOCK_CFG_ECC_DISABLE |
+			  SPI_FSI_CLOCK_CFG_MODE |
+			  SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
+			  SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)
+		rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+				       wanted_clock_cfg);
+
+	return rc;
+}
+
+static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
+					struct spi_message *mesg)
+{
+	int rc = 0;
+	u8 seq_slave = SPI_FSI_SEQUENCE_SEL_SLAVE(mesg->spi->chip_select + 1);
+	struct spi_transfer *transfer;
+	struct fsi_spi *ctx = spi_controller_get_devdata(ctlr);
+
+	list_for_each_entry(transfer, &mesg->transfers, transfer_list) {
+		struct fsi_spi_sequence seq;
+		struct spi_transfer *next = NULL;
+
+		/* Sequencer must do shift out (tx) first. */
+		if (!transfer->tx_buf ||
+		    transfer->len > SPI_FSI_MAX_TRANSFER_SIZE) {
+			rc = -EINVAL;
+			goto error;
+		}
+
+		dev_dbg(ctx->dev, "Start tx of %d bytes.\n", transfer->len);
+
+		rc = fsi_spi_transfer_init(ctx);
+		if (rc < 0)
+			goto error;
+
+		fsi_spi_sequence_init(&seq);
+		fsi_spi_sequence_add(&seq, seq_slave);
+
+		rc = fsi_spi_sequence_transfer(ctx, &seq, transfer);
+		if (rc < 0)
+			goto error;
+
+		if (!list_is_last(&transfer->transfer_list,
+				  &mesg->transfers)) {
+			next = list_next_entry(transfer, transfer_list);
+
+			/* Sequencer can only do shift in (rx) after tx. */
+			if (next->rx_buf) {
+				if (next->len > SPI_FSI_MAX_TRANSFER_SIZE) {
+					rc = -EINVAL;
+					goto error;
+				}
+
+				dev_dbg(ctx->dev, "Sequence rx of %d bytes.\n",
+					next->len);
+
+				rc = fsi_spi_sequence_transfer(ctx, &seq,
+							       next);
+				if (rc < 0)
+					goto error;
+			} else {
+				next = NULL;
+			}
+		}
+
+		fsi_spi_sequence_add(&seq, SPI_FSI_SEQUENCE_SEL_SLAVE(0));
+
+		rc = fsi_spi_write_reg(ctx, SPI_FSI_SEQUENCE, seq.data);
+		if (rc)
+			goto error;
+
+		rc = fsi_spi_transfer_data(ctx, transfer);
+		if (rc)
+			goto error;
+
+		if (next) {
+			rc = fsi_spi_transfer_data(ctx, next);
+			if (rc)
+				goto error;
+
+			transfer = next;
+		}
+	}
+
+error:
+	mesg->status = rc;
+	spi_finalize_current_message(ctlr);
+
+	return rc;
+}
+
+static size_t fsi_spi_max_transfer_size(struct spi_device *spi)
+{
+	return SPI_FSI_MAX_TRANSFER_SIZE;
+}
+
+static int fsi_spi_probe(struct device *dev)
+{
+	int rc;
+	u32 root_ctrl_8;
+	struct device_node *np;
+	int num_controllers_registered = 0;
+	struct fsi_device *fsi = to_fsi_dev(dev);
+
+	rc = fsi_slave_read(fsi->slave, 0x2860, &root_ctrl_8,
+			    sizeof(root_ctrl_8));
+	if (rc)
+		return rc;
+
+	if (!root_ctrl_8) {
+		dev_dbg(dev, "SPI MUX not set, aborting probe.\n");
+		return -ENODEV;
+	}
+
+	for_each_available_child_of_node(dev->of_node, np) {
+		u32 base;
+		struct fsi_spi *ctx;
+		struct spi_controller *ctlr;
+
+		if (of_property_read_u32(np, "reg", &base))
+			continue;
+
+		ctlr = spi_alloc_master(dev, sizeof(*ctx));
+		if (!ctlr)
+			break;
+
+		ctlr->dev.of_node = np;
+		ctlr->num_chipselect = of_get_available_child_count(np) ?: 1;
+		ctlr->flags = SPI_CONTROLLER_HALF_DUPLEX;
+		ctlr->max_transfer_size = fsi_spi_max_transfer_size;
+		ctlr->transfer_one_message = fsi_spi_transfer_one_message;
+
+		ctx = spi_controller_get_devdata(ctlr);
+		ctx->dev = &ctlr->dev;
+		ctx->fsi = fsi;
+		ctx->base = base + SPI_FSI_BASE;
+
+		rc = devm_spi_register_controller(dev, ctlr);
+		if (rc)
+			spi_controller_put(ctlr);
+		else
+			num_controllers_registered++;
+	}
+
+	if (!num_controllers_registered)
+		return -ENODEV;
+
+	return 0;
+}
+
+static int fsi_spi_remove(struct device *dev)
+{
+	return 0;
+}
+
+static const struct fsi_device_id fsi_spi_ids[] = {
+	{ FSI_ENGID_SPI, FSI_VERSION_ANY },
+	{ }
+};
+
+static struct fsi_driver fsi_spi_driver = {
+	.id_table = fsi_spi_ids,
+	.drv = {
+		.name = "spi-fsi",
+		.bus = &fsi_bus_type,
+		.probe = fsi_spi_probe,
+		.remove = fsi_spi_remove,
+	},
+};
+
+module_fsi_driver(fsi_spi_driver);
+
+MODULE_AUTHOR("Eddie James <eajames@linux.ibm.com>");
+MODULE_DESCRIPTION("FSI attached SPI controller");
+MODULE_LICENSE("GPL");
-- 
1.8.3.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-01-29 20:08 [PATCH] spi: Add FSI-attached SPI controller driver Eddie James
@ 2020-01-30 14:46 ` Mark Brown
  2020-01-30 15:32   ` Eddie James
  2020-01-30 16:37 ` Andy Shevchenko
  1 sibling, 1 reply; 17+ messages in thread
From: Mark Brown @ 2020-01-30 14:46 UTC (permalink / raw)
  To: Eddie James; +Cc: linux-spi, linux-kernel, joel, andrew

[-- Attachment #1: Type: text/plain, Size: 1133 bytes --]

On Wed, Jan 29, 2020 at 02:08:24PM -0600, Eddie James wrote:

Overall this looks good, some comments below but they're all fairly
minor.

> +++ b/drivers/spi/spi-fsi.c
> @@ -0,0 +1,547 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) IBM Corporation 2020
> + */

Please make the entire comment a C++ one so things look more
intentional.

> +
> +static int fsi_spi_data_in(u64 in, u8 *rx, int len)
> +{
> +	int i;
> +	int num_bytes = len > 8 ? 8 : len;

Please write normal conditional statements to improve legibility, the
ternery operator isn't really needed here.

> +static int fsi_spi_reset(struct fsi_spi *ctx)
> +{
> +	int rc;
> +
> +	dev_info(ctx->dev, "Resetting SPI controller.\n");

This should be lowered to dev_dbg() at most, it's not really adding
anything otherwise.

> +static int fsi_spi_remove(struct device *dev)
> +{
> +	return 0;
> +}

Remove empty functions, if they can safely be empty then it should be
possible to omit them.

> +static const struct fsi_device_id fsi_spi_ids[] = {
> +	{ FSI_ENGID_SPI, FSI_VERSION_ANY },
> +	{ }
> +};

This needs a MODULE_DEVICE_TABLE annotation.

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-01-30 14:46 ` Mark Brown
@ 2020-01-30 15:32   ` Eddie James
  0 siblings, 0 replies; 17+ messages in thread
From: Eddie James @ 2020-01-30 15:32 UTC (permalink / raw)
  To: Mark Brown; +Cc: linux-spi, linux-kernel, joel, andrew


On 1/30/20 8:46 AM, Mark Brown wrote:
> On Wed, Jan 29, 2020 at 02:08:24PM -0600, Eddie James wrote:
>
> Overall this looks good, some comments below but they're all fairly
> minor.


Thanks for the quick review! I'll fix what you've suggested below.

Thanks,

Eddie


>
>> +++ b/drivers/spi/spi-fsi.c
>> @@ -0,0 +1,547 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/*
>> + * Copyright (C) IBM Corporation 2020
>> + */
> Please make the entire comment a C++ one so things look more
> intentional.
>
>> +
>> +static int fsi_spi_data_in(u64 in, u8 *rx, int len)
>> +{
>> +	int i;
>> +	int num_bytes = len > 8 ? 8 : len;
> Please write normal conditional statements to improve legibility, the
> ternery operator isn't really needed here.
>
>> +static int fsi_spi_reset(struct fsi_spi *ctx)
>> +{
>> +	int rc;
>> +
>> +	dev_info(ctx->dev, "Resetting SPI controller.\n");
> This should be lowered to dev_dbg() at most, it's not really adding
> anything otherwise.
>
>> +static int fsi_spi_remove(struct device *dev)
>> +{
>> +	return 0;
>> +}
> Remove empty functions, if they can safely be empty then it should be
> possible to omit them.
>
>> +static const struct fsi_device_id fsi_spi_ids[] = {
>> +	{ FSI_ENGID_SPI, FSI_VERSION_ANY },
>> +	{ }
>> +};
> This needs a MODULE_DEVICE_TABLE annotation.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-01-29 20:08 [PATCH] spi: Add FSI-attached SPI controller driver Eddie James
  2020-01-30 14:46 ` Mark Brown
@ 2020-01-30 16:37 ` Andy Shevchenko
  2020-02-03 20:33   ` Eddie James
  1 sibling, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2020-01-30 16:37 UTC (permalink / raw)
  To: Eddie James
  Cc: linux-spi, Linux Kernel Mailing List, Mark Brown, Joel Stanley,
	Andrew Jeffery

On Wed, Jan 29, 2020 at 10:09 PM Eddie James <eajames@linux.ibm.com> wrote:
>
> There exists a set of SPI controllers on some POWER processors that may
> be accessed through the FSI bus. Add a driver to traverse the FSI CFAM
> engine that can access and drive the SPI controllers. This driver would
> typically be used by a baseboard management controller (BMC).

...

> +#include <linux/bitfield.h>
> +#include <linux/bits.h>

> +#include <linux/of.h>

...

> +struct fsi_spi {
> +       struct device *dev;

Isn't fsl->dev the same?
Perhaps kernel doc to explain the difference?

> +       struct fsi_device *fsi;

> +       u32 base;
> +};

...

> +static int fsi_spi_read_reg(struct fsi_spi *ctx, u32 offset, u64 *value)
> +{
> +       int rc;
> +       __be32 cmd_be;
> +       __be32 data_be;

> +       *value = 0ULL;

Usually the pattern is don't pollute output on error condition. Any
reason why you zeroing output beforehand?

> +       cmd_be = cpu_to_be32(offset + ctx->base);
> +       rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
> +       if (rc)
> +               return rc;

> +       return 0;
> +}

...

> +       data_be = cpu_to_be32((value >> 32) & 0xFFFFFFFF);

Redundant & 0xff... part.

> +       data_be = cpu_to_be32(value & 0xFFFFFFFF);

Ditto.

You may use upper_32_bits() / lower_32_bits() instead.

...

> +static int fsi_spi_data_in(u64 in, u8 *rx, int len)
> +{
> +       int i;

> +       int num_bytes = len > 8 ? 8 : len;

min(len, 8);

> +       for (i = 0; i < num_bytes; ++i)
> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);

Redundant & 0xffULL part.

Isn't it NIH of get_unalinged_be64 / le64 or something similar?

> +       return num_bytes;
> +}

> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
> +{

Ditto as for above function. (put_unaligned ...)

> +}

...

> +       dev_info(ctx->dev, "Resetting SPI controller.\n");

info?! Why?

> +       rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
> +                              SPI_FSI_CLOCK_CFG_RESET2);
> +       return rc;

return fsi_spi_write_reg();

...

> +       return ((64 - seq->bit) / 8) - 2;

Too many parentheses.

...

> +static int fsi_spi_sequence_transfer(struct fsi_spi *ctx,
> +                                    struct fsi_spi_sequence *seq,
> +                                    struct spi_transfer *transfer)
> +{

> +       int loops = 1;
> +       int idx = 0;
> +       int rc;
> +       u8 len;
> +       u8 rem = 0;

> +       if (transfer->len > 8) {
> +               loops = transfer->len / 8;
> +               rem = transfer->len - (loops * 8);
> +               len = 8;
> +       } else {
> +               len = transfer->len;
> +       }

len = min(transfer->len, 8);

loops = transfer->len / len;
rem = transfer->len % len;

(I think compiler is clever enough to find out that the division can be avoided)

...and drop assignments in definition block.

I didn't look carefully in the implementation, but I believe there is
still room for improvement / optimization.

> +       if (loops > 1) {

> +               rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG,
> +                                      SPI_FSI_COUNTER_CFG_LOOPS(loops - 1));
> +               if (rc) {

> +                       /* Ensure error returns < 0 in this case. */

I didn't get why this case is special? Why not to be consistent with
return value?

> +                       if (rc > 0)
> +                               rc = -rc;
> +
> +                       return rc;
> +               }

> +               return loops;

If we return here the amount of loops...

> +       }
> +
> +       return 0;

...why here is 0?

I think more consistency is required.

> +}

...

> +static int fsi_spi_transfer_data(struct fsi_spi *ctx,
> +                                struct spi_transfer *transfer)
> +{

Can you refactor to tx and rx parts?

> +       return 0;
> +}

...

> +       do {
> +               rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS, &status);
> +               if (rc)
> +                       return rc;
> +
> +               if (status & (SPI_FSI_STATUS_ANY_ERROR |
> +                             SPI_FSI_STATUS_TDR_FULL |
> +                             SPI_FSI_STATUS_RDR_FULL)) {
> +                       rc = fsi_spi_reset(ctx);
> +                       if (rc)
> +                               return rc;
> +

> +                       continue;

I forgot if this to be infinite loop or if it's going to check
previous seq_state value. In any case this code is a bit fishy. Needs
comments / refactoring.

> +               }
> +
> +               seq_state = status & SPI_FSI_STATUS_SEQ_STATE;
> +       } while (seq_state && (seq_state != SPI_FSI_STATUS_SEQ_STATE_IDLE));

...

> +       if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
> +                         SPI_FSI_CLOCK_CFG_ECC_DISABLE |
> +                         SPI_FSI_CLOCK_CFG_MODE |
> +                         SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
> +                         SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)

> +               rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
> +                                      wanted_clock_cfg);

Missed {} ?

> +
> +       return rc;
> +}

...

> +       rc = fsi_slave_read(fsi->slave, 0x2860, &root_ctrl_8,

What is this magic for?

> +                           sizeof(root_ctrl_8));
> +       if (rc)
> +               return rc;

...

> +static int fsi_spi_remove(struct device *dev)
> +{
> +       return 0;
> +}

Why do you need this?

...

> +static struct fsi_driver fsi_spi_driver = {
> +       .id_table = fsi_spi_ids,
> +       .drv = {
> +               .name = "spi-fsi",

> +               .bus = &fsi_bus_type,

Why is it not in the module_fsi_driver() macro?

> +               .probe = fsi_spi_probe,
> +               .remove = fsi_spi_remove,
> +       },
> +};
> +
> +module_fsi_driver(fsi_spi_driver);

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-01-30 16:37 ` Andy Shevchenko
@ 2020-02-03 20:33   ` Eddie James
  2020-02-04 11:02     ` Andy Shevchenko
  0 siblings, 1 reply; 17+ messages in thread
From: Eddie James @ 2020-02-03 20:33 UTC (permalink / raw)
  To: Andy Shevchenko, Eddie James
  Cc: linux-spi, Linux Kernel Mailing List, Mark Brown, Joel Stanley,
	Andrew Jeffery


On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> On Wed, Jan 29, 2020 at 10:09 PM Eddie James <eajames@linux.ibm.com> wrote:
>> There exists a set of SPI controllers on some POWER processors that may
>> be accessed through the FSI bus. Add a driver to traverse the FSI CFAM
>> engine that can access and drive the SPI controllers. This driver would
>> typically be used by a baseboard management controller (BMC).
> ...
>
>> +#include <linux/bitfield.h>
>> +#include <linux/bits.h>
>> +#include <linux/of.h>
> ...
>
>> +struct fsi_spi {
>> +       struct device *dev;
> Isn't fsl->dev the same?
> Perhaps kernel doc to explain the difference?


No, it's not the same, as dev here is the SPI controller. I'll add a 
comment.


>
>> +       struct fsi_device *fsi;
>> +       u32 base;
>> +};
> ...
>
>> +static int fsi_spi_read_reg(struct fsi_spi *ctx, u32 offset, u64 *value)
>> +{
>> +       int rc;
>> +       __be32 cmd_be;
>> +       __be32 data_be;
>> +       *value = 0ULL;
> Usually the pattern is don't pollute output on error condition. Any
> reason why you zeroing output beforehand?


Well otherwise I have to store another 64 bit int and do another 
assignment at the end. This is an internal function and all the users 
below know what's happening.


>
>> +       cmd_be = cpu_to_be32(offset + ctx->base);
>> +       rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
>> +       if (rc)
>> +               return rc;
>> +       return 0;
>> +}
> ...
>
>> +       data_be = cpu_to_be32((value >> 32) & 0xFFFFFFFF);
> Redundant & 0xff... part.
>
>> +       data_be = cpu_to_be32(value & 0xFFFFFFFF);
> Ditto.
>
> You may use upper_32_bits() / lower_32_bits() instead.


OK, thanks.


>
> ...
>
>> +static int fsi_spi_data_in(u64 in, u8 *rx, int len)
>> +{
>> +       int i;
>> +       int num_bytes = len > 8 ? 8 : len;
> min(len, 8);


Sure.


>
>> +       for (i = 0; i < num_bytes; ++i)
>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> Redundant & 0xffULL part.
>
> Isn't it NIH of get_unalinged_be64 / le64 or something similar?


No, these are shift in/out operations. The read register will also have 
previous operations data in them and must be extracted with only the 
correct number of bytes.


>
>> +       return num_bytes;
>> +}
>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
>> +{
> Ditto as for above function. (put_unaligned ...)
>
>> +}
> ...
>
>> +       dev_info(ctx->dev, "Resetting SPI controller.\n");
> info?! Why?
>
>> +       rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
>> +                              SPI_FSI_CLOCK_CFG_RESET2);
>> +       return rc;
> return fsi_spi_write_reg();
>
> ...
>
>> +       return ((64 - seq->bit) / 8) - 2;
> Too many parentheses.


I prefer using 2 extra characters to make it much clearer at a glance.


>
> ...
>
>> +static int fsi_spi_sequence_transfer(struct fsi_spi *ctx,
>> +                                    struct fsi_spi_sequence *seq,
>> +                                    struct spi_transfer *transfer)
>> +{
>> +       int loops = 1;
>> +       int idx = 0;
>> +       int rc;
>> +       u8 len;
>> +       u8 rem = 0;
>> +       if (transfer->len > 8) {
>> +               loops = transfer->len / 8;
>> +               rem = transfer->len - (loops * 8);
>> +               len = 8;
>> +       } else {
>> +               len = transfer->len;
>> +       }
> len = min(transfer->len, 8);
>
> loops = transfer->len / len;
> rem = transfer->len % len;


Sure.


>
> (I think compiler is clever enough to find out that the division can be avoided)
>
> ...and drop assignments in definition block.
>
> I didn't look carefully in the implementation, but I believe there is
> still room for improvement / optimization.
>
>> +       if (loops > 1) {
>> +               rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG,
>> +                                      SPI_FSI_COUNTER_CFG_LOOPS(loops - 1));
>> +               if (rc) {
>> +                       /* Ensure error returns < 0 in this case. */
> I didn't get why this case is special? Why not to be consistent with
> return value?


Sure, will fix, this was leftover from some testing.


>
>> +                       if (rc > 0)
>> +                               rc = -rc;
>> +
>> +                       return rc;
>> +               }
>> +               return loops;
> If we return here the amount of loops...
>
>> +       }
>> +
>> +       return 0;
> ...why here is 0?
>
> I think more consistency is required.


Will refactor.


>
>> +}
> ...
>
>> +static int fsi_spi_transfer_data(struct fsi_spi *ctx,
>> +                                struct spi_transfer *transfer)
>> +{
> Can you refactor to tx and rx parts?


Why?


>
>> +       return 0;
>> +}
> ...
>
>> +       do {
>> +               rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS, &status);
>> +               if (rc)
>> +                       return rc;
>> +
>> +               if (status & (SPI_FSI_STATUS_ANY_ERROR |
>> +                             SPI_FSI_STATUS_TDR_FULL |
>> +                             SPI_FSI_STATUS_RDR_FULL)) {
>> +                       rc = fsi_spi_reset(ctx);
>> +                       if (rc)
>> +                               return rc;
>> +
>> +                       continue;
> I forgot if this to be infinite loop or if it's going to check
> previous seq_state value. In any case this code is a bit fishy. Needs
> comments / refactoring.


I'll add a timeout.


>
>> +               }
>> +
>> +               seq_state = status & SPI_FSI_STATUS_SEQ_STATE;
>> +       } while (seq_state && (seq_state != SPI_FSI_STATUS_SEQ_STATE_IDLE));
> ...
>
>> +       if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
>> +                         SPI_FSI_CLOCK_CFG_ECC_DISABLE |
>> +                         SPI_FSI_CLOCK_CFG_MODE |
>> +                         SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
>> +                         SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)
>> +               rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
>> +                                      wanted_clock_cfg);
> Missed {} ?


No? It's one line under the if.


>
>> +
>> +       return rc;
>> +}
> ...
>
>> +       rc = fsi_slave_read(fsi->slave, 0x2860, &root_ctrl_8,
> What is this magic for?


Added comment.


>
>> +                           sizeof(root_ctrl_8));
>> +       if (rc)
>> +               return rc;
> ...
>
>> +static int fsi_spi_remove(struct device *dev)
>> +{
>> +       return 0;
>> +}
> Why do you need this?


Will drop it.


Thanks for the review!

Eddie


>
> ...
>
>> +static struct fsi_driver fsi_spi_driver = {
>> +       .id_table = fsi_spi_ids,
>> +       .drv = {
>> +               .name = "spi-fsi",
>> +               .bus = &fsi_bus_type,
> Why is it not in the module_fsi_driver() macro?
>
>> +               .probe = fsi_spi_probe,
>> +               .remove = fsi_spi_remove,
>> +       },
>> +};
>> +
>> +module_fsi_driver(fsi_spi_driver);

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-03 20:33   ` Eddie James
@ 2020-02-04 11:02     ` Andy Shevchenko
  2020-02-04 16:06       ` Eddie James
  0 siblings, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2020-02-04 11:02 UTC (permalink / raw)
  To: Eddie James
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery

On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> > On Wed, Jan 29, 2020 at 10:09 PM Eddie James <eajames@linux.ibm.com> wrote:

...

> >> +       struct device *dev;
> > Isn't fsl->dev the same?
> > Perhaps kernel doc to explain the difference?
>
>
> No, it's not the same, as dev here is the SPI controller. I'll add a
> comment.

Why to have duplication then?

> >> +       struct fsi_device *fsi;

...

> >> +       for (i = 0; i < num_bytes; ++i)
> >> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> > Redundant & 0xffULL part.
> >
> > Isn't it NIH of get_unalinged_be64 / le64 or something similar?
>
>
> No, these are shift in/out operations. The read register will also have
> previous operations data in them and must be extracted with only the
> correct number of bytes.

Why not to call put_unaligned() how the tail in this case (it's 0 or
can be easily made to be 0) will affect the result?

> >> +       return num_bytes;
> >> +}

> >> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
> >> +{
> > Ditto as for above function. (put_unaligned ...)

Ditto.

> >> +}

...

> >> +static int fsi_spi_transfer_data(struct fsi_spi *ctx,
> >> +                                struct spi_transfer *transfer)
> >> +{
> > Can you refactor to tx and rx parts?
>
>
> Why?

It's way too long function to read. Indentation level also can improve
readability.
That's basically what refactoring is for.

> >> +       return 0;
> >> +}

...

> >> +       if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
> >> +                         SPI_FSI_CLOCK_CFG_ECC_DISABLE |
> >> +                         SPI_FSI_CLOCK_CFG_MODE |
> >> +                         SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
> >> +                         SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)
> >> +               rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
> >> +                                      wanted_clock_cfg);
> > Missed {} ?
>
>
> No? It's one line under the if.

One statement, but *two* lines.
What does checkpatch.pl tell you about this?

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-04 11:02     ` Andy Shevchenko
@ 2020-02-04 16:06       ` Eddie James
  2020-02-05 15:51         ` Andy Shevchenko
  0 siblings, 1 reply; 17+ messages in thread
From: Eddie James @ 2020-02-04 16:06 UTC (permalink / raw)
  To: Andy Shevchenko, Eddie James
  Cc: linux-spi, Linux Kernel Mailing List, Mark Brown, Joel Stanley,
	Andrew Jeffery


On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>>> On Wed, Jan 29, 2020 at 10:09 PM Eddie James <eajames@linux.ibm.com> wrote:
> ...
>
>>>> +       struct device *dev;
>>> Isn't fsl->dev the same?
>>> Perhaps kernel doc to explain the difference?
>>
>> No, it's not the same, as dev here is the SPI controller. I'll add a
>> comment.
> Why to have duplication then?


Nothing is being duplicated, the two variables are storing entirely 
different information, both of which are necessary for each SPI 
controller that this driver is driving.


>
>>>> +       struct fsi_device *fsi;
> ...
>
>>>> +       for (i = 0; i < num_bytes; ++i)
>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>> Redundant & 0xffULL part.
>>>
>>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
>>
>> No, these are shift in/out operations. The read register will also have
>> previous operations data in them and must be extracted with only the
>> correct number of bytes.
> Why not to call put_unaligned() how the tail in this case (it's 0 or
> can be easily made to be 0) will affect the result?


The shift-in is not the same as any byte-swap or unaligned operation. 
For however many bytes we've read, we start at that many bytes 
left-shifted in the register and copy out to our buffer, moving right 
for each next byte... I don't think there is an existing function for 
this operation.


>
>>>> +       return num_bytes;
>>>> +}
>>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
>>>> +{
>>> Ditto as for above function. (put_unaligned ...)
> Ditto.


I don't understand how this could work for transfers of less than 8 
bytes, any put_unaligned would access memory that it doesn't own.


>
>>>> +}
> ...
>
>>>> +static int fsi_spi_transfer_data(struct fsi_spi *ctx,
>>>> +                                struct spi_transfer *transfer)
>>>> +{
>>> Can you refactor to tx and rx parts?
>>
>> Why?
> It's way too long function to read. Indentation level also can improve
> readability.
> That's basically what refactoring is for.


The body is 65 lines, I don't think it is too long.

Since the function is used multiple times I think it makes more sense to 
encapsulate the check for tx/rx in the function rather than split it and 
have to check each time the functions are used.


>
>>>> +       return 0;
>>>> +}
> ...
>
>>>> +       if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
>>>> +                         SPI_FSI_CLOCK_CFG_ECC_DISABLE |
>>>> +                         SPI_FSI_CLOCK_CFG_MODE |
>>>> +                         SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
>>>> +                         SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)
>>>> +               rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
>>>> +                                      wanted_clock_cfg);
>>> Missed {} ?
>>
>> No? It's one line under the if.
> One statement, but *two* lines.
> What does checkpatch.pl tell you about this?


Right.

checkpatch.pl says nothing about this, I think it meets the coding 
standards as is.


Thanks for the review,

Eddie


>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-04 16:06       ` Eddie James
@ 2020-02-05 15:51         ` Andy Shevchenko
  2020-02-07 19:28           ` Eddie James
  0 siblings, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2020-02-05 15:51 UTC (permalink / raw)
  To: Eddie James
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery

On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> > On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> >>> On Wed, Jan 29, 2020 at 10:09 PM Eddie James <eajames@linux.ibm.com> wrote:

...

> >>>> +       struct device *dev;
> >>> Isn't fsl->dev the same?
> >>> Perhaps kernel doc to explain the difference?
> >>
> >> No, it's not the same, as dev here is the SPI controller. I'll add a
> >> comment.
> > Why to have duplication then?
>
>
> Nothing is being duplicated, the two variables are storing entirely
> different information, both of which are necessary for each SPI
> controller that this driver is driving.

Oh, I see now, thanks!

...

> >>>> +       for (i = 0; i < num_bytes; ++i)
> >>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>> Redundant & 0xffULL part.
> >>>
> >>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
> >>
> >> No, these are shift in/out operations. The read register will also have
> >> previous operations data in them and must be extracted with only the
> >> correct number of bytes.
> > Why not to call put_unaligned() how the tail in this case (it's 0 or
> > can be easily made to be 0) will affect the result?
>
>
> The shift-in is not the same as any byte-swap or unaligned operation.
> For however many bytes we've read, we start at that many bytes
> left-shifted in the register and copy out to our buffer, moving right
> for each next byte... I don't think there is an existing function for
> this operation.

For me it looks like

  u8 tmp[8];

  put_unaligned_be64(in, tmp);
  memcpy(rx, tmp, num_bytes);

put_unaligned*() is just a method to unroll the value to the u8 buffer.
See, for example, linux/unaligned/be_byteshift.h implementation.

> >>>> +       return num_bytes;
> >>>> +}
> >>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
> >>>> +{
> >>> Ditto as for above function. (put_unaligned ...)
> > Ditto.
>
>
> I don't understand how this could work for transfers of less than 8
> bytes, any put_unaligned would access memory that it doesn't own.

Ditto.

> >>>> +}

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-05 15:51         ` Andy Shevchenko
@ 2020-02-07 19:28           ` Eddie James
  2020-02-07 19:39             ` Andy Shevchenko
  0 siblings, 1 reply; 17+ messages in thread
From: Eddie James @ 2020-02-07 19:28 UTC (permalink / raw)
  To: Andy Shevchenko, Eddie James
  Cc: linux-spi, Linux Kernel Mailing List, Mark Brown, Joel Stanley,
	Andrew Jeffery


On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>>>>> On Wed, Jan 29, 2020 at 10:09 PM Eddie James <eajames@linux.ibm.com> wrote:
> ...
>
>>>>>> +       struct device *dev;
>>>>> Isn't fsl->dev the same?
>>>>> Perhaps kernel doc to explain the difference?
>>>> No, it's not the same, as dev here is the SPI controller. I'll add a
>>>> comment.
>>> Why to have duplication then?
>>
>> Nothing is being duplicated, the two variables are storing entirely
>> different information, both of which are necessary for each SPI
>> controller that this driver is driving.
> Oh, I see now, thanks!
>
> ...
>
>>>>>> +       for (i = 0; i < num_bytes; ++i)
>>>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>> Redundant & 0xffULL part.
>>>>>
>>>>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
>>>> No, these are shift in/out operations. The read register will also have
>>>> previous operations data in them and must be extracted with only the
>>>> correct number of bytes.
>>> Why not to call put_unaligned() how the tail in this case (it's 0 or
>>> can be easily made to be 0) will affect the result?
>>
>> The shift-in is not the same as any byte-swap or unaligned operation.
>> For however many bytes we've read, we start at that many bytes
>> left-shifted in the register and copy out to our buffer, moving right
>> for each next byte... I don't think there is an existing function for
>> this operation.
> For me it looks like
>
>    u8 tmp[8];
>
>    put_unaligned_be64(in, tmp);
>    memcpy(rx, tmp, num_bytes);
>
> put_unaligned*() is just a method to unroll the value to the u8 buffer.
> See, for example, linux/unaligned/be_byteshift.h implementation.


Unforunately it is not the same. put_unaligned_be64 will take the 
highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then 
0x00ff000000000000 into tmp[1], etc. This is only correct for this 
driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes, 
then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into 
tmp[1], etc. So I think my current implementation is correct.


Thanks,

Eddie


>
>>>>>> +       return num_bytes;
>>>>>> +}
>>>>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
>>>>>> +{
>>>>> Ditto as for above function. (put_unaligned ...)
>>> Ditto.
>>
>> I don't understand how this could work for transfers of less than 8
>> bytes, any put_unaligned would access memory that it doesn't own.
> Ditto.
>
>>>>>> +}

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-07 19:28           ` Eddie James
@ 2020-02-07 19:39             ` Andy Shevchenko
  2020-02-07 20:04               ` Eddie James
  0 siblings, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2020-02-07 19:39 UTC (permalink / raw)
  To: Eddie James
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery

On Fri, Feb 7, 2020 at 9:28 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> > On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
> >> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> >>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:

...

> >>>>>> +       for (i = 0; i < num_bytes; ++i)
> >>>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>>>> Redundant & 0xffULL part.
> >>>>>
> >>>>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
> >>>> No, these are shift in/out operations. The read register will also have
> >>>> previous operations data in them and must be extracted with only the
> >>>> correct number of bytes.
> >>> Why not to call put_unaligned() how the tail in this case (it's 0 or
> >>> can be easily made to be 0) will affect the result?
> >>
> >> The shift-in is not the same as any byte-swap or unaligned operation.
> >> For however many bytes we've read, we start at that many bytes
> >> left-shifted in the register and copy out to our buffer, moving right
> >> for each next byte... I don't think there is an existing function for
> >> this operation.
> > For me it looks like
> >
> >    u8 tmp[8];
> >
> >    put_unaligned_be64(in, tmp);
> >    memcpy(rx, tmp, num_bytes);
> >
> > put_unaligned*() is just a method to unroll the value to the u8 buffer.
> > See, for example, linux/unaligned/be_byteshift.h implementation.
>
>
> Unforunately it is not the same. put_unaligned_be64 will take the
> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
> tmp[1], etc. So I think my current implementation is correct.

Yes, I missed correction of the start address in memcpy(). Otherwise
it's still the same what I was talking about.

> >>>>>> +       return num_bytes;
> >>>>>> +}
> >>>>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
> >>>>>> +{
> >>>>> Ditto as for above function. (put_unaligned ...)
> >>> Ditto.
> >>
> >> I don't understand how this could work for transfers of less than 8
> >> bytes, any put_unaligned would access memory that it doesn't own.
> > Ditto.
> >
> >>>>>> +}

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-07 19:39             ` Andy Shevchenko
@ 2020-02-07 20:04               ` Eddie James
  2020-02-07 20:34                 ` Andy Shevchenko
  0 siblings, 1 reply; 17+ messages in thread
From: Eddie James @ 2020-02-07 20:04 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery


On 2/7/20 1:39 PM, Andy Shevchenko wrote:
> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> ...
>
>>>>>>>> +       for (i = 0; i < num_bytes; ++i)
>>>>>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>>>> Redundant & 0xffULL part.
>>>>>>>
>>>>>>> Isn't it NIH of get_unalinged_be64 / le64 or something similar?
>>>>>> No, these are shift in/out operations. The read register will also have
>>>>>> previous operations data in them and must be extracted with only the
>>>>>> correct number of bytes.
>>>>> Why not to call put_unaligned() how the tail in this case (it's 0 or
>>>>> can be easily made to be 0) will affect the result?
>>>> The shift-in is not the same as any byte-swap or unaligned operation.
>>>> For however many bytes we've read, we start at that many bytes
>>>> left-shifted in the register and copy out to our buffer, moving right
>>>> for each next byte... I don't think there is an existing function for
>>>> this operation.
>>> For me it looks like
>>>
>>>     u8 tmp[8];
>>>
>>>     put_unaligned_be64(in, tmp);
>>>     memcpy(rx, tmp, num_bytes);
>>>
>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
>>> See, for example, linux/unaligned/be_byteshift.h implementation.
>>
>> Unforunately it is not the same. put_unaligned_be64 will take the
>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
>> tmp[1], etc. So I think my current implementation is correct.
> Yes, I missed correction of the start address in memcpy(). Otherwise
> it's still the same what I was talking about.


I see now, yes, thanks.

Do you think this is worth a v3? Perhaps put_unaligned is slightly more 
optimized than the loop but there is more memory copy with that way too.

Eddie


>
>>>>>>>> +       return num_bytes;
>>>>>>>> +}
>>>>>>>> +static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
>>>>>>>> +{
>>>>>>> Ditto as for above function. (put_unaligned ...)
>>>>> Ditto.
>>>> I don't understand how this could work for transfers of less than 8
>>>> bytes, any put_unaligned would access memory that it doesn't own.
>>> Ditto.
>>>
>>>>>>>> +}

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-07 20:04               ` Eddie James
@ 2020-02-07 20:34                 ` Andy Shevchenko
  2020-02-07 20:59                   ` Eddie James
  0 siblings, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2020-02-07 20:34 UTC (permalink / raw)
  To: Eddie James
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery

On Fri, Feb 7, 2020 at 10:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
> > On Fri, Feb 7, 2020 at 9:28 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> >>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
> >>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> >>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:


> >>>>>>>> +       for (i = 0; i < num_bytes; ++i)
> >>>>>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>>>>>> Redundant & 0xffULL part.

> >>> For me it looks like
> >>>
> >>>     u8 tmp[8];
> >>>
> >>>     put_unaligned_be64(in, tmp);
> >>>     memcpy(rx, tmp, num_bytes);
> >>>
> >>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
> >>> See, for example, linux/unaligned/be_byteshift.h implementation.
> >>
> >> Unforunately it is not the same. put_unaligned_be64 will take the
> >> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
> >> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
> >> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
> >> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
> >> tmp[1], etc. So I think my current implementation is correct.
> > Yes, I missed correction of the start address in memcpy(). Otherwise
> > it's still the same what I was talking about.
>
>
> I see now, yes, thanks.
>
> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
> optimized than the loop but there is more memory copy with that way too.

I already forgot the entire context when this has been called. Can you
summarize what the sequence(s) of num_bytes are expected usually.

IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
correct assumption?

> >>>>>>>> +       return num_bytes;
> >>>>>>>> +}

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-07 20:34                 ` Andy Shevchenko
@ 2020-02-07 20:59                   ` Eddie James
  2020-02-07 22:04                     ` Andy Shevchenko
  0 siblings, 1 reply; 17+ messages in thread
From: Eddie James @ 2020-02-07 20:59 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery


On 2/7/20 2:34 PM, Andy Shevchenko wrote:
> On Fri, Feb 7, 2020 at 10:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
>>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
>>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
>>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>
>>>>>>>>>> +       for (i = 0; i < num_bytes; ++i)
>>>>>>>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>>>>>> Redundant & 0xffULL part.
>>>>> For me it looks like
>>>>>
>>>>>      u8 tmp[8];
>>>>>
>>>>>      put_unaligned_be64(in, tmp);
>>>>>      memcpy(rx, tmp, num_bytes);
>>>>>
>>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
>>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
>>>> Unforunately it is not the same. put_unaligned_be64 will take the
>>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
>>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
>>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
>>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
>>>> tmp[1], etc. So I think my current implementation is correct.
>>> Yes, I missed correction of the start address in memcpy(). Otherwise
>>> it's still the same what I was talking about.
>>
>> I see now, yes, thanks.
>>
>> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
>> optimized than the loop but there is more memory copy with that way too.
> I already forgot the entire context when this has been called. Can you
> summarize what the sequence(s) of num_bytes are expected usually.
>
> IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
> Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
> correct assumption?


Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX, 
the driver polls for the rx register full. Once full, it will read 
however much data is left to be transferred. Since we use min(len, 8) 
then we read 8 usually, until we get to the end.


>
>>>>>>>>>> +       return num_bytes;
>>>>>>>>>> +}

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-07 20:59                   ` Eddie James
@ 2020-02-07 22:04                     ` Andy Shevchenko
  2020-02-10 20:05                       ` Eddie James
  0 siblings, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2020-02-07 22:04 UTC (permalink / raw)
  To: Eddie James
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery

On Fri, Feb 7, 2020 at 11:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> On 2/7/20 2:34 PM, Andy Shevchenko wrote:
> > On Fri, Feb 7, 2020 at 10:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
> >>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> >>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
> >>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> >>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> >
> >>>>>>>>>> +       for (i = 0; i < num_bytes; ++i)
> >>>>>>>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>>>>>>>> Redundant & 0xffULL part.
> >>>>> For me it looks like
> >>>>>
> >>>>>      u8 tmp[8];
> >>>>>
> >>>>>      put_unaligned_be64(in, tmp);
> >>>>>      memcpy(rx, tmp, num_bytes);
> >>>>>
> >>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
> >>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
> >>>> Unforunately it is not the same. put_unaligned_be64 will take the
> >>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
> >>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
> >>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
> >>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
> >>>> tmp[1], etc. So I think my current implementation is correct.
> >>> Yes, I missed correction of the start address in memcpy(). Otherwise
> >>> it's still the same what I was talking about.
> >>
> >> I see now, yes, thanks.
> >>
> >> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
> >> optimized than the loop but there is more memory copy with that way too.
> > I already forgot the entire context when this has been called. Can you
> > summarize what the sequence(s) of num_bytes are expected usually.
> >
> > IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
> > Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
> > correct assumption?
>
>
> Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX,
> the driver polls for the rx register full. Once full, it will read
> however much data is left to be transferred. Since we use min(len, 8)
> then we read 8 usually, until we get to the end.

I asked that because we might have a better optimization, i.e, call
directly put_unaligned_be64() when we know that length is 8 bytes. For
the rest your approach might be simpler. Similar for the TX case.

> >>>>>>>>>> +       return num_bytes;
> >>>>>>>>>> +}

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-07 22:04                     ` Andy Shevchenko
@ 2020-02-10 20:05                       ` Eddie James
  2020-02-10 20:33                         ` Andy Shevchenko
  0 siblings, 1 reply; 17+ messages in thread
From: Eddie James @ 2020-02-10 20:05 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery


On 2/7/20 4:04 PM, Andy Shevchenko wrote:
> On Fri, Feb 7, 2020 at 11:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>> On 2/7/20 2:34 PM, Andy Shevchenko wrote:
>>> On Fri, Feb 7, 2020 at 10:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
>>>>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
>>>>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
>>>>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>>>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>>>>>>>>>>>> +       for (i = 0; i < num_bytes; ++i)
>>>>>>>>>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>>>>>>>> Redundant & 0xffULL part.
>>>>>>> For me it looks like
>>>>>>>
>>>>>>>       u8 tmp[8];
>>>>>>>
>>>>>>>       put_unaligned_be64(in, tmp);
>>>>>>>       memcpy(rx, tmp, num_bytes);
>>>>>>>
>>>>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
>>>>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
>>>>>> Unforunately it is not the same. put_unaligned_be64 will take the
>>>>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
>>>>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
>>>>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
>>>>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
>>>>>> tmp[1], etc. So I think my current implementation is correct.
>>>>> Yes, I missed correction of the start address in memcpy(). Otherwise
>>>>> it's still the same what I was talking about.
>>>> I see now, yes, thanks.
>>>>
>>>> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
>>>> optimized than the loop but there is more memory copy with that way too.
>>> I already forgot the entire context when this has been called. Can you
>>> summarize what the sequence(s) of num_bytes are expected usually.
>>>
>>> IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
>>> Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
>>> correct assumption?
>>
>> Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX,
>> the driver polls for the rx register full. Once full, it will read
>> however much data is left to be transferred. Since we use min(len, 8)
>> then we read 8 usually, until we get to the end.
> I asked that because we might have a better optimization, i.e, call
> directly put_unaligned_be64() when we know that length is 8 bytes. For
> the rest your approach might be simpler. Similar for the TX case.


I just tried to implement as you suggested but I realized something: The 
value is already swapped from BE to CPU when the register is read in 
fsi_spi_read_reg. It happens to work out correctly to use 
put_unaligned_be64 on a LE CPU to flip the bytes here. But on a BE CPU, 
this wouldn't be correct I think. Now I don't anticipate this driver 
running on a BE CPU, but I think it is weird to flip it twice, and 
better to do it manually here.

What do you think Andy?

Thanks,

Eddie


>
>>>>>>>>>>>> +       return num_bytes;
>>>>>>>>>>>> +}

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-10 20:05                       ` Eddie James
@ 2020-02-10 20:33                         ` Andy Shevchenko
  2020-02-10 20:50                           ` Eddie James
  0 siblings, 1 reply; 17+ messages in thread
From: Andy Shevchenko @ 2020-02-10 20:33 UTC (permalink / raw)
  To: Eddie James
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery

On Mon, Feb 10, 2020 at 10:05 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> On 2/7/20 4:04 PM, Andy Shevchenko wrote:
> > On Fri, Feb 7, 2020 at 11:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >> On 2/7/20 2:34 PM, Andy Shevchenko wrote:
> >>> On Fri, Feb 7, 2020 at 10:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >>>> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
> >>>>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >>>>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
> >>>>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
> >>>>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
> >>>>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
> >>>>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
> >>>>>>>>>>>> +       for (i = 0; i < num_bytes; ++i)
> >>>>>>>>>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
> >>>>>>>>>>> Redundant & 0xffULL part.
> >>>>>>> For me it looks like
> >>>>>>>
> >>>>>>>       u8 tmp[8];
> >>>>>>>
> >>>>>>>       put_unaligned_be64(in, tmp);
> >>>>>>>       memcpy(rx, tmp, num_bytes);
> >>>>>>>
> >>>>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
> >>>>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
> >>>>>> Unforunately it is not the same. put_unaligned_be64 will take the
> >>>>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
> >>>>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
> >>>>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
> >>>>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
> >>>>>> tmp[1], etc. So I think my current implementation is correct.
> >>>>> Yes, I missed correction of the start address in memcpy(). Otherwise
> >>>>> it's still the same what I was talking about.
> >>>> I see now, yes, thanks.
> >>>>
> >>>> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
> >>>> optimized than the loop but there is more memory copy with that way too.
> >>> I already forgot the entire context when this has been called. Can you
> >>> summarize what the sequence(s) of num_bytes are expected usually.
> >>>
> >>> IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
> >>> Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
> >>> correct assumption?
> >>
> >> Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX,
> >> the driver polls for the rx register full. Once full, it will read
> >> however much data is left to be transferred. Since we use min(len, 8)
> >> then we read 8 usually, until we get to the end.
> > I asked that because we might have a better optimization, i.e, call
> > directly put_unaligned_be64() when we know that length is 8 bytes. For
> > the rest your approach might be simpler. Similar for the TX case.
>
>
> I just tried to implement as you suggested but I realized something: The
> value is already swapped from BE to CPU when the register is read in
> fsi_spi_read_reg. It happens to work out correctly to use
> put_unaligned_be64 on a LE CPU to flip the bytes here. But on a BE CPU,
> this wouldn't be correct I think.

Hmm... Any BE conversion op on BE architecture is no-op.
Same for LE on LE.

> Now I don't anticipate this driver
> running on a BE CPU, but I think it is weird to flip it twice, and
> better to do it manually here.
>
> What do you think Andy?



> >>>>>>>>>>>> +       return num_bytes;
> >>>>>>>>>>>> +}



-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH] spi: Add FSI-attached SPI controller driver
  2020-02-10 20:33                         ` Andy Shevchenko
@ 2020-02-10 20:50                           ` Eddie James
  0 siblings, 0 replies; 17+ messages in thread
From: Eddie James @ 2020-02-10 20:50 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Eddie James, linux-spi, Linux Kernel Mailing List, Mark Brown,
	Joel Stanley, Andrew Jeffery


On 2/10/20 2:33 PM, Andy Shevchenko wrote:
> On Mon, Feb 10, 2020 at 10:05 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>> On 2/7/20 4:04 PM, Andy Shevchenko wrote:
>>> On Fri, Feb 7, 2020 at 11:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>> On 2/7/20 2:34 PM, Andy Shevchenko wrote:
>>>>> On Fri, Feb 7, 2020 at 10:04 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>>>> On 2/7/20 1:39 PM, Andy Shevchenko wrote:
>>>>>>> On Fri, Feb 7, 2020 at 9:28 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>>>>>> On 2/5/20 9:51 AM, Andy Shevchenko wrote:
>>>>>>>>> On Tue, Feb 4, 2020 at 6:06 PM Eddie James <eajames@linux.ibm.com> wrote:
>>>>>>>>>> On 2/4/20 5:02 AM, Andy Shevchenko wrote:
>>>>>>>>>>> On Mon, Feb 3, 2020 at 10:33 PM Eddie James <eajames@linux.vnet.ibm.com> wrote:
>>>>>>>>>>>> On 1/30/20 10:37 AM, Andy Shevchenko wrote:
>>>>>>>>>>>>>> +       for (i = 0; i < num_bytes; ++i)
>>>>>>>>>>>>>> +               rx[i] = (u8)((in >> (8 * ((num_bytes - 1) - i))) & 0xffULL);
>>>>>>>>>>>>> Redundant & 0xffULL part.
>>>>>>>>> For me it looks like
>>>>>>>>>
>>>>>>>>>        u8 tmp[8];
>>>>>>>>>
>>>>>>>>>        put_unaligned_be64(in, tmp);
>>>>>>>>>        memcpy(rx, tmp, num_bytes);
>>>>>>>>>
>>>>>>>>> put_unaligned*() is just a method to unroll the value to the u8 buffer.
>>>>>>>>> See, for example, linux/unaligned/be_byteshift.h implementation.
>>>>>>>> Unforunately it is not the same. put_unaligned_be64 will take the
>>>>>>>> highest 8 bits (0xff00000000000000) and move it into tmp[0]. Then
>>>>>>>> 0x00ff000000000000 into tmp[1], etc. This is only correct for this
>>>>>>>> driver IF my transfer is 8 bytes. If, for example, I transfer 5 bytes,
>>>>>>>> then I need 0x000000ff00000000 into tmp[0], 0x00000000ff000000 into
>>>>>>>> tmp[1], etc. So I think my current implementation is correct.
>>>>>>> Yes, I missed correction of the start address in memcpy(). Otherwise
>>>>>>> it's still the same what I was talking about.
>>>>>> I see now, yes, thanks.
>>>>>>
>>>>>> Do you think this is worth a v3? Perhaps put_unaligned is slightly more
>>>>>> optimized than the loop but there is more memory copy with that way too.
>>>>> I already forgot the entire context when this has been called. Can you
>>>>> summarize what the sequence(s) of num_bytes are expected usually.
>>>>>
>>>>> IIUC if packets small, less than 8 bytes, than num_bytes will be that value.
>>>>> Otherwise it will be something like 8 + 8 + 8 ... + tail. Is it
>>>>> correct assumption?
>>>> Yes, it will typically be 8 + 8 +... remainder. Basically, on any RX,
>>>> the driver polls for the rx register full. Once full, it will read
>>>> however much data is left to be transferred. Since we use min(len, 8)
>>>> then we read 8 usually, until we get to the end.
>>> I asked that because we might have a better optimization, i.e, call
>>> directly put_unaligned_be64() when we know that length is 8 bytes. For
>>> the rest your approach might be simpler. Similar for the TX case.
>>
>> I just tried to implement as you suggested but I realized something: The
>> value is already swapped from BE to CPU when the register is read in
>> fsi_spi_read_reg. It happens to work out correctly to use
>> put_unaligned_be64 on a LE CPU to flip the bytes here. But on a BE CPU,
>> this wouldn't be correct I think.
> Hmm... Any BE conversion op on BE architecture is no-op.
> Same for LE on LE.


Right. So regardless of architecture, by the time we get to 
fsi_spi_data_in, the data is in the correct endianness. But on a BE 
architecture, it would still need to get flipped because that's what the 
specification indicates. So doing it manually seems correct to me.


>
>> Now I don't anticipate this driver
>> running on a BE CPU, but I think it is weird to flip it twice, and
>> better to do it manually here.
>>
>> What do you think Andy?
>
>
>>>>>>>>>>>>>> +       return num_bytes;
>>>>>>>>>>>>>> +}
>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2020-02-10 20:50 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-29 20:08 [PATCH] spi: Add FSI-attached SPI controller driver Eddie James
2020-01-30 14:46 ` Mark Brown
2020-01-30 15:32   ` Eddie James
2020-01-30 16:37 ` Andy Shevchenko
2020-02-03 20:33   ` Eddie James
2020-02-04 11:02     ` Andy Shevchenko
2020-02-04 16:06       ` Eddie James
2020-02-05 15:51         ` Andy Shevchenko
2020-02-07 19:28           ` Eddie James
2020-02-07 19:39             ` Andy Shevchenko
2020-02-07 20:04               ` Eddie James
2020-02-07 20:34                 ` Andy Shevchenko
2020-02-07 20:59                   ` Eddie James
2020-02-07 22:04                     ` Andy Shevchenko
2020-02-10 20:05                       ` Eddie James
2020-02-10 20:33                         ` Andy Shevchenko
2020-02-10 20:50                           ` Eddie James

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).