kvmarm.lists.cs.columbia.edu archive mirror
 help / color / mirror / Atom feed
* [PATCH kvmtool v2] Add emulation for CFI compatible flash memory
@ 2020-02-07 12:19 Andre Przywara
  2020-02-07 17:34 ` Alexandru Elisei
  2020-02-17 17:20 ` Alexandru Elisei
  0 siblings, 2 replies; 8+ messages in thread
From: Andre Przywara @ 2020-02-07 12:19 UTC (permalink / raw)
  To: Will Deacon, Julien Thierry
  Cc: Raphael Gault, Sami Mujawar, kvmarm, linux-arm-kernel

From: Raphael Gault <raphael.gault@arm.com>

The EDK II UEFI firmware implementation requires some storage for the EFI
variables, which is typically some flash storage.
Since this is already supported on the EDK II side, we add a CFI flash
emulation to kvmtool.
This is backed by a file, specified via the --flash or -F command line
option. Any flash writes done by the guest will immediately be reflected
into this file (kvmtool mmap's the file).

This implements a CFI flash using the "Intel/Sharp extended command
set", as specified in:
- JEDEC JESD68.01
- JEDEC JEP137B
- Intel Application Note 646
Some gaps in those specs have been filled by looking at real devices and
other implementations (QEMU, Linux kernel driver).

At the moment this relies on DT to advertise the base address of the
flash memory (mapped into the MMIO address space) and is only enabled
for ARM/ARM64. The emulation itself is architecture agnostic, though.

This is one missing piece toward a working UEFI boot with kvmtool on
ARM guests, the other is to provide writable PCI BARs, which is WIP.

Signed-off-by: Raphael Gault <raphael.gault@arm.com>
[Andre: rewriting and fixing]
Signed-off-by: Andre Przywra <andre.przywara@arm.com>
---
Hi,

an update addressing Will's comments. I added coarse grained locking
to the MMIO handler, to prevent concurrent vCPU accesses from messing up
the internal CFI flash state machine.
I also folded the actual flash array read access into the MMIO handler
and fixed the other small issues.

Cheers,
Andre

 Makefile                          |   6 +
 arm/include/arm-common/kvm-arch.h |   3 +
 builtin-run.c                     |   2 +
 hw/cfi_flash.c                    | 546 ++++++++++++++++++++++++++++++
 include/kvm/kvm-config.h          |   1 +
 include/kvm/util.h                |   5 +
 6 files changed, 563 insertions(+)
 create mode 100644 hw/cfi_flash.c

diff --git a/Makefile b/Makefile
index 3862112c..7ed6fb5e 100644
--- a/Makefile
+++ b/Makefile
@@ -170,6 +170,7 @@ ifeq ($(ARCH), arm)
 	CFLAGS		+= -march=armv7-a
 
 	ARCH_WANT_LIBFDT := y
+	ARCH_HAS_FLASH_MEM := y
 endif
 
 # ARM64
@@ -182,6 +183,7 @@ ifeq ($(ARCH), arm64)
 	ARCH_INCLUDE	+= -Iarm/aarch64/include
 
 	ARCH_WANT_LIBFDT := y
+	ARCH_HAS_FLASH_MEM := y
 endif
 
 ifeq ($(ARCH),mips)
@@ -261,6 +263,10 @@ ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
 	endif
 endif
 
+ifeq (y,$(ARCH_HAS_FLASH_MEM))
+	OBJS	+= hw/cfi_flash.o
+endif
+
 ifeq ($(call try-build,$(SOURCE_ZLIB),$(CFLAGS),$(LDFLAGS) -lz),y)
 	CFLAGS_DYNOPT	+= -DCONFIG_HAS_ZLIB
 	LIBS_DYNOPT	+= -lz
diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
index b9d486d5..2bb085f4 100644
--- a/arm/include/arm-common/kvm-arch.h
+++ b/arm/include/arm-common/kvm-arch.h
@@ -21,6 +21,9 @@
 #define ARM_GIC_DIST_SIZE	0x10000
 #define ARM_GIC_CPUI_SIZE	0x20000
 
+#define ARM_FLASH_MMIO_BASE	0x2000000		/* 32 MB */
+#define KVM_FLASH_MMIO_BASE	ARM_FLASH_MMIO_BASE
+
 #define ARM_IOPORT_SIZE		(ARM_MMIO_AREA - ARM_IOPORT_AREA)
 #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))
 #define ARM_PCI_CFG_SIZE	(1ULL << 24)
diff --git a/builtin-run.c b/builtin-run.c
index f8dc6c72..df8c6741 100644
--- a/builtin-run.c
+++ b/builtin-run.c
@@ -138,6 +138,8 @@ void kvm_run_set_wrapper_sandbox(void)
 			"Kernel command line arguments"),		\
 	OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\
 			"Firmware image to boot in virtual machine"),	\
+	OPT_STRING('F', "flash", &(cfg)->flash_filename, "flash",\
+			"Flash image to present to virtual machine"),	\
 									\
 	OPT_GROUP("Networking options:"),				\
 	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",	\
diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
new file mode 100644
index 00000000..d7c0e7e8
--- /dev/null
+++ b/hw/cfi_flash.c
@@ -0,0 +1,546 @@
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include "kvm/kvm.h"
+#include "kvm/kvm-arch.h"
+#include "kvm/devices.h"
+#include "kvm/fdt.h"
+#include "kvm/mutex.h"
+#include "kvm/util.h"
+
+/* The EDK2 driver hardcodes two 16-bit chips on a 32-bit bus. */
+#define CFI_NR_FLASH_CHIPS			2
+
+/* We always emulate a 32 bit bus width. */
+#define CFI_BUS_WIDTH				4
+
+/* The *effective* size of an erase block (over all chips) */
+#define FLASH_BLOCK_SIZE			SZ_64K
+
+#define PROGRAM_BUFF_SIZE_BITS			7
+#define PROGRAM_BUFF_SIZE			(1U << PROGRAM_BUFF_SIZE_BITS)
+
+/* CFI commands */
+#define CFI_CMD_LOCK_BLOCK			0x01
+#define CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP	0x10
+#define CFI_CMD_BLOCK_ERASE_SETUP		0x20
+#define CFI_CMD_WORD_PROGRAM_SETUP		0x40
+#define CFI_CMD_CLEAR_STATUS_REGISTER		0x50
+#define CFI_CMD_LOCK_BLOCK_SETUP		0x60
+#define CFI_CMD_READ_STATUS_REGISTER		0x70
+#define CFI_CMD_READ_JEDEC			0x90
+#define CFI_CMD_READ_CFI_QUERY			0x98
+#define CFI_CMD_BUFFERED_PROGRAM_CONFIRM	0xd0
+#define CFI_CMD_BLOCK_ERASE_CONFIRM		0xd0
+#define CFI_CMD_UNLOCK_BLOCK			0xd0
+#define CFI_CMD_BUFFERED_PROGRAM_SETUP		0xe8
+#define CFI_CMD_READ_ARRAY			0xff
+
+/*
+ * CFI query table contents, as far as it is constant.
+ */
+#define CFI_GEOM_OFFSET				0x27
+static u8 cfi_query_table[] = {
+		/* offset 0x10: CFI query identification string */
+	'Q', 'R', 'Y',		/* ID string */
+	0x01, 0x00,		/* primary command set: Intel/Sharp extended */
+	0x31, 0x00,		/* address of primary extended query table */
+	0x00, 0x00,		/* alternative command set: unused */
+	0x00, 0x00,		/* address of alternative extended query table*/
+		/* offset 0x1b: system interface information */
+	0x45,			/* minimum Vcc voltage: 4.5V */
+	0x55,			/* maximum Vcc voltage: 5.5V */
+	0x00,			/* minimum Vpp voltage: 0.0V (unused) */
+	0x00,			/* maximum Vpp voltage: 0.0V *(unused) */
+	0x01,			/* timeout for single word program: 2 us */
+	0x01,			/* timeout for multi-byte program: 2 us */
+	0x01,			/* timeout for block erase: 2 ms */
+	0x00,			/* timeout for full chip erase: not supported */
+	0x00,			/* max timeout for single word program: 1x */
+	0x00,			/* max timeout for mulit-byte program: 1x */
+	0x00,			/* max timeout for block erase: 1x */
+	0x00,			/* max timeout for chip erase: not supported */
+		/* offset 0x27: flash geometry information */
+	0x00,			/* size in power-of-2 bytes, filled later */
+	0x06, 0x00,		/* interface description: 32 and 16 bits */
+	PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
+				/* number of multi-byte writes */
+	0x01,			/* one erase block region */
+	0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
+		/* offset 0x31: Intel primary algorithm extended query table */
+	'P', 'R', 'I',
+	'1', '0',		/* version 1.0 */
+	0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
+	0x00,			/* no functions after suspend */
+	0x01, 0x00,		/* only lock bit supported */
+	0x50,			/* best Vcc value: 5.0V */
+	0x00,			/* best Vpp value: 0.0V (unused) */
+	0x01,			/* number of protection register fields */
+	0x00, 0x00, 0x00, 0x00,	/* protection field 1 description */
+};
+
+
+/*
+ * Those states represent a subset of the CFI flash state machine.
+ */
+enum cfi_flash_state {
+	READY,
+	LOCK_SETUP,
+	WP_SETUP,
+	BP_SETUP,
+	BP_LOAD,
+	ERASE_SETUP,
+};
+
+/*
+ * The device can be in several **Read** modes.
+ * We don't implement the asynchronous burst mode.
+ */
+enum cfi_read_mode {
+	READ_ARRAY,
+	READ_STATUS,
+	READ_DEVICE_ID,
+	READ_QUERY,
+};
+
+struct cfi_flash_device {
+	struct device_header	dev_hdr;
+	/* Protects the CFI state machine variables in this data structure. */
+	struct mutex		mutex;
+	u64			base_addr;
+	u32			size;
+
+	void			*flash_memory;
+	u8			program_buffer[PROGRAM_BUFF_SIZE * 4];
+	unsigned long		*lock_bm;
+	u64			last_address;
+	unsigned int		buff_written;
+	unsigned int		program_length;
+
+	enum cfi_flash_state	state;
+	enum cfi_read_mode	read_mode;
+	u16			rcr;
+	u8			sr;
+};
+
+static int nr_erase_blocks(struct cfi_flash_device *sfdev)
+{
+	return sfdev->size / FLASH_BLOCK_SIZE;
+}
+
+/*
+ * CFI queries always deal with one byte of information, possibly mirrored
+ * to other bytes on the bus. This is dealt with in the callers.
+ * The address provided is the one for 8-bit addressing, and would need to
+ * be adjusted for wider accesses.
+ */
+static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
+{
+	if (addr < 0x10)		/* CFI information starts at 0x10 */
+		return 0;
+
+	if (addr - 0x10 > sizeof(cfi_query_table)) {
+		pr_debug("CFI query read access beyond the end of table");
+		return 0;
+	}
+
+	/* Fixup dynamic information in the geometry part of the table. */
+	switch (addr) {
+	case CFI_GEOM_OFFSET:		/* device size in bytes, power of two */
+		return pow2_size(sfdev->size / CFI_NR_FLASH_CHIPS);
+	case CFI_GEOM_OFFSET + 6:	/* number of erase blocks, minus one */
+		return (nr_erase_blocks(sfdev) - 1) & 0xff;
+	case CFI_GEOM_OFFSET + 7:
+		return (nr_erase_blocks(sfdev) - 1) >> 8;
+	case CFI_GEOM_OFFSET + 8:	/* erase block size, in units of 256 */
+		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) & 0xff;
+	case CFI_GEOM_OFFSET + 9:
+		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
+	}
+
+	return cfi_query_table[addr - 0x10];
+}
+
+static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
+{
+	int block_nr = addr / FLASH_BLOCK_SIZE;
+
+	return test_bit(block_nr, sfdev->lock_bm);
+}
+
+#define DEV_ID_MASK 0x7ff
+static u16 read_dev_id(struct cfi_flash_device *sfdev, u64 addr)
+{
+	switch ((addr & DEV_ID_MASK) / CFI_BUS_WIDTH) {
+	case 0x0:				/* vendor ID */
+		return 0x0000;
+	case 0x1:				/* device ID */
+		return 0xffff;
+	case 0x2:
+		return block_is_locked(sfdev, addr & ~DEV_ID_MASK);
+	case 0x5:
+		return sfdev->rcr;
+	default:			/* Ignore the other entries. */
+		return 0;
+	}
+}
+
+static void lock_block(struct cfi_flash_device *sfdev, u64 addr, bool lock)
+{
+	int block_nr = addr / FLASH_BLOCK_SIZE;
+
+	if (lock)
+		set_bit(block_nr, sfdev->lock_bm);
+	else
+		clear_bit(block_nr, sfdev->lock_bm);
+}
+
+static void word_program(struct cfi_flash_device *sfdev,
+			 u64 addr, void *data, int len)
+{
+	if (block_is_locked(sfdev, addr)) {
+		sfdev->sr |= 0x12;
+		return;
+	}
+
+	memcpy(sfdev->flash_memory + addr, data, len);
+}
+
+/* Reset the program buffer state to prepare for follow-up writes. */
+static void buffer_setup(struct cfi_flash_device *sfdev)
+{
+	memset(sfdev->program_buffer, 0, sizeof(sfdev->program_buffer));
+	sfdev->last_address = ~0ULL;
+	sfdev->buff_written = 0;
+}
+
+static bool buffer_program(struct cfi_flash_device *sfdev,
+			   u64 addr, void *buffer, int len)
+{
+	unsigned int buf_addr;
+
+	if (sfdev->buff_written >= sfdev->program_length)
+		return false;
+
+	/*
+	 * The first word written into the buffer after the setup command
+	 * happens to be the base address for the buffer.
+	 * All subsequent writes need to be within this address and this
+	 * address plus the buffer size, so keep this value around.
+	 */
+	if (sfdev->last_address == ~0ULL)
+		sfdev->last_address = addr;
+
+	if (addr < sfdev->last_address)
+		return false;
+	buf_addr = addr - sfdev->last_address;
+	if (buf_addr >= PROGRAM_BUFF_SIZE)
+		return false;
+
+	memcpy(sfdev->program_buffer + buf_addr, buffer, len);
+	sfdev->buff_written++;
+
+	return true;
+}
+
+static void buffer_confirm(struct cfi_flash_device *sfdev)
+{
+	if (block_is_locked(sfdev, sfdev->last_address)) {
+		sfdev->sr |= 0x12;
+		return;
+	}
+	memcpy(sfdev->flash_memory + sfdev->last_address,
+	       sfdev->program_buffer,
+	       sfdev->buff_written * sizeof(u32));
+}
+
+static void block_erase_confirm(struct cfi_flash_device *sfdev, u64 addr)
+{
+	if (block_is_locked(sfdev, addr)) {
+		sfdev->sr |= 0x12;
+		return;
+	}
+
+	memset(sfdev->flash_memory + addr, 0xFF, FLASH_BLOCK_SIZE);
+}
+
+static void cfi_flash_mmio(struct kvm_cpu *vcpu,
+			   u64 addr, u8 *data, u32 len, u8 is_write,
+			   void *context)
+{
+	struct cfi_flash_device *sfdev = context;
+	u64 faddr = addr - sfdev->base_addr;
+	u32 value;
+
+	if (!is_write) {
+		u16 cfi_value = 0;
+
+		mutex_lock(&sfdev->mutex);
+
+		switch (sfdev->read_mode) {
+		case READ_ARRAY:
+			/* just copy the requested bytes from the array */
+			memcpy(data, sfdev->flash_memory + faddr, len);
+			goto out_unlock;
+		case READ_STATUS:
+			cfi_value = sfdev->sr;
+			break;
+		case READ_DEVICE_ID:
+			cfi_value = read_dev_id(sfdev, faddr);
+			break;
+		case READ_QUERY:
+			cfi_value = read_cfi(sfdev, faddr / CFI_BUS_WIDTH);
+			break;
+		}
+		switch (len) {
+		case 1:
+			*data = cfi_value;
+			break;
+		case 8: memset(data + 4, 0, 4);
+			/* fall-through */
+		case 4:
+			if (CFI_NR_FLASH_CHIPS == 2)
+				memcpy(data + 2, &cfi_value, 2);
+			else
+				memset(data + 2, 0, 2);
+			/* fall-through */
+		case 2:
+			memcpy(data, &cfi_value, 2);
+			break;
+		default:
+			pr_debug("CFI flash: illegal access length %d for read mode %d",
+				 len, sfdev->read_mode);
+			break;
+		}
+
+		goto out_unlock;
+	}
+
+	if (len > 4) {
+		pr_info("CFI flash: MMIO %d-bit write access not supported",
+			 len * 8);
+		return;
+	}
+
+	memcpy(&value, data, len);
+
+	mutex_lock(&sfdev->mutex);
+
+	switch (sfdev->state) {
+	case READY:			/* handled below */
+		break;
+
+	case LOCK_SETUP:
+		switch (value & 0xff) {
+		case CFI_CMD_LOCK_BLOCK:
+			lock_block(sfdev, faddr, true);
+			sfdev->read_mode = READ_STATUS;
+			break;
+		case CFI_CMD_UNLOCK_BLOCK:
+			lock_block(sfdev, faddr, false);
+			sfdev->read_mode = READ_STATUS;
+			break;
+		default:
+			sfdev->sr |= 0x30;
+			break;
+		}
+		sfdev->state = READY;
+		goto out_unlock;
+
+	case WP_SETUP:
+		word_program(sfdev, faddr, data, len);
+		sfdev->read_mode = READ_STATUS;
+		sfdev->state = READY;
+		goto out_unlock;
+
+	case BP_LOAD:
+		if (buffer_program(sfdev, faddr, data, len))
+			goto out_unlock;
+
+		if ((value & 0xFF) == CFI_CMD_BUFFERED_PROGRAM_CONFIRM) {
+			buffer_confirm(sfdev);
+			sfdev->read_mode = READ_STATUS;
+		} else {
+			pr_debug("CFI flash: BP_LOAD: expected CONFIRM(0xd0), got 0x%x @ 0x%llx",
+				 value, faddr);
+			sfdev->sr |= 0x10;
+		}
+		sfdev->state = READY;
+		goto out_unlock;
+
+	case BP_SETUP:
+		sfdev->program_length = (value & 0xffff) + 1;
+		if (sfdev->program_length > PROGRAM_BUFF_SIZE / 4)
+			sfdev->program_length = PROGRAM_BUFF_SIZE / 4;
+		sfdev->state = BP_LOAD;
+		sfdev->read_mode = READ_STATUS;
+		goto out_unlock;
+
+	case ERASE_SETUP:
+		if ((value & 0xff) == CFI_CMD_BLOCK_ERASE_CONFIRM)
+			block_erase_confirm(sfdev, faddr);
+		else
+			sfdev->sr |= 0x30;
+
+		sfdev->state = READY;
+		sfdev->read_mode = READ_STATUS;
+		goto out_unlock;
+	}
+
+	/* write commands in READY state */
+	switch (value & 0xFF) {
+	case CFI_CMD_READ_JEDEC:
+		sfdev->read_mode = READ_DEVICE_ID;
+		break;
+	case CFI_CMD_READ_STATUS_REGISTER:
+		sfdev->read_mode = READ_STATUS;
+		break;
+	case CFI_CMD_READ_CFI_QUERY:
+		sfdev->read_mode = READ_QUERY;
+		break;
+	case CFI_CMD_CLEAR_STATUS_REGISTER:
+		sfdev->sr = 0x80;
+		break;
+	case CFI_CMD_WORD_PROGRAM_SETUP:
+	case CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP:
+		sfdev->state = WP_SETUP;
+		sfdev->read_mode = READ_STATUS;
+		break;
+	case CFI_CMD_LOCK_BLOCK_SETUP:
+		sfdev->state = LOCK_SETUP;
+		break;
+	case CFI_CMD_BLOCK_ERASE_SETUP:
+		sfdev->state = ERASE_SETUP;
+		sfdev->read_mode = READ_STATUS;
+		break;
+	case CFI_CMD_BUFFERED_PROGRAM_SETUP:
+		buffer_setup(sfdev);
+		sfdev->state = BP_SETUP;
+		sfdev->read_mode = READ_STATUS;
+		break;
+	case CFI_CMD_BUFFERED_PROGRAM_CONFIRM:
+		pr_debug("CFI flash: unexpected confirm command 0xD0");
+		break;
+	default:
+		pr_debug("CFI flash: unknown command 0x%x", value);
+		/* fall through */
+	case CFI_CMD_READ_ARRAY:
+		sfdev->read_mode = READ_ARRAY;
+		break;
+	}
+
+out_unlock:
+	mutex_unlock(&sfdev->mutex);
+}
+
+#ifdef CONFIG_HAS_LIBFDT
+static void generate_cfi_flash_fdt_node(void *fdt,
+					struct device_header *dev_hdr,
+					void (*generate_irq_prop)(void *fdt,
+								  u8 irq,
+								enum irq_type))
+{
+	struct cfi_flash_device *sfdev;
+	u64 reg_prop[2];
+
+	sfdev = container_of(dev_hdr, struct cfi_flash_device, dev_hdr);
+	reg_prop[0] = cpu_to_fdt64(sfdev->base_addr);
+	reg_prop[1] = cpu_to_fdt64(sfdev->size);
+
+	_FDT(fdt_begin_node(fdt, "flash"));
+	_FDT(fdt_property_cell(fdt, "bank-width", CFI_BUS_WIDTH));
+	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
+	_FDT(fdt_property_cell(fdt, "#size-cells", 0x1));
+	_FDT(fdt_property_string(fdt, "compatible", "cfi-flash"));
+	_FDT(fdt_property_string(fdt, "label", "System-firmware"));
+	_FDT(fdt_property(fdt, "reg", &reg_prop, sizeof(reg_prop)));
+	_FDT(fdt_end_node(fdt));
+}
+#else
+#define generate_cfi_flash_fdt_node NULL
+#endif
+
+static struct cfi_flash_device *create_flash_device_file(struct kvm *kvm,
+							 const char *filename)
+{
+	struct cfi_flash_device *sfdev;
+	struct stat statbuf;
+	unsigned int value;
+	int ret;
+	int fd;
+
+	fd = open(filename, O_RDWR);
+	if (fd < 0)
+		return ERR_PTR(-errno);
+	if (fstat(fd, &statbuf) < 0) {
+		close(fd);
+		return ERR_PTR(-errno);
+	}
+
+	sfdev = malloc(sizeof(struct cfi_flash_device));
+	if (!sfdev) {
+		close(fd);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	sfdev->size = (statbuf.st_size + 4095) & ~0xfffUL;
+	sfdev->flash_memory = mmap(NULL, statbuf.st_size,
+				   PROT_READ | PROT_WRITE, MAP_SHARED,
+				   fd, 0);
+	if (sfdev->flash_memory == MAP_FAILED) {
+		close(fd);
+		free(sfdev);
+		return ERR_PTR(-errno);
+	}
+	sfdev->base_addr = KVM_FLASH_MMIO_BASE;
+	sfdev->state = READY;
+	sfdev->read_mode = READ_ARRAY;
+	sfdev->sr = 0x80;
+	sfdev->rcr = 0xbfcf;
+
+	value = roundup(nr_erase_blocks(sfdev), BITS_PER_LONG) / 8;
+	sfdev->lock_bm = malloc(value);
+	memset(sfdev->lock_bm, 0, value);
+
+	sfdev->dev_hdr.bus_type = DEVICE_BUS_MMIO;
+	sfdev->dev_hdr.data = generate_cfi_flash_fdt_node;
+	mutex_init(&sfdev->mutex);
+	ret = device__register(&sfdev->dev_hdr);
+	if (ret) {
+		free(sfdev->flash_memory);
+		free(sfdev);
+		return ERR_PTR(ret);
+	}
+
+	ret = kvm__register_mmio(kvm,
+				 sfdev->base_addr, sfdev->size,
+				 false, cfi_flash_mmio, sfdev);
+	if (ret) {
+		device__unregister(&sfdev->dev_hdr);
+		free(sfdev->flash_memory);
+		free(sfdev);
+		return ERR_PTR(ret);
+	}
+
+	return sfdev;
+}
+
+static int flash__init(struct kvm *kvm)
+{
+	struct cfi_flash_device *sfdev;
+
+	if (!kvm->cfg.flash_filename)
+		return 0;
+
+	sfdev = create_flash_device_file(kvm, kvm->cfg.flash_filename);
+	if (IS_ERR(sfdev))
+		return PTR_ERR(sfdev);
+
+	return 0;
+}
+dev_init(flash__init);
diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
index a052b0bc..f4a8b831 100644
--- a/include/kvm/kvm-config.h
+++ b/include/kvm/kvm-config.h
@@ -35,6 +35,7 @@ struct kvm_config {
 	const char *vmlinux_filename;
 	const char *initrd_filename;
 	const char *firmware_filename;
+	const char *flash_filename;
 	const char *console;
 	const char *dev;
 	const char *network;
diff --git a/include/kvm/util.h b/include/kvm/util.h
index 4ca7aa93..5c37f0b7 100644
--- a/include/kvm/util.h
+++ b/include/kvm/util.h
@@ -104,6 +104,11 @@ static inline unsigned long roundup_pow_of_two(unsigned long x)
 	return x ? 1UL << fls_long(x - 1) : 0;
 }
 
+static inline int pow2_size(unsigned long x)
+{
+	return (sizeof(x) * 8) - __builtin_clzl(x - 1);
+}
+
 struct kvm;
 void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
 void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
-- 
2.17.1

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH kvmtool v2] Add emulation for CFI compatible flash memory
  2020-02-07 12:19 [PATCH kvmtool v2] Add emulation for CFI compatible flash memory Andre Przywara
@ 2020-02-07 17:34 ` Alexandru Elisei
  2020-02-14 13:47   ` Andre Przywara
  2020-02-17 17:20 ` Alexandru Elisei
  1 sibling, 1 reply; 8+ messages in thread
From: Alexandru Elisei @ 2020-02-07 17:34 UTC (permalink / raw)
  To: Andre Przywara, Will Deacon, Julien Thierry
  Cc: Raphael Gault, Sami Mujawar, linux-arm-kernel, kvmarm

Hi,

I'm going to do my best to review your patch :) I'll do it in chunks, because it's
pretty large, and definitely not trivial.

On 2/7/20 12:19 PM, Andre Przywara wrote:
> From: Raphael Gault <raphael.gault@arm.com>
>
> The EDK II UEFI firmware implementation requires some storage for the EFI
> variables, which is typically some flash storage.
> Since this is already supported on the EDK II side, we add a CFI flash
> emulation to kvmtool.
> This is backed by a file, specified via the --flash or -F command line
> option. Any flash writes done by the guest will immediately be reflected
> into this file (kvmtool mmap's the file).
>
> This implements a CFI flash using the "Intel/Sharp extended command
> set", as specified in:
> - JEDEC JESD68.01
> - JEDEC JEP137B
> - Intel Application Note 646
> Some gaps in those specs have been filled by looking at real devices and
> other implementations (QEMU, Linux kernel driver).
>
> At the moment this relies on DT to advertise the base address of the
> flash memory (mapped into the MMIO address space) and is only enabled
> for ARM/ARM64. The emulation itself is architecture agnostic, though.
>
> This is one missing piece toward a working UEFI boot with kvmtool on
> ARM guests, the other is to provide writable PCI BARs, which is WIP.
>
> Signed-off-by: Raphael Gault <raphael.gault@arm.com>
> [Andre: rewriting and fixing]
> Signed-off-by: Andre Przywra <andre.przywara@arm.com>
> ---
> Hi,
>
> an update addressing Will's comments. I added coarse grained locking
> to the MMIO handler, to prevent concurrent vCPU accesses from messing up
> the internal CFI flash state machine.
> I also folded the actual flash array read access into the MMIO handler
> and fixed the other small issues.
>
> Cheers,
> Andre
>
>  Makefile                          |   6 +
>  arm/include/arm-common/kvm-arch.h |   3 +
>  builtin-run.c                     |   2 +
>  hw/cfi_flash.c                    | 546 ++++++++++++++++++++++++++++++
>  include/kvm/kvm-config.h          |   1 +
>  include/kvm/util.h                |   5 +
>  6 files changed, 563 insertions(+)
>  create mode 100644 hw/cfi_flash.c
>
> diff --git a/Makefile b/Makefile
> index 3862112c..7ed6fb5e 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -170,6 +170,7 @@ ifeq ($(ARCH), arm)
>  	CFLAGS		+= -march=armv7-a
>  
>  	ARCH_WANT_LIBFDT := y
> +	ARCH_HAS_FLASH_MEM := y
>  endif
>  
>  # ARM64
> @@ -182,6 +183,7 @@ ifeq ($(ARCH), arm64)
>  	ARCH_INCLUDE	+= -Iarm/aarch64/include
>  
>  	ARCH_WANT_LIBFDT := y
> +	ARCH_HAS_FLASH_MEM := y
>  endif
>  
>  ifeq ($(ARCH),mips)
> @@ -261,6 +263,10 @@ ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
>  	endif
>  endif
>  
> +ifeq (y,$(ARCH_HAS_FLASH_MEM))
> +	OBJS	+= hw/cfi_flash.o
> +endif
> +
>  ifeq ($(call try-build,$(SOURCE_ZLIB),$(CFLAGS),$(LDFLAGS) -lz),y)
>  	CFLAGS_DYNOPT	+= -DCONFIG_HAS_ZLIB
>  	LIBS_DYNOPT	+= -lz
> diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
> index b9d486d5..2bb085f4 100644
> --- a/arm/include/arm-common/kvm-arch.h
> +++ b/arm/include/arm-common/kvm-arch.h
> @@ -21,6 +21,9 @@
>  #define ARM_GIC_DIST_SIZE	0x10000
>  #define ARM_GIC_CPUI_SIZE	0x20000
>  
> +#define ARM_FLASH_MMIO_BASE	0x2000000		/* 32 MB */
> +#define KVM_FLASH_MMIO_BASE	ARM_FLASH_MMIO_BASE

Each time I try to read the memory layout for ARM I get a headache. According to
my calculations, this falls right inside ARM_MMIO_AREA, right? Any particular
reason for choosing this address? Why not carve its own dedicate area, so we won't
run the highly unlikely risk that it will be overwritten, since it's in the MMIO
allocation area?

> +
>  #define ARM_IOPORT_SIZE		(ARM_MMIO_AREA - ARM_IOPORT_AREA)
>  #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))

That's not correct anymore, because flash memory is in the ARM_MMIO_AREA.

>  #define ARM_PCI_CFG_SIZE	(1ULL << 24)
> diff --git a/builtin-run.c b/builtin-run.c
> index f8dc6c72..df8c6741 100644
> --- a/builtin-run.c
> +++ b/builtin-run.c
> @@ -138,6 +138,8 @@ void kvm_run_set_wrapper_sandbox(void)
>  			"Kernel command line arguments"),		\
>  	OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\
>  			"Firmware image to boot in virtual machine"),	\
> +	OPT_STRING('F', "flash", &(cfg)->flash_filename, "flash",\
> +			"Flash image to present to virtual machine"),	\
>  									\
>  	OPT_GROUP("Networking options:"),				\
>  	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",	\
> diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
> new file mode 100644
> index 00000000..d7c0e7e8
> --- /dev/null
> +++ b/hw/cfi_flash.c
> @@ -0,0 +1,546 @@
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <linux/bitops.h>
> +#include <linux/err.h>
> +#include <linux/sizes.h>
> +#include <linux/types.h>
> +
> +#include "kvm/kvm.h"
> +#include "kvm/kvm-arch.h"
> +#include "kvm/devices.h"
> +#include "kvm/fdt.h"
> +#include "kvm/mutex.h"
> +#include "kvm/util.h"
> +
> +/* The EDK2 driver hardcodes two 16-bit chips on a 32-bit bus. */
> +#define CFI_NR_FLASH_CHIPS			2
> +
> +/* We always emulate a 32 bit bus width. */
> +#define CFI_BUS_WIDTH				4
> +
> +/* The *effective* size of an erase block (over all chips) */
> +#define FLASH_BLOCK_SIZE			SZ_64K
> +
> +#define PROGRAM_BUFF_SIZE_BITS			7
> +#define PROGRAM_BUFF_SIZE			(1U << PROGRAM_BUFF_SIZE_BITS)

Just making sure this is not an off-by-one error. The buffer size is 2^7 = 128
words, which makes it 512 bytes, right?

> +
> +/* CFI commands */
> +#define CFI_CMD_LOCK_BLOCK			0x01
> +#define CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP	0x10
> +#define CFI_CMD_BLOCK_ERASE_SETUP		0x20
> +#define CFI_CMD_WORD_PROGRAM_SETUP		0x40
> +#define CFI_CMD_CLEAR_STATUS_REGISTER		0x50
> +#define CFI_CMD_LOCK_BLOCK_SETUP		0x60
> +#define CFI_CMD_READ_STATUS_REGISTER		0x70
> +#define CFI_CMD_READ_JEDEC			0x90
> +#define CFI_CMD_READ_CFI_QUERY			0x98
> +#define CFI_CMD_BUFFERED_PROGRAM_CONFIRM	0xd0
> +#define CFI_CMD_BLOCK_ERASE_CONFIRM		0xd0
> +#define CFI_CMD_UNLOCK_BLOCK			0xd0
> +#define CFI_CMD_BUFFERED_PROGRAM_SETUP		0xe8
> +#define CFI_CMD_READ_ARRAY			0xff
> +
> +/*
> + * CFI query table contents, as far as it is constant.
> + */
> +#define CFI_GEOM_OFFSET				0x27
> +static u8 cfi_query_table[] = {
> +		/* offset 0x10: CFI query identification string */
> +	'Q', 'R', 'Y',		/* ID string */
> +	0x01, 0x00,		/* primary command set: Intel/Sharp extended */
> +	0x31, 0x00,		/* address of primary extended query table */
> +	0x00, 0x00,		/* alternative command set: unused */
> +	0x00, 0x00,		/* address of alternative extended query table*/
> +		/* offset 0x1b: system interface information */
> +	0x45,			/* minimum Vcc voltage: 4.5V */
> +	0x55,			/* maximum Vcc voltage: 5.5V */
> +	0x00,			/* minimum Vpp voltage: 0.0V (unused) */
> +	0x00,			/* maximum Vpp voltage: 0.0V *(unused) */
> +	0x01,			/* timeout for single word program: 2 us */
> +	0x01,			/* timeout for multi-byte program: 2 us */
> +	0x01,			/* timeout for block erase: 2 ms */
> +	0x00,			/* timeout for full chip erase: not supported */
> +	0x00,			/* max timeout for single word program: 1x */
> +	0x00,			/* max timeout for mulit-byte program: 1x */
> +	0x00,			/* max timeout for block erase: 1x */
> +	0x00,			/* max timeout for chip erase: not supported */
> +		/* offset 0x27: flash geometry information */
> +	0x00,			/* size in power-of-2 bytes, filled later */
> +	0x06, 0x00,		/* interface description: 32 and 16 bits */
> +	PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
> +				/* number of multi-byte writes */
> +	0x01,			/* one erase block region */
> +	0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> +		/* offset 0x31: Intel primary algorithm extended query table */
> +	'P', 'R', 'I',
> +	'1', '0',		/* version 1.0 */
> +	0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
> +	0x00,			/* no functions after suspend */
> +	0x01, 0x00,		/* only lock bit supported */
> +	0x50,			/* best Vcc value: 5.0V */
> +	0x00,			/* best Vpp value: 0.0V (unused) */
> +	0x01,			/* number of protection register fields */
> +	0x00, 0x00, 0x00, 0x00,	/* protection field 1 description */
> +};
> +
> +
> +/*
> + * Those states represent a subset of the CFI flash state machine.
> + */
> +enum cfi_flash_state {
> +	READY,
> +	LOCK_SETUP,
> +	WP_SETUP,
> +	BP_SETUP,
> +	BP_LOAD,
> +	ERASE_SETUP,
> +};
> +
> +/*
> + * The device can be in several **Read** modes.
> + * We don't implement the asynchronous burst mode.
> + */
> +enum cfi_read_mode {
> +	READ_ARRAY,
> +	READ_STATUS,
> +	READ_DEVICE_ID,
> +	READ_QUERY,
> +};
> +
> +struct cfi_flash_device {
> +	struct device_header	dev_hdr;
> +	/* Protects the CFI state machine variables in this data structure. */
> +	struct mutex		mutex;
> +	u64			base_addr;
> +	u32			size;
> +
> +	void			*flash_memory;
> +	u8			program_buffer[PROGRAM_BUFF_SIZE * 4];

You're multiplying by 4 because PROGRAM_BUFF_SIZE is the size of the buffer in
words, right?

> +	unsigned long		*lock_bm;
> +	u64			last_address;
> +	unsigned int		buff_written;
> +	unsigned int		program_length;
> +
> +	enum cfi_flash_state	state;
> +	enum cfi_read_mode	read_mode;
> +	u16			rcr;
> +	u8			sr;
> +};
> +
> +static int nr_erase_blocks(struct cfi_flash_device *sfdev)
> +{
> +	return sfdev->size / FLASH_BLOCK_SIZE;
> +}
> +
> +/*
> + * CFI queries always deal with one byte of information, possibly mirrored
> + * to other bytes on the bus. This is dealt with in the callers.
> + * The address provided is the one for 8-bit addressing, and would need to
> + * be adjusted for wider accesses.
> + */
> +static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
> +{
> +	if (addr < 0x10)		/* CFI information starts at 0x10 */
> +		return 0;
> +
> +	if (addr - 0x10 > sizeof(cfi_query_table)) {
> +		pr_debug("CFI query read access beyond the end of table");
> +		return 0;
> +	}
> +
> +	/* Fixup dynamic information in the geometry part of the table. */
> +	switch (addr) {
> +	case CFI_GEOM_OFFSET:		/* device size in bytes, power of two */
> +		return pow2_size(sfdev->size / CFI_NR_FLASH_CHIPS);
> +	case CFI_GEOM_OFFSET + 6:	/* number of erase blocks, minus one */
> +		return (nr_erase_blocks(sfdev) - 1) & 0xff;
> +	case CFI_GEOM_OFFSET + 7:
> +		return (nr_erase_blocks(sfdev) - 1) >> 8;
> +	case CFI_GEOM_OFFSET + 8:	/* erase block size, in units of 256 */
> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) & 0xff;
> +	case CFI_GEOM_OFFSET + 9:
> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
> +	}
> +
> +	return cfi_query_table[addr - 0x10];
> +}
> +
> +static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
> +{
> +	int block_nr = addr / FLASH_BLOCK_SIZE;
> +
> +	return test_bit(block_nr, sfdev->lock_bm);
> +}
> +
> +#define DEV_ID_MASK 0x7ff
> +static u16 read_dev_id(struct cfi_flash_device *sfdev, u64 addr)
> +{
> +	switch ((addr & DEV_ID_MASK) / CFI_BUS_WIDTH) {
> +	case 0x0:				/* vendor ID */
> +		return 0x0000;
> +	case 0x1:				/* device ID */
> +		return 0xffff;
> +	case 0x2:
> +		return block_is_locked(sfdev, addr & ~DEV_ID_MASK);
> +	case 0x5:
> +		return sfdev->rcr;
> +	default:			/* Ignore the other entries. */
> +		return 0;
> +	}
> +}
> +
> +static void lock_block(struct cfi_flash_device *sfdev, u64 addr, bool lock)
> +{
> +	int block_nr = addr / FLASH_BLOCK_SIZE;
> +
> +	if (lock)
> +		set_bit(block_nr, sfdev->lock_bm);
> +	else
> +		clear_bit(block_nr, sfdev->lock_bm);
> +}
> +
> +static void word_program(struct cfi_flash_device *sfdev,
> +			 u64 addr, void *data, int len)
> +{
> +	if (block_is_locked(sfdev, addr)) {
> +		sfdev->sr |= 0x12;
> +		return;
> +	}
> +
> +	memcpy(sfdev->flash_memory + addr, data, len);
> +}
> +
> +/* Reset the program buffer state to prepare for follow-up writes. */
> +static void buffer_setup(struct cfi_flash_device *sfdev)
> +{
> +	memset(sfdev->program_buffer, 0, sizeof(sfdev->program_buffer));
> +	sfdev->last_address = ~0ULL;
> +	sfdev->buff_written = 0;
> +}
> +
> +static bool buffer_program(struct cfi_flash_device *sfdev,
> +			   u64 addr, void *buffer, int len)
> +{
> +	unsigned int buf_addr;
> +
> +	if (sfdev->buff_written >= sfdev->program_length)
> +		return false;
> +
> +	/*
> +	 * The first word written into the buffer after the setup command
> +	 * happens to be the base address for the buffer.
> +	 * All subsequent writes need to be within this address and this
> +	 * address plus the buffer size, so keep this value around.
> +	 */
> +	if (sfdev->last_address == ~0ULL)
> +		sfdev->last_address = addr;
> +
> +	if (addr < sfdev->last_address)
> +		return false;
> +	buf_addr = addr - sfdev->last_address;
> +	if (buf_addr >= PROGRAM_BUFF_SIZE)
> +		return false;
> +
> +	memcpy(sfdev->program_buffer + buf_addr, buffer, len);
> +	sfdev->buff_written++;
> +
> +	return true;
> +}
> +
> +static void buffer_confirm(struct cfi_flash_device *sfdev)
> +{
> +	if (block_is_locked(sfdev, sfdev->last_address)) {
> +		sfdev->sr |= 0x12;
> +		return;
> +	}
> +	memcpy(sfdev->flash_memory + sfdev->last_address,
> +	       sfdev->program_buffer,
> +	       sfdev->buff_written * sizeof(u32));
> +}
> +
> +static void block_erase_confirm(struct cfi_flash_device *sfdev, u64 addr)
> +{
> +	if (block_is_locked(sfdev, addr)) {
> +		sfdev->sr |= 0x12;
> +		return;
> +	}
> +
> +	memset(sfdev->flash_memory + addr, 0xFF, FLASH_BLOCK_SIZE);
> +}
> +
> +static void cfi_flash_mmio(struct kvm_cpu *vcpu,
> +			   u64 addr, u8 *data, u32 len, u8 is_write,
> +			   void *context)
> +{
> +	struct cfi_flash_device *sfdev = context;
> +	u64 faddr = addr - sfdev->base_addr;
> +	u32 value;
> +
> +	if (!is_write) {
> +		u16 cfi_value = 0;
> +
> +		mutex_lock(&sfdev->mutex);
> +
> +		switch (sfdev->read_mode) {
> +		case READ_ARRAY:
> +			/* just copy the requested bytes from the array */
> +			memcpy(data, sfdev->flash_memory + faddr, len);
> +			goto out_unlock;
> +		case READ_STATUS:
> +			cfi_value = sfdev->sr;
> +			break;
> +		case READ_DEVICE_ID:
> +			cfi_value = read_dev_id(sfdev, faddr);
> +			break;
> +		case READ_QUERY:
> +			cfi_value = read_cfi(sfdev, faddr / CFI_BUS_WIDTH);
> +			break;
> +		}
> +		switch (len) {
> +		case 1:
> +			*data = cfi_value;
> +			break;
> +		case 8: memset(data + 4, 0, 4);
> +			/* fall-through */
> +		case 4:
> +			if (CFI_NR_FLASH_CHIPS == 2)
> +				memcpy(data + 2, &cfi_value, 2);
> +			else
> +				memset(data + 2, 0, 2);
> +			/* fall-through */
> +		case 2:
> +			memcpy(data, &cfi_value, 2);
> +			break;
> +		default:
> +			pr_debug("CFI flash: illegal access length %d for read mode %d",
> +				 len, sfdev->read_mode);
> +			break;
> +		}
> +
> +		goto out_unlock;
> +	}
> +
> +	if (len > 4) {
> +		pr_info("CFI flash: MMIO %d-bit write access not supported",
> +			 len * 8);
> +		return;
> +	}
> +
> +	memcpy(&value, data, len);
> +
> +	mutex_lock(&sfdev->mutex);
> +
> +	switch (sfdev->state) {
> +	case READY:			/* handled below */
> +		break;
> +
> +	case LOCK_SETUP:
> +		switch (value & 0xff) {
> +		case CFI_CMD_LOCK_BLOCK:
> +			lock_block(sfdev, faddr, true);
> +			sfdev->read_mode = READ_STATUS;
> +			break;
> +		case CFI_CMD_UNLOCK_BLOCK:
> +			lock_block(sfdev, faddr, false);
> +			sfdev->read_mode = READ_STATUS;
> +			break;
> +		default:
> +			sfdev->sr |= 0x30;
> +			break;
> +		}
> +		sfdev->state = READY;
> +		goto out_unlock;
> +
> +	case WP_SETUP:
> +		word_program(sfdev, faddr, data, len);
> +		sfdev->read_mode = READ_STATUS;
> +		sfdev->state = READY;
> +		goto out_unlock;
> +
> +	case BP_LOAD:
> +		if (buffer_program(sfdev, faddr, data, len))
> +			goto out_unlock;
> +
> +		if ((value & 0xFF) == CFI_CMD_BUFFERED_PROGRAM_CONFIRM) {
> +			buffer_confirm(sfdev);
> +			sfdev->read_mode = READ_STATUS;
> +		} else {
> +			pr_debug("CFI flash: BP_LOAD: expected CONFIRM(0xd0), got 0x%x @ 0x%llx",
> +				 value, faddr);
> +			sfdev->sr |= 0x10;
> +		}
> +		sfdev->state = READY;
> +		goto out_unlock;
> +
> +	case BP_SETUP:
> +		sfdev->program_length = (value & 0xffff) + 1;
> +		if (sfdev->program_length > PROGRAM_BUFF_SIZE / 4)
> +			sfdev->program_length = PROGRAM_BUFF_SIZE / 4;
> +		sfdev->state = BP_LOAD;
> +		sfdev->read_mode = READ_STATUS;
> +		goto out_unlock;
> +
> +	case ERASE_SETUP:
> +		if ((value & 0xff) == CFI_CMD_BLOCK_ERASE_CONFIRM)
> +			block_erase_confirm(sfdev, faddr);
> +		else
> +			sfdev->sr |= 0x30;
> +
> +		sfdev->state = READY;
> +		sfdev->read_mode = READ_STATUS;
> +		goto out_unlock;
> +	}
> +
> +	/* write commands in READY state */
> +	switch (value & 0xFF) {
> +	case CFI_CMD_READ_JEDEC:
> +		sfdev->read_mode = READ_DEVICE_ID;
> +		break;
> +	case CFI_CMD_READ_STATUS_REGISTER:
> +		sfdev->read_mode = READ_STATUS;
> +		break;
> +	case CFI_CMD_READ_CFI_QUERY:
> +		sfdev->read_mode = READ_QUERY;
> +		break;
> +	case CFI_CMD_CLEAR_STATUS_REGISTER:
> +		sfdev->sr = 0x80;
> +		break;
> +	case CFI_CMD_WORD_PROGRAM_SETUP:
> +	case CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP:
> +		sfdev->state = WP_SETUP;
> +		sfdev->read_mode = READ_STATUS;
> +		break;
> +	case CFI_CMD_LOCK_BLOCK_SETUP:
> +		sfdev->state = LOCK_SETUP;
> +		break;
> +	case CFI_CMD_BLOCK_ERASE_SETUP:
> +		sfdev->state = ERASE_SETUP;
> +		sfdev->read_mode = READ_STATUS;
> +		break;
> +	case CFI_CMD_BUFFERED_PROGRAM_SETUP:
> +		buffer_setup(sfdev);
> +		sfdev->state = BP_SETUP;
> +		sfdev->read_mode = READ_STATUS;
> +		break;
> +	case CFI_CMD_BUFFERED_PROGRAM_CONFIRM:
> +		pr_debug("CFI flash: unexpected confirm command 0xD0");
> +		break;
> +	default:
> +		pr_debug("CFI flash: unknown command 0x%x", value);
> +		/* fall through */

Above (in the read case), you wrote it "fall-through".

> +	case CFI_CMD_READ_ARRAY:
> +		sfdev->read_mode = READ_ARRAY;
> +		break;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&sfdev->mutex);
> +}

The function is huge and complicated. How about splitting it into a read and write
function, at the very least?

> +
> +#ifdef CONFIG_HAS_LIBFDT
> +static void generate_cfi_flash_fdt_node(void *fdt,
> +					struct device_header *dev_hdr,
> +					void (*generate_irq_prop)(void *fdt,
> +								  u8 irq,
> +								enum irq_type))
> +{
> +	struct cfi_flash_device *sfdev;
> +	u64 reg_prop[2];
> +
> +	sfdev = container_of(dev_hdr, struct cfi_flash_device, dev_hdr);
> +	reg_prop[0] = cpu_to_fdt64(sfdev->base_addr);
> +	reg_prop[1] = cpu_to_fdt64(sfdev->size);
> +
> +	_FDT(fdt_begin_node(fdt, "flash"));
> +	_FDT(fdt_property_cell(fdt, "bank-width", CFI_BUS_WIDTH));
> +	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
> +	_FDT(fdt_property_cell(fdt, "#size-cells", 0x1));
> +	_FDT(fdt_property_string(fdt, "compatible", "cfi-flash"));
> +	_FDT(fdt_property_string(fdt, "label", "System-firmware"));
> +	_FDT(fdt_property(fdt, "reg", &reg_prop, sizeof(reg_prop)));
> +	_FDT(fdt_end_node(fdt));
> +}
> +#else
> +#define generate_cfi_flash_fdt_node NULL
> +#endif
> +
> +static struct cfi_flash_device *create_flash_device_file(struct kvm *kvm,
> +							 const char *filename)
> +{
> +	struct cfi_flash_device *sfdev;
> +	struct stat statbuf;

Here you're using "buf" as shorthand for "buffer", but at the top of the file
(PROGRAM_BUFF_*) you use "buff".

> +	unsigned int value;
> +	int ret;
> +	int fd;
> +
> +	fd = open(filename, O_RDWR);
> +	if (fd < 0)
> +		return ERR_PTR(-errno);
> +	if (fstat(fd, &statbuf) < 0) {
> +		close(fd);
> +		return ERR_PTR(-errno);
> +	}
> +
> +	sfdev = malloc(sizeof(struct cfi_flash_device));
> +	if (!sfdev) {
> +		close(fd);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	sfdev->size = (statbuf.st_size + 4095) & ~0xfffUL;
> +	sfdev->flash_memory = mmap(NULL, statbuf.st_size,
> +				   PROT_READ | PROT_WRITE, MAP_SHARED,
> +				   fd, 0);
> +	if (sfdev->flash_memory == MAP_FAILED) {
> +		close(fd);
> +		free(sfdev);
> +		return ERR_PTR(-errno);
> +	}
> +	sfdev->base_addr = KVM_FLASH_MMIO_BASE;
> +	sfdev->state = READY;
> +	sfdev->read_mode = READ_ARRAY;
> +	sfdev->sr = 0x80;
> +	sfdev->rcr = 0xbfcf;
> +
> +	value = roundup(nr_erase_blocks(sfdev), BITS_PER_LONG) / 8;
> +	sfdev->lock_bm = malloc(value);
> +	memset(sfdev->lock_bm, 0, value);
> +
> +	sfdev->dev_hdr.bus_type = DEVICE_BUS_MMIO;
> +	sfdev->dev_hdr.data = generate_cfi_flash_fdt_node;
> +	mutex_init(&sfdev->mutex);
> +	ret = device__register(&sfdev->dev_hdr);
> +	if (ret) {
> +		free(sfdev->flash_memory);

That's strange, I wrote a quick test for this and free'ing a a file-backed mmap'ed
memory resulted in a segmentation fault. Did you mean munmap? Also, if mmap'ing
the flash fails, you close the file descriptor, which you don't do here. To be
honest, I think the best approach would be to add all the cleaning up  after the
return statement and a series of labels to jump to depending where you got an
error (similar to virtio__pci_init).

> +		free(sfdev);
> +		return ERR_PTR(ret);
> +	}
> +
> +	ret = kvm__register_mmio(kvm,
> +				 sfdev->base_addr, sfdev->size,
> +				 false, cfi_flash_mmio, sfdev);
> +	if (ret) {
> +		device__unregister(&sfdev->dev_hdr);
> +		free(sfdev->flash_memory);
> +		free(sfdev);
> +		return ERR_PTR(ret);
> +	}
> +
> +	return sfdev;
> +}
> +
> +static int flash__init(struct kvm *kvm)
> +{
> +	struct cfi_flash_device *sfdev;
> +
> +	if (!kvm->cfg.flash_filename)
> +		return 0;
> +
> +	sfdev = create_flash_device_file(kvm, kvm->cfg.flash_filename);
> +	if (IS_ERR(sfdev))
> +		return PTR_ERR(sfdev);
> +
> +	return 0;
> +}
> +dev_init(flash__init);
> diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
> index a052b0bc..f4a8b831 100644
> --- a/include/kvm/kvm-config.h
> +++ b/include/kvm/kvm-config.h
> @@ -35,6 +35,7 @@ struct kvm_config {
>  	const char *vmlinux_filename;
>  	const char *initrd_filename;
>  	const char *firmware_filename;
> +	const char *flash_filename;
>  	const char *console;
>  	const char *dev;
>  	const char *network;
> diff --git a/include/kvm/util.h b/include/kvm/util.h
> index 4ca7aa93..5c37f0b7 100644
> --- a/include/kvm/util.h
> +++ b/include/kvm/util.h
> @@ -104,6 +104,11 @@ static inline unsigned long roundup_pow_of_two(unsigned long x)
>  	return x ? 1UL << fls_long(x - 1) : 0;
>  }
>  
> +static inline int pow2_size(unsigned long x)
> +{
> +	return (sizeof(x) * 8) - __builtin_clzl(x - 1);
> +}

For the life of me I can't understand what this function is supposed to do. Also,
from the gcc online docs:

"Returns the number of leading 0-bits in x, starting at the most significant bit
position. If xis 0, the result is undefined."

you might want to add a special case for x == 1.

Thanks,
Alex
> +
>  struct kvm;
>  void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
>  void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH kvmtool v2] Add emulation for CFI compatible flash memory
  2020-02-07 17:34 ` Alexandru Elisei
@ 2020-02-14 13:47   ` Andre Przywara
  2020-02-14 15:38     ` Alexandru Elisei
  0 siblings, 1 reply; 8+ messages in thread
From: Andre Przywara @ 2020-02-14 13:47 UTC (permalink / raw)
  To: Alexandru Elisei
  Cc: Raphael Gault, Sami Mujawar, Will Deacon, kvmarm, linux-arm-kernel

On Fri, 7 Feb 2020 17:34:20 +0000
Alexandru Elisei <alexandru.elisei@arm.com> wrote:

Hi Alex,

many thanks for having a look!

> I'm going to do my best to review your patch :) I'll do it in chunks, because it's
> pretty large, and definitely not trivial.

OK, replying here, and having it mostly fixed already.
Will wait for further replies before a re-post, unless you want to benefit from the split MMIO function, which should make reviewing the state machine easier. Just let me know.

> On 2/7/20 12:19 PM, Andre Przywara wrote:
> > From: Raphael Gault <raphael.gault@arm.com>
> >
> > The EDK II UEFI firmware implementation requires some storage for the EFI
> > variables, which is typically some flash storage.
> > Since this is already supported on the EDK II side, we add a CFI flash
> > emulation to kvmtool.
> > This is backed by a file, specified via the --flash or -F command line
> > option. Any flash writes done by the guest will immediately be reflected
> > into this file (kvmtool mmap's the file).
> >
> > This implements a CFI flash using the "Intel/Sharp extended command
> > set", as specified in:
> > - JEDEC JESD68.01
> > - JEDEC JEP137B
> > - Intel Application Note 646
> > Some gaps in those specs have been filled by looking at real devices and
> > other implementations (QEMU, Linux kernel driver).
> >
> > At the moment this relies on DT to advertise the base address of the
> > flash memory (mapped into the MMIO address space) and is only enabled
> > for ARM/ARM64. The emulation itself is architecture agnostic, though.
> >
> > This is one missing piece toward a working UEFI boot with kvmtool on
> > ARM guests, the other is to provide writable PCI BARs, which is WIP.
> >
> > Signed-off-by: Raphael Gault <raphael.gault@arm.com>
> > [Andre: rewriting and fixing]
> > Signed-off-by: Andre Przywra <andre.przywara@arm.com>
> > ---
> > Hi,
> >
> > an update addressing Will's comments. I added coarse grained locking
> > to the MMIO handler, to prevent concurrent vCPU accesses from messing up
> > the internal CFI flash state machine.
> > I also folded the actual flash array read access into the MMIO handler
> > and fixed the other small issues.
> >
> > Cheers,
> > Andre
> >
> >  Makefile                          |   6 +
> >  arm/include/arm-common/kvm-arch.h |   3 +
> >  builtin-run.c                     |   2 +
> >  hw/cfi_flash.c                    | 546 ++++++++++++++++++++++++++++++
> >  include/kvm/kvm-config.h          |   1 +
> >  include/kvm/util.h                |   5 +
> >  6 files changed, 563 insertions(+)
> >  create mode 100644 hw/cfi_flash.c
> >
> > diff --git a/Makefile b/Makefile
> > index 3862112c..7ed6fb5e 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -170,6 +170,7 @@ ifeq ($(ARCH), arm)
> >  	CFLAGS		+= -march=armv7-a
> >  
> >  	ARCH_WANT_LIBFDT := y
> > +	ARCH_HAS_FLASH_MEM := y
> >  endif
> >  
> >  # ARM64
> > @@ -182,6 +183,7 @@ ifeq ($(ARCH), arm64)
> >  	ARCH_INCLUDE	+= -Iarm/aarch64/include
> >  
> >  	ARCH_WANT_LIBFDT := y
> > +	ARCH_HAS_FLASH_MEM := y
> >  endif
> >  
> >  ifeq ($(ARCH),mips)
> > @@ -261,6 +263,10 @@ ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
> >  	endif
> >  endif
> >  
> > +ifeq (y,$(ARCH_HAS_FLASH_MEM))
> > +	OBJS	+= hw/cfi_flash.o
> > +endif
> > +
> >  ifeq ($(call try-build,$(SOURCE_ZLIB),$(CFLAGS),$(LDFLAGS) -lz),y)
> >  	CFLAGS_DYNOPT	+= -DCONFIG_HAS_ZLIB
> >  	LIBS_DYNOPT	+= -lz
> > diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
> > index b9d486d5..2bb085f4 100644
> > --- a/arm/include/arm-common/kvm-arch.h
> > +++ b/arm/include/arm-common/kvm-arch.h
> > @@ -21,6 +21,9 @@
> >  #define ARM_GIC_DIST_SIZE	0x10000
> >  #define ARM_GIC_CPUI_SIZE	0x20000
> >  
> > +#define ARM_FLASH_MMIO_BASE	0x2000000		/* 32 MB */
> > +#define KVM_FLASH_MMIO_BASE	ARM_FLASH_MMIO_BASE  
> 
> Each time I try to read the memory layout for ARM I get a headache. According to
> my calculations, this falls right inside ARM_MMIO_AREA, right? Any particular
> reason for choosing this address? Why not carve its own dedicate area, so we won't
> run the highly unlikely risk that it will be overwritten, since it's in the MMIO
> allocation area?

The EDK2 build I used has the base address fixed at 32MB. So I just used this address here. Sami is working on making this flexible as we speak, but it's not easy due to some EDK-2 design issues.
As an interim measure I would try to describe this using the existing MMIO layout macros, to at least avoid overlaps with virtio-mmio.
I actually might move that address to the beginning for now, as 32MB is currently in the middle of the MMIO area.
QEMU has that hardcoded (both in QEMU and EDK-2) as well, btw.
 
> > +
> >  #define ARM_IOPORT_SIZE		(ARM_MMIO_AREA - ARM_IOPORT_AREA)
> >  #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))  
> 
> That's not correct anymore, because flash memory is in the ARM_MMIO_AREA.

True, I will try to find the right place for this. Somewhat problematic is the differing size, but we could just impose an upper limit on this.

> >  #define ARM_PCI_CFG_SIZE	(1ULL << 24)
> > diff --git a/builtin-run.c b/builtin-run.c
> > index f8dc6c72..df8c6741 100644
> > --- a/builtin-run.c
> > +++ b/builtin-run.c
> > @@ -138,6 +138,8 @@ void kvm_run_set_wrapper_sandbox(void)
> >  			"Kernel command line arguments"),		\
> >  	OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\
> >  			"Firmware image to boot in virtual machine"),	\
> > +	OPT_STRING('F', "flash", &(cfg)->flash_filename, "flash",\
> > +			"Flash image to present to virtual machine"),	\
> >  									\
> >  	OPT_GROUP("Networking options:"),				\
> >  	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",	\
> > diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
> > new file mode 100644
> > index 00000000..d7c0e7e8
> > --- /dev/null
> > +++ b/hw/cfi_flash.c
> > @@ -0,0 +1,546 @@
> > +#include <stdbool.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <linux/bitops.h>
> > +#include <linux/err.h>
> > +#include <linux/sizes.h>
> > +#include <linux/types.h>
> > +
> > +#include "kvm/kvm.h"
> > +#include "kvm/kvm-arch.h"
> > +#include "kvm/devices.h"
> > +#include "kvm/fdt.h"
> > +#include "kvm/mutex.h"
> > +#include "kvm/util.h"
> > +
> > +/* The EDK2 driver hardcodes two 16-bit chips on a 32-bit bus. */
> > +#define CFI_NR_FLASH_CHIPS			2
> > +
> > +/* We always emulate a 32 bit bus width. */
> > +#define CFI_BUS_WIDTH				4
> > +
> > +/* The *effective* size of an erase block (over all chips) */
> > +#define FLASH_BLOCK_SIZE			SZ_64K
> > +
> > +#define PROGRAM_BUFF_SIZE_BITS			7
> > +#define PROGRAM_BUFF_SIZE			(1U << PROGRAM_BUFF_SIZE_BITS)  
> 
> Just making sure this is not an off-by-one error. The buffer size is 2^7 = 128
> words, which makes it 512 bytes, right?

Looks like it ;-)
The reason this is presented in this rather awkward way here is that we need the number of bits to be presented in the CFI query structure later on.
I will add a comment pointing out this is in units of "words" - after double checking that it really is ;-)
 
> > +
> > +/* CFI commands */
> > +#define CFI_CMD_LOCK_BLOCK			0x01
> > +#define CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP	0x10
> > +#define CFI_CMD_BLOCK_ERASE_SETUP		0x20
> > +#define CFI_CMD_WORD_PROGRAM_SETUP		0x40
> > +#define CFI_CMD_CLEAR_STATUS_REGISTER		0x50
> > +#define CFI_CMD_LOCK_BLOCK_SETUP		0x60
> > +#define CFI_CMD_READ_STATUS_REGISTER		0x70
> > +#define CFI_CMD_READ_JEDEC			0x90
> > +#define CFI_CMD_READ_CFI_QUERY			0x98
> > +#define CFI_CMD_BUFFERED_PROGRAM_CONFIRM	0xd0
> > +#define CFI_CMD_BLOCK_ERASE_CONFIRM		0xd0
> > +#define CFI_CMD_UNLOCK_BLOCK			0xd0
> > +#define CFI_CMD_BUFFERED_PROGRAM_SETUP		0xe8
> > +#define CFI_CMD_READ_ARRAY			0xff
> > +
> > +/*
> > + * CFI query table contents, as far as it is constant.
> > + */
> > +#define CFI_GEOM_OFFSET				0x27
> > +static u8 cfi_query_table[] = {
> > +		/* offset 0x10: CFI query identification string */
> > +	'Q', 'R', 'Y',		/* ID string */
> > +	0x01, 0x00,		/* primary command set: Intel/Sharp extended */
> > +	0x31, 0x00,		/* address of primary extended query table */
> > +	0x00, 0x00,		/* alternative command set: unused */
> > +	0x00, 0x00,		/* address of alternative extended query table*/
> > +		/* offset 0x1b: system interface information */
> > +	0x45,			/* minimum Vcc voltage: 4.5V */
> > +	0x55,			/* maximum Vcc voltage: 5.5V */
> > +	0x00,			/* minimum Vpp voltage: 0.0V (unused) */
> > +	0x00,			/* maximum Vpp voltage: 0.0V *(unused) */
> > +	0x01,			/* timeout for single word program: 2 us */
> > +	0x01,			/* timeout for multi-byte program: 2 us */
> > +	0x01,			/* timeout for block erase: 2 ms */
> > +	0x00,			/* timeout for full chip erase: not supported */
> > +	0x00,			/* max timeout for single word program: 1x */
> > +	0x00,			/* max timeout for mulit-byte program: 1x */
> > +	0x00,			/* max timeout for block erase: 1x */
> > +	0x00,			/* max timeout for chip erase: not supported */
> > +		/* offset 0x27: flash geometry information */
> > +	0x00,			/* size in power-of-2 bytes, filled later */
> > +	0x06, 0x00,		/* interface description: 32 and 16 bits */
> > +	PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
> > +				/* number of multi-byte writes */
> > +	0x01,			/* one erase block region */
> > +	0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> > +		/* offset 0x31: Intel primary algorithm extended query table */
> > +	'P', 'R', 'I',
> > +	'1', '0',		/* version 1.0 */
> > +	0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
> > +	0x00,			/* no functions after suspend */
> > +	0x01, 0x00,		/* only lock bit supported */
> > +	0x50,			/* best Vcc value: 5.0V */
> > +	0x00,			/* best Vpp value: 0.0V (unused) */
> > +	0x01,			/* number of protection register fields */
> > +	0x00, 0x00, 0x00, 0x00,	/* protection field 1 description */
> > +};
> > +
> > +
> > +/*
> > + * Those states represent a subset of the CFI flash state machine.
> > + */
> > +enum cfi_flash_state {
> > +	READY,
> > +	LOCK_SETUP,
> > +	WP_SETUP,
> > +	BP_SETUP,
> > +	BP_LOAD,
> > +	ERASE_SETUP,
> > +};
> > +
> > +/*
> > + * The device can be in several **Read** modes.
> > + * We don't implement the asynchronous burst mode.
> > + */
> > +enum cfi_read_mode {
> > +	READ_ARRAY,
> > +	READ_STATUS,
> > +	READ_DEVICE_ID,
> > +	READ_QUERY,
> > +};
> > +
> > +struct cfi_flash_device {
> > +	struct device_header	dev_hdr;
> > +	/* Protects the CFI state machine variables in this data structure. */
> > +	struct mutex		mutex;
> > +	u64			base_addr;
> > +	u32			size;
> > +
> > +	void			*flash_memory;
> > +	u8			program_buffer[PROGRAM_BUFF_SIZE * 4];  
> 
> You're multiplying by 4 because PROGRAM_BUFF_SIZE is the size of the buffer in
> words, right?

Yeah, I can use "sizeof(u32)" if that is better.

> > +	unsigned long		*lock_bm;
> > +	u64			last_address;
> > +	unsigned int		buff_written;
> > +	unsigned int		program_length;
> > +
> > +	enum cfi_flash_state	state;
> > +	enum cfi_read_mode	read_mode;
> > +	u16			rcr;
> > +	u8			sr;
> > +};
> > +
> > +static int nr_erase_blocks(struct cfi_flash_device *sfdev)
> > +{
> > +	return sfdev->size / FLASH_BLOCK_SIZE;
> > +}
> > +
> > +/*
> > + * CFI queries always deal with one byte of information, possibly mirrored
> > + * to other bytes on the bus. This is dealt with in the callers.
> > + * The address provided is the one for 8-bit addressing, and would need to
> > + * be adjusted for wider accesses.
> > + */
> > +static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
> > +{
> > +	if (addr < 0x10)		/* CFI information starts at 0x10 */
> > +		return 0;
> > +
> > +	if (addr - 0x10 > sizeof(cfi_query_table)) {
> > +		pr_debug("CFI query read access beyond the end of table");
> > +		return 0;
> > +	}
> > +
> > +	/* Fixup dynamic information in the geometry part of the table. */
> > +	switch (addr) {
> > +	case CFI_GEOM_OFFSET:		/* device size in bytes, power of two */
> > +		return pow2_size(sfdev->size / CFI_NR_FLASH_CHIPS);
> > +	case CFI_GEOM_OFFSET + 6:	/* number of erase blocks, minus one */
> > +		return (nr_erase_blocks(sfdev) - 1) & 0xff;
> > +	case CFI_GEOM_OFFSET + 7:
> > +		return (nr_erase_blocks(sfdev) - 1) >> 8;
> > +	case CFI_GEOM_OFFSET + 8:	/* erase block size, in units of 256 */
> > +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) & 0xff;
> > +	case CFI_GEOM_OFFSET + 9:
> > +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
> > +	}
> > +
> > +	return cfi_query_table[addr - 0x10];
> > +}
> > +
> > +static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
> > +{
> > +	int block_nr = addr / FLASH_BLOCK_SIZE;
> > +
> > +	return test_bit(block_nr, sfdev->lock_bm);
> > +}
> > +
> > +#define DEV_ID_MASK 0x7ff
> > +static u16 read_dev_id(struct cfi_flash_device *sfdev, u64 addr)
> > +{
> > +	switch ((addr & DEV_ID_MASK) / CFI_BUS_WIDTH) {
> > +	case 0x0:				/* vendor ID */
> > +		return 0x0000;
> > +	case 0x1:				/* device ID */
> > +		return 0xffff;
> > +	case 0x2:
> > +		return block_is_locked(sfdev, addr & ~DEV_ID_MASK);
> > +	case 0x5:
> > +		return sfdev->rcr;
> > +	default:			/* Ignore the other entries. */
> > +		return 0;
> > +	}
> > +}
> > +
> > +static void lock_block(struct cfi_flash_device *sfdev, u64 addr, bool lock)
> > +{
> > +	int block_nr = addr / FLASH_BLOCK_SIZE;
> > +
> > +	if (lock)
> > +		set_bit(block_nr, sfdev->lock_bm);
> > +	else
> > +		clear_bit(block_nr, sfdev->lock_bm);
> > +}
> > +
> > +static void word_program(struct cfi_flash_device *sfdev,
> > +			 u64 addr, void *data, int len)
> > +{
> > +	if (block_is_locked(sfdev, addr)) {
> > +		sfdev->sr |= 0x12;
> > +		return;
> > +	}
> > +
> > +	memcpy(sfdev->flash_memory + addr, data, len);
> > +}
> > +
> > +/* Reset the program buffer state to prepare for follow-up writes. */
> > +static void buffer_setup(struct cfi_flash_device *sfdev)
> > +{
> > +	memset(sfdev->program_buffer, 0, sizeof(sfdev->program_buffer));
> > +	sfdev->last_address = ~0ULL;
> > +	sfdev->buff_written = 0;
> > +}
> > +
> > +static bool buffer_program(struct cfi_flash_device *sfdev,
> > +			   u64 addr, void *buffer, int len)
> > +{
> > +	unsigned int buf_addr;
> > +
> > +	if (sfdev->buff_written >= sfdev->program_length)
> > +		return false;
> > +
> > +	/*
> > +	 * The first word written into the buffer after the setup command
> > +	 * happens to be the base address for the buffer.
> > +	 * All subsequent writes need to be within this address and this
> > +	 * address plus the buffer size, so keep this value around.
> > +	 */
> > +	if (sfdev->last_address == ~0ULL)
> > +		sfdev->last_address = addr;
> > +
> > +	if (addr < sfdev->last_address)
> > +		return false;
> > +	buf_addr = addr - sfdev->last_address;
> > +	if (buf_addr >= PROGRAM_BUFF_SIZE)
> > +		return false;
> > +
> > +	memcpy(sfdev->program_buffer + buf_addr, buffer, len);
> > +	sfdev->buff_written++;
> > +
> > +	return true;
> > +}
> > +
> > +static void buffer_confirm(struct cfi_flash_device *sfdev)
> > +{
> > +	if (block_is_locked(sfdev, sfdev->last_address)) {
> > +		sfdev->sr |= 0x12;
> > +		return;
> > +	}
> > +	memcpy(sfdev->flash_memory + sfdev->last_address,
> > +	       sfdev->program_buffer,
> > +	       sfdev->buff_written * sizeof(u32));
> > +}
> > +
> > +static void block_erase_confirm(struct cfi_flash_device *sfdev, u64 addr)
> > +{
> > +	if (block_is_locked(sfdev, addr)) {
> > +		sfdev->sr |= 0x12;
> > +		return;
> > +	}
> > +
> > +	memset(sfdev->flash_memory + addr, 0xFF, FLASH_BLOCK_SIZE);
> > +}
> > +
> > +static void cfi_flash_mmio(struct kvm_cpu *vcpu,
> > +			   u64 addr, u8 *data, u32 len, u8 is_write,
> > +			   void *context)
> > +{
> > +	struct cfi_flash_device *sfdev = context;
> > +	u64 faddr = addr - sfdev->base_addr;
> > +	u32 value;
> > +
> > +	if (!is_write) {
> > +		u16 cfi_value = 0;
> > +
> > +		mutex_lock(&sfdev->mutex);
> > +
> > +		switch (sfdev->read_mode) {
> > +		case READ_ARRAY:
> > +			/* just copy the requested bytes from the array */
> > +			memcpy(data, sfdev->flash_memory + faddr, len);
> > +			goto out_unlock;
> > +		case READ_STATUS:
> > +			cfi_value = sfdev->sr;
> > +			break;
> > +		case READ_DEVICE_ID:
> > +			cfi_value = read_dev_id(sfdev, faddr);
> > +			break;
> > +		case READ_QUERY:
> > +			cfi_value = read_cfi(sfdev, faddr / CFI_BUS_WIDTH);
> > +			break;
> > +		}
> > +		switch (len) {
> > +		case 1:
> > +			*data = cfi_value;
> > +			break;
> > +		case 8: memset(data + 4, 0, 4);
> > +			/* fall-through */
> > +		case 4:
> > +			if (CFI_NR_FLASH_CHIPS == 2)
> > +				memcpy(data + 2, &cfi_value, 2);
> > +			else
> > +				memset(data + 2, 0, 2);
> > +			/* fall-through */
> > +		case 2:
> > +			memcpy(data, &cfi_value, 2);
> > +			break;
> > +		default:
> > +			pr_debug("CFI flash: illegal access length %d for read mode %d",
> > +				 len, sfdev->read_mode);
> > +			break;
> > +		}
> > +
> > +		goto out_unlock;
> > +	}
> > +
> > +	if (len > 4) {
> > +		pr_info("CFI flash: MMIO %d-bit write access not supported",
> > +			 len * 8);
> > +		return;
> > +	}
> > +
> > +	memcpy(&value, data, len);
> > +
> > +	mutex_lock(&sfdev->mutex);
> > +
> > +	switch (sfdev->state) {
> > +	case READY:			/* handled below */
> > +		break;
> > +
> > +	case LOCK_SETUP:
> > +		switch (value & 0xff) {
> > +		case CFI_CMD_LOCK_BLOCK:
> > +			lock_block(sfdev, faddr, true);
> > +			sfdev->read_mode = READ_STATUS;
> > +			break;
> > +		case CFI_CMD_UNLOCK_BLOCK:
> > +			lock_block(sfdev, faddr, false);
> > +			sfdev->read_mode = READ_STATUS;
> > +			break;
> > +		default:
> > +			sfdev->sr |= 0x30;
> > +			break;
> > +		}
> > +		sfdev->state = READY;
> > +		goto out_unlock;
> > +
> > +	case WP_SETUP:
> > +		word_program(sfdev, faddr, data, len);
> > +		sfdev->read_mode = READ_STATUS;
> > +		sfdev->state = READY;
> > +		goto out_unlock;
> > +
> > +	case BP_LOAD:
> > +		if (buffer_program(sfdev, faddr, data, len))
> > +			goto out_unlock;
> > +
> > +		if ((value & 0xFF) == CFI_CMD_BUFFERED_PROGRAM_CONFIRM) {
> > +			buffer_confirm(sfdev);
> > +			sfdev->read_mode = READ_STATUS;
> > +		} else {
> > +			pr_debug("CFI flash: BP_LOAD: expected CONFIRM(0xd0), got 0x%x @ 0x%llx",
> > +				 value, faddr);
> > +			sfdev->sr |= 0x10;
> > +		}
> > +		sfdev->state = READY;
> > +		goto out_unlock;
> > +
> > +	case BP_SETUP:
> > +		sfdev->program_length = (value & 0xffff) + 1;
> > +		if (sfdev->program_length > PROGRAM_BUFF_SIZE / 4)
> > +			sfdev->program_length = PROGRAM_BUFF_SIZE / 4;
> > +		sfdev->state = BP_LOAD;
> > +		sfdev->read_mode = READ_STATUS;
> > +		goto out_unlock;
> > +
> > +	case ERASE_SETUP:
> > +		if ((value & 0xff) == CFI_CMD_BLOCK_ERASE_CONFIRM)
> > +			block_erase_confirm(sfdev, faddr);
> > +		else
> > +			sfdev->sr |= 0x30;
> > +
> > +		sfdev->state = READY;
> > +		sfdev->read_mode = READ_STATUS;
> > +		goto out_unlock;
> > +	}
> > +
> > +	/* write commands in READY state */
> > +	switch (value & 0xFF) {
> > +	case CFI_CMD_READ_JEDEC:
> > +		sfdev->read_mode = READ_DEVICE_ID;
> > +		break;
> > +	case CFI_CMD_READ_STATUS_REGISTER:
> > +		sfdev->read_mode = READ_STATUS;
> > +		break;
> > +	case CFI_CMD_READ_CFI_QUERY:
> > +		sfdev->read_mode = READ_QUERY;
> > +		break;
> > +	case CFI_CMD_CLEAR_STATUS_REGISTER:
> > +		sfdev->sr = 0x80;
> > +		break;
> > +	case CFI_CMD_WORD_PROGRAM_SETUP:
> > +	case CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP:
> > +		sfdev->state = WP_SETUP;
> > +		sfdev->read_mode = READ_STATUS;
> > +		break;
> > +	case CFI_CMD_LOCK_BLOCK_SETUP:
> > +		sfdev->state = LOCK_SETUP;
> > +		break;
> > +	case CFI_CMD_BLOCK_ERASE_SETUP:
> > +		sfdev->state = ERASE_SETUP;
> > +		sfdev->read_mode = READ_STATUS;
> > +		break;
> > +	case CFI_CMD_BUFFERED_PROGRAM_SETUP:
> > +		buffer_setup(sfdev);
> > +		sfdev->state = BP_SETUP;
> > +		sfdev->read_mode = READ_STATUS;
> > +		break;
> > +	case CFI_CMD_BUFFERED_PROGRAM_CONFIRM:
> > +		pr_debug("CFI flash: unexpected confirm command 0xD0");
> > +		break;
> > +	default:
> > +		pr_debug("CFI flash: unknown command 0x%x", value);
> > +		/* fall through */  
> 
> Above (in the read case), you wrote it "fall-through".

GCC has a list of allowed spellings, and both versions are in it ;-)
But sure will fix this ...
 
> > +	case CFI_CMD_READ_ARRAY:
> > +		sfdev->read_mode = READ_ARRAY;
> > +		break;
> > +	}
> > +
> > +out_unlock:
> > +	mutex_unlock(&sfdev->mutex);
> > +}  
> 
> The function is huge and complicated. How about splitting it into a read and write
> function, at the very least?

Good point. Looks like "write command in READY state" should be separate as well, since it's only doing state transitions.

> > +
> > +#ifdef CONFIG_HAS_LIBFDT
> > +static void generate_cfi_flash_fdt_node(void *fdt,
> > +					struct device_header *dev_hdr,
> > +					void (*generate_irq_prop)(void *fdt,
> > +								  u8 irq,
> > +								enum irq_type))
> > +{
> > +	struct cfi_flash_device *sfdev;
> > +	u64 reg_prop[2];
> > +
> > +	sfdev = container_of(dev_hdr, struct cfi_flash_device, dev_hdr);
> > +	reg_prop[0] = cpu_to_fdt64(sfdev->base_addr);
> > +	reg_prop[1] = cpu_to_fdt64(sfdev->size);
> > +
> > +	_FDT(fdt_begin_node(fdt, "flash"));
> > +	_FDT(fdt_property_cell(fdt, "bank-width", CFI_BUS_WIDTH));
> > +	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
> > +	_FDT(fdt_property_cell(fdt, "#size-cells", 0x1));
> > +	_FDT(fdt_property_string(fdt, "compatible", "cfi-flash"));
> > +	_FDT(fdt_property_string(fdt, "label", "System-firmware"));
> > +	_FDT(fdt_property(fdt, "reg", &reg_prop, sizeof(reg_prop)));
> > +	_FDT(fdt_end_node(fdt));
> > +}
> > +#else
> > +#define generate_cfi_flash_fdt_node NULL
> > +#endif
> > +
> > +static struct cfi_flash_device *create_flash_device_file(struct kvm *kvm,
> > +							 const char *filename)
> > +{
> > +	struct cfi_flash_device *sfdev;
> > +	struct stat statbuf;  
> 
> Here you're using "buf" as shorthand for "buffer", but at the top of the file
> (PROGRAM_BUFF_*) you use "buff".

I guess because one was written by me, the other by Raphael ;-)
Will consolidate this.

> 
> > +	unsigned int value;
> > +	int ret;
> > +	int fd;
> > +
> > +	fd = open(filename, O_RDWR);
> > +	if (fd < 0)
> > +		return ERR_PTR(-errno);
> > +	if (fstat(fd, &statbuf) < 0) {
> > +		close(fd);
> > +		return ERR_PTR(-errno);
> > +	}
> > +
> > +	sfdev = malloc(sizeof(struct cfi_flash_device));
> > +	if (!sfdev) {
> > +		close(fd);
> > +		return ERR_PTR(-ENOMEM);
> > +	}
> > +
> > +	sfdev->size = (statbuf.st_size + 4095) & ~0xfffUL;
> > +	sfdev->flash_memory = mmap(NULL, statbuf.st_size,
> > +				   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +				   fd, 0);
> > +	if (sfdev->flash_memory == MAP_FAILED) {
> > +		close(fd);
> > +		free(sfdev);
> > +		return ERR_PTR(-errno);
> > +	}
> > +	sfdev->base_addr = KVM_FLASH_MMIO_BASE;
> > +	sfdev->state = READY;
> > +	sfdev->read_mode = READ_ARRAY;
> > +	sfdev->sr = 0x80;
> > +	sfdev->rcr = 0xbfcf;
> > +
> > +	value = roundup(nr_erase_blocks(sfdev), BITS_PER_LONG) / 8;
> > +	sfdev->lock_bm = malloc(value);
> > +	memset(sfdev->lock_bm, 0, value);
> > +
> > +	sfdev->dev_hdr.bus_type = DEVICE_BUS_MMIO;
> > +	sfdev->dev_hdr.data = generate_cfi_flash_fdt_node;
> > +	mutex_init(&sfdev->mutex);
> > +	ret = device__register(&sfdev->dev_hdr);
> > +	if (ret) {
> > +		free(sfdev->flash_memory);  
> 
> That's strange, I wrote a quick test for this and free'ing a a file-backed mmap'ed
> memory resulted in a segmentation fault. Did you mean munmap?

Ouch, indeed. Looks like a leftover from the original code, which was using malloc().

> Also, if mmap'ing
> the flash fails, you close the file descriptor, which you don't do here. To be
> honest, I think the best approach would be to add all the cleaning up  after the
> return statement and a series of labels to jump to depending where you got an
> error (similar to virtio__pci_init).

Yeah, it looks much better now that way.

> 
> > +		free(sfdev);
> > +		return ERR_PTR(ret);
> > +	}
> > +
> > +	ret = kvm__register_mmio(kvm,
> > +				 sfdev->base_addr, sfdev->size,
> > +				 false, cfi_flash_mmio, sfdev);
> > +	if (ret) {
> > +		device__unregister(&sfdev->dev_hdr);
> > +		free(sfdev->flash_memory);
> > +		free(sfdev);
> > +		return ERR_PTR(ret);
> > +	}
> > +
> > +	return sfdev;
> > +}
> > +
> > +static int flash__init(struct kvm *kvm)
> > +{
> > +	struct cfi_flash_device *sfdev;
> > +
> > +	if (!kvm->cfg.flash_filename)
> > +		return 0;
> > +
> > +	sfdev = create_flash_device_file(kvm, kvm->cfg.flash_filename);
> > +	if (IS_ERR(sfdev))
> > +		return PTR_ERR(sfdev);
> > +
> > +	return 0;
> > +}
> > +dev_init(flash__init);
> > diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
> > index a052b0bc..f4a8b831 100644
> > --- a/include/kvm/kvm-config.h
> > +++ b/include/kvm/kvm-config.h
> > @@ -35,6 +35,7 @@ struct kvm_config {
> >  	const char *vmlinux_filename;
> >  	const char *initrd_filename;
> >  	const char *firmware_filename;
> > +	const char *flash_filename;
> >  	const char *console;
> >  	const char *dev;
> >  	const char *network;
> > diff --git a/include/kvm/util.h b/include/kvm/util.h
> > index 4ca7aa93..5c37f0b7 100644
> > --- a/include/kvm/util.h
> > +++ b/include/kvm/util.h
> > @@ -104,6 +104,11 @@ static inline unsigned long roundup_pow_of_two(unsigned long x)
> >  	return x ? 1UL << fls_long(x - 1) : 0;
> >  }
> >  
> > +static inline int pow2_size(unsigned long x)
> > +{
> > +	return (sizeof(x) * 8) - __builtin_clzl(x - 1);
> > +}  
> 
> For the life of me I can't understand what this function is supposed to do. Also,
> from the gcc online docs:

The idea is to determine the "number of address bits needed to cover x bytes of memory", which is something that is well known on actual hardware. I will add a comment.
 
> "Returns the number of leading 0-bits in x, starting at the most significant bit
> position. If xis 0, the result is undefined."
> 
> you might want to add a special case for x == 1.

Good point, although in our case the input value is always at least 2048. But 0 isn't covered as well and also I moved this to generic code, so will fix it.

Cheers,
Andre

> 
> Thanks,
> Alex
> > +
> >  struct kvm;
> >  void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> >  void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);  

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH kvmtool v2] Add emulation for CFI compatible flash memory
  2020-02-14 13:47   ` Andre Przywara
@ 2020-02-14 15:38     ` Alexandru Elisei
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandru Elisei @ 2020-02-14 15:38 UTC (permalink / raw)
  To: Andre Przywara
  Cc: Raphael Gault, Sami Mujawar, Will Deacon, kvmarm, linux-arm-kernel

Hi,

On 2/14/20 1:47 PM, Andre Przywara wrote:
> On Fri, 7 Feb 2020 17:34:20 +0000
> Alexandru Elisei <alexandru.elisei@arm.com> wrote:
>
> Hi Alex,
>
> many thanks for having a look!
>
>> I'm going to do my best to review your patch :) I'll do it in chunks, because it's
>> pretty large, and definitely not trivial.
> OK, replying here, and having it mostly fixed already.
> Will wait for further replies before a re-post, unless you want to benefit from the split MMIO function, which should make reviewing the state machine easier. Just let me know.

I'll finish my review on this version of the patch, no need to post a v3.

>
>> On 2/7/20 12:19 PM, Andre Przywara wrote:
>>> From: Raphael Gault <raphael.gault@arm.com>
>>>
>>> The EDK II UEFI firmware implementation requires some storage for the EFI
>>> variables, which is typically some flash storage.
>>> Since this is already supported on the EDK II side, we add a CFI flash
>>> emulation to kvmtool.
>>> This is backed by a file, specified via the --flash or -F command line
>>> option. Any flash writes done by the guest will immediately be reflected
>>> into this file (kvmtool mmap's the file).
>>>
>>> This implements a CFI flash using the "Intel/Sharp extended command
>>> set", as specified in:
>>> - JEDEC JESD68.01
>>> - JEDEC JEP137B
>>> - Intel Application Note 646
>>> Some gaps in those specs have been filled by looking at real devices and
>>> other implementations (QEMU, Linux kernel driver).
>>>
>>> At the moment this relies on DT to advertise the base address of the
>>> flash memory (mapped into the MMIO address space) and is only enabled
>>> for ARM/ARM64. The emulation itself is architecture agnostic, though.
>>>
>>> This is one missing piece toward a working UEFI boot with kvmtool on
>>> ARM guests, the other is to provide writable PCI BARs, which is WIP.
>>>
>>> Signed-off-by: Raphael Gault <raphael.gault@arm.com>
>>> [Andre: rewriting and fixing]
>>> Signed-off-by: Andre Przywra <andre.przywara@arm.com>
>>> ---
>>> Hi,
>>>
>>> an update addressing Will's comments. I added coarse grained locking
>>> to the MMIO handler, to prevent concurrent vCPU accesses from messing up
>>> the internal CFI flash state machine.
>>> I also folded the actual flash array read access into the MMIO handler
>>> and fixed the other small issues.
>>>
>>> Cheers,
>>> Andre
>>>
>>>  Makefile                          |   6 +
>>>  arm/include/arm-common/kvm-arch.h |   3 +
>>>  builtin-run.c                     |   2 +
>>>  hw/cfi_flash.c                    | 546 ++++++++++++++++++++++++++++++
>>>  include/kvm/kvm-config.h          |   1 +
>>>  include/kvm/util.h                |   5 +
>>>  6 files changed, 563 insertions(+)
>>>  create mode 100644 hw/cfi_flash.c
>>>
>>> diff --git a/Makefile b/Makefile
>>> index 3862112c..7ed6fb5e 100644
>>> --- a/Makefile
>>> +++ b/Makefile
>>> @@ -170,6 +170,7 @@ ifeq ($(ARCH), arm)
>>>  	CFLAGS		+= -march=armv7-a
>>>  
>>>  	ARCH_WANT_LIBFDT := y
>>> +	ARCH_HAS_FLASH_MEM := y
>>>  endif
>>>  
>>>  # ARM64
>>> @@ -182,6 +183,7 @@ ifeq ($(ARCH), arm64)
>>>  	ARCH_INCLUDE	+= -Iarm/aarch64/include
>>>  
>>>  	ARCH_WANT_LIBFDT := y
>>> +	ARCH_HAS_FLASH_MEM := y
>>>  endif
>>>  
>>>  ifeq ($(ARCH),mips)
>>> @@ -261,6 +263,10 @@ ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
>>>  	endif
>>>  endif
>>>  
>>> +ifeq (y,$(ARCH_HAS_FLASH_MEM))
>>> +	OBJS	+= hw/cfi_flash.o
>>> +endif
>>> +
>>>  ifeq ($(call try-build,$(SOURCE_ZLIB),$(CFLAGS),$(LDFLAGS) -lz),y)
>>>  	CFLAGS_DYNOPT	+= -DCONFIG_HAS_ZLIB
>>>  	LIBS_DYNOPT	+= -lz
>>> diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
>>> index b9d486d5..2bb085f4 100644
>>> --- a/arm/include/arm-common/kvm-arch.h
>>> +++ b/arm/include/arm-common/kvm-arch.h
>>> @@ -21,6 +21,9 @@
>>>  #define ARM_GIC_DIST_SIZE	0x10000
>>>  #define ARM_GIC_CPUI_SIZE	0x20000
>>>  
>>> +#define ARM_FLASH_MMIO_BASE	0x2000000		/* 32 MB */
>>> +#define KVM_FLASH_MMIO_BASE	ARM_FLASH_MMIO_BASE  
>> Each time I try to read the memory layout for ARM I get a headache. According to
>> my calculations, this falls right inside ARM_MMIO_AREA, right? Any particular
>> reason for choosing this address? Why not carve its own dedicate area, so we won't
>> run the highly unlikely risk that it will be overwritten, since it's in the MMIO
>> allocation area?
> The EDK2 build I used has the base address fixed at 32MB. So I just used this address here. Sami is working on making this flexible as we speak, but it's not easy due to some EDK-2 design issues.
> As an interim measure I would try to describe this using the existing MMIO layout macros, to at least avoid overlaps with virtio-mmio.
> I actually might move that address to the beginning for now, as 32MB is currently in the middle of the MMIO area.
> QEMU has that hardcoded (both in QEMU and EDK-2) as well, btw.
>  
>>> +
>>>  #define ARM_IOPORT_SIZE		(ARM_MMIO_AREA - ARM_IOPORT_AREA)
>>>  #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))  
>> That's not correct anymore, because flash memory is in the ARM_MMIO_AREA.
> True, I will try to find the right place for this. Somewhat problematic is the differing size, but we could just impose an upper limit on this.

From this and the above, it kinda sounds like we a flexible memory layout for
kvmtool, doesn't it? ;-)

>
>>>  #define ARM_PCI_CFG_SIZE	(1ULL << 24)
>>> diff --git a/builtin-run.c b/builtin-run.c
>>> index f8dc6c72..df8c6741 100644
>>> --- a/builtin-run.c
>>> +++ b/builtin-run.c
>>> @@ -138,6 +138,8 @@ void kvm_run_set_wrapper_sandbox(void)
>>>  			"Kernel command line arguments"),		\
>>>  	OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\
>>>  			"Firmware image to boot in virtual machine"),	\
>>> +	OPT_STRING('F', "flash", &(cfg)->flash_filename, "flash",\
>>> +			"Flash image to present to virtual machine"),	\
>>>  									\
>>>  	OPT_GROUP("Networking options:"),				\
>>>  	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",	\
>>> diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
>>> new file mode 100644
>>> index 00000000..d7c0e7e8
>>> --- /dev/null
>>> +++ b/hw/cfi_flash.c
>>> @@ -0,0 +1,546 @@
>>> +#include <stdbool.h>
>>> +#include <stdlib.h>
>>> +#include <string.h>
>>> +#include <linux/bitops.h>
>>> +#include <linux/err.h>
>>> +#include <linux/sizes.h>
>>> +#include <linux/types.h>
>>> +
>>> +#include "kvm/kvm.h"
>>> +#include "kvm/kvm-arch.h"
>>> +#include "kvm/devices.h"
>>> +#include "kvm/fdt.h"
>>> +#include "kvm/mutex.h"
>>> +#include "kvm/util.h"
>>> +
>>> +/* The EDK2 driver hardcodes two 16-bit chips on a 32-bit bus. */
>>> +#define CFI_NR_FLASH_CHIPS			2
>>> +
>>> +/* We always emulate a 32 bit bus width. */
>>> +#define CFI_BUS_WIDTH				4
>>> +
>>> +/* The *effective* size of an erase block (over all chips) */
>>> +#define FLASH_BLOCK_SIZE			SZ_64K
>>> +
>>> +#define PROGRAM_BUFF_SIZE_BITS			7
>>> +#define PROGRAM_BUFF_SIZE			(1U << PROGRAM_BUFF_SIZE_BITS)  
>> Just making sure this is not an off-by-one error. The buffer size is 2^7 = 128
>> words, which makes it 512 bytes, right?
> Looks like it ;-)
> The reason this is presented in this rather awkward way here is that we need the number of bits to be presented in the CFI query structure later on.
> I will add a comment pointing out this is in units of "words" - after double checking that it really is ;-)
>  
>>> +
>>> +/* CFI commands */
>>> +#define CFI_CMD_LOCK_BLOCK			0x01
>>> +#define CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP	0x10
>>> +#define CFI_CMD_BLOCK_ERASE_SETUP		0x20
>>> +#define CFI_CMD_WORD_PROGRAM_SETUP		0x40
>>> +#define CFI_CMD_CLEAR_STATUS_REGISTER		0x50
>>> +#define CFI_CMD_LOCK_BLOCK_SETUP		0x60
>>> +#define CFI_CMD_READ_STATUS_REGISTER		0x70
>>> +#define CFI_CMD_READ_JEDEC			0x90
>>> +#define CFI_CMD_READ_CFI_QUERY			0x98
>>> +#define CFI_CMD_BUFFERED_PROGRAM_CONFIRM	0xd0
>>> +#define CFI_CMD_BLOCK_ERASE_CONFIRM		0xd0
>>> +#define CFI_CMD_UNLOCK_BLOCK			0xd0
>>> +#define CFI_CMD_BUFFERED_PROGRAM_SETUP		0xe8
>>> +#define CFI_CMD_READ_ARRAY			0xff
>>> +
>>> +/*
>>> + * CFI query table contents, as far as it is constant.
>>> + */
>>> +#define CFI_GEOM_OFFSET				0x27
>>> +static u8 cfi_query_table[] = {
>>> +		/* offset 0x10: CFI query identification string */
>>> +	'Q', 'R', 'Y',		/* ID string */
>>> +	0x01, 0x00,		/* primary command set: Intel/Sharp extended */
>>> +	0x31, 0x00,		/* address of primary extended query table */
>>> +	0x00, 0x00,		/* alternative command set: unused */
>>> +	0x00, 0x00,		/* address of alternative extended query table*/
>>> +		/* offset 0x1b: system interface information */
>>> +	0x45,			/* minimum Vcc voltage: 4.5V */
>>> +	0x55,			/* maximum Vcc voltage: 5.5V */
>>> +	0x00,			/* minimum Vpp voltage: 0.0V (unused) */
>>> +	0x00,			/* maximum Vpp voltage: 0.0V *(unused) */
>>> +	0x01,			/* timeout for single word program: 2 us */
>>> +	0x01,			/* timeout for multi-byte program: 2 us */
>>> +	0x01,			/* timeout for block erase: 2 ms */
>>> +	0x00,			/* timeout for full chip erase: not supported */
>>> +	0x00,			/* max timeout for single word program: 1x */
>>> +	0x00,			/* max timeout for mulit-byte program: 1x */
>>> +	0x00,			/* max timeout for block erase: 1x */
>>> +	0x00,			/* max timeout for chip erase: not supported */
>>> +		/* offset 0x27: flash geometry information */
>>> +	0x00,			/* size in power-of-2 bytes, filled later */
>>> +	0x06, 0x00,		/* interface description: 32 and 16 bits */
>>> +	PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
>>> +				/* number of multi-byte writes */
>>> +	0x01,			/* one erase block region */
>>> +	0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
>>> +		/* offset 0x31: Intel primary algorithm extended query table */
>>> +	'P', 'R', 'I',
>>> +	'1', '0',		/* version 1.0 */
>>> +	0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
>>> +	0x00,			/* no functions after suspend */
>>> +	0x01, 0x00,		/* only lock bit supported */
>>> +	0x50,			/* best Vcc value: 5.0V */
>>> +	0x00,			/* best Vpp value: 0.0V (unused) */
>>> +	0x01,			/* number of protection register fields */
>>> +	0x00, 0x00, 0x00, 0x00,	/* protection field 1 description */
>>> +};
>>> +
>>> +
>>> +/*
>>> + * Those states represent a subset of the CFI flash state machine.
>>> + */
>>> +enum cfi_flash_state {
>>> +	READY,
>>> +	LOCK_SETUP,
>>> +	WP_SETUP,
>>> +	BP_SETUP,
>>> +	BP_LOAD,
>>> +	ERASE_SETUP,
>>> +};
>>> +
>>> +/*
>>> + * The device can be in several **Read** modes.
>>> + * We don't implement the asynchronous burst mode.
>>> + */
>>> +enum cfi_read_mode {
>>> +	READ_ARRAY,
>>> +	READ_STATUS,
>>> +	READ_DEVICE_ID,
>>> +	READ_QUERY,
>>> +};
>>> +
>>> +struct cfi_flash_device {
>>> +	struct device_header	dev_hdr;
>>> +	/* Protects the CFI state machine variables in this data structure. */
>>> +	struct mutex		mutex;
>>> +	u64			base_addr;
>>> +	u32			size;
>>> +
>>> +	void			*flash_memory;
>>> +	u8			program_buffer[PROGRAM_BUFF_SIZE * 4];  
>> You're multiplying by 4 because PROGRAM_BUFF_SIZE is the size of the buffer in
>> words, right?
> Yeah, I can use "sizeof(u32)" if that is better.
>
>>> +	unsigned long		*lock_bm;
>>> +	u64			last_address;
>>> +	unsigned int		buff_written;
>>> +	unsigned int		program_length;
>>> +
>>> +	enum cfi_flash_state	state;
>>> +	enum cfi_read_mode	read_mode;
>>> +	u16			rcr;
>>> +	u8			sr;
>>> +};
>>> +
>>> +static int nr_erase_blocks(struct cfi_flash_device *sfdev)
>>> +{
>>> +	return sfdev->size / FLASH_BLOCK_SIZE;
>>> +}
>>> +
>>> +/*
>>> + * CFI queries always deal with one byte of information, possibly mirrored
>>> + * to other bytes on the bus. This is dealt with in the callers.
>>> + * The address provided is the one for 8-bit addressing, and would need to
>>> + * be adjusted for wider accesses.
>>> + */
>>> +static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
>>> +{
>>> +	if (addr < 0x10)		/* CFI information starts at 0x10 */
>>> +		return 0;
>>> +
>>> +	if (addr - 0x10 > sizeof(cfi_query_table)) {
>>> +		pr_debug("CFI query read access beyond the end of table");
>>> +		return 0;
>>> +	}
>>> +
>>> +	/* Fixup dynamic information in the geometry part of the table. */
>>> +	switch (addr) {
>>> +	case CFI_GEOM_OFFSET:		/* device size in bytes, power of two */
>>> +		return pow2_size(sfdev->size / CFI_NR_FLASH_CHIPS);
>>> +	case CFI_GEOM_OFFSET + 6:	/* number of erase blocks, minus one */
>>> +		return (nr_erase_blocks(sfdev) - 1) & 0xff;
>>> +	case CFI_GEOM_OFFSET + 7:
>>> +		return (nr_erase_blocks(sfdev) - 1) >> 8;
>>> +	case CFI_GEOM_OFFSET + 8:	/* erase block size, in units of 256 */
>>> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) & 0xff;
>>> +	case CFI_GEOM_OFFSET + 9:
>>> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
>>> +	}
>>> +
>>> +	return cfi_query_table[addr - 0x10];
>>> +}
>>> +
>>> +static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
>>> +{
>>> +	int block_nr = addr / FLASH_BLOCK_SIZE;
>>> +
>>> +	return test_bit(block_nr, sfdev->lock_bm);
>>> +}
>>> +
>>> +#define DEV_ID_MASK 0x7ff
>>> +static u16 read_dev_id(struct cfi_flash_device *sfdev, u64 addr)
>>> +{
>>> +	switch ((addr & DEV_ID_MASK) / CFI_BUS_WIDTH) {
>>> +	case 0x0:				/* vendor ID */
>>> +		return 0x0000;
>>> +	case 0x1:				/* device ID */
>>> +		return 0xffff;
>>> +	case 0x2:
>>> +		return block_is_locked(sfdev, addr & ~DEV_ID_MASK);
>>> +	case 0x5:
>>> +		return sfdev->rcr;
>>> +	default:			/* Ignore the other entries. */
>>> +		return 0;
>>> +	}
>>> +}
>>> +
>>> +static void lock_block(struct cfi_flash_device *sfdev, u64 addr, bool lock)
>>> +{
>>> +	int block_nr = addr / FLASH_BLOCK_SIZE;
>>> +
>>> +	if (lock)
>>> +		set_bit(block_nr, sfdev->lock_bm);
>>> +	else
>>> +		clear_bit(block_nr, sfdev->lock_bm);
>>> +}
>>> +
>>> +static void word_program(struct cfi_flash_device *sfdev,
>>> +			 u64 addr, void *data, int len)
>>> +{
>>> +	if (block_is_locked(sfdev, addr)) {
>>> +		sfdev->sr |= 0x12;
>>> +		return;
>>> +	}
>>> +
>>> +	memcpy(sfdev->flash_memory + addr, data, len);
>>> +}
>>> +
>>> +/* Reset the program buffer state to prepare for follow-up writes. */
>>> +static void buffer_setup(struct cfi_flash_device *sfdev)
>>> +{
>>> +	memset(sfdev->program_buffer, 0, sizeof(sfdev->program_buffer));
>>> +	sfdev->last_address = ~0ULL;
>>> +	sfdev->buff_written = 0;
>>> +}
>>> +
>>> +static bool buffer_program(struct cfi_flash_device *sfdev,
>>> +			   u64 addr, void *buffer, int len)
>>> +{
>>> +	unsigned int buf_addr;
>>> +
>>> +	if (sfdev->buff_written >= sfdev->program_length)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The first word written into the buffer after the setup command
>>> +	 * happens to be the base address for the buffer.
>>> +	 * All subsequent writes need to be within this address and this
>>> +	 * address plus the buffer size, so keep this value around.
>>> +	 */
>>> +	if (sfdev->last_address == ~0ULL)
>>> +		sfdev->last_address = addr;
>>> +
>>> +	if (addr < sfdev->last_address)
>>> +		return false;
>>> +	buf_addr = addr - sfdev->last_address;
>>> +	if (buf_addr >= PROGRAM_BUFF_SIZE)
>>> +		return false;
>>> +
>>> +	memcpy(sfdev->program_buffer + buf_addr, buffer, len);
>>> +	sfdev->buff_written++;
>>> +
>>> +	return true;
>>> +}
>>> +
>>> +static void buffer_confirm(struct cfi_flash_device *sfdev)
>>> +{
>>> +	if (block_is_locked(sfdev, sfdev->last_address)) {
>>> +		sfdev->sr |= 0x12;
>>> +		return;
>>> +	}
>>> +	memcpy(sfdev->flash_memory + sfdev->last_address,
>>> +	       sfdev->program_buffer,
>>> +	       sfdev->buff_written * sizeof(u32));
>>> +}
>>> +
>>> +static void block_erase_confirm(struct cfi_flash_device *sfdev, u64 addr)
>>> +{
>>> +	if (block_is_locked(sfdev, addr)) {
>>> +		sfdev->sr |= 0x12;
>>> +		return;
>>> +	}
>>> +
>>> +	memset(sfdev->flash_memory + addr, 0xFF, FLASH_BLOCK_SIZE);
>>> +}
>>> +
>>> +static void cfi_flash_mmio(struct kvm_cpu *vcpu,
>>> +			   u64 addr, u8 *data, u32 len, u8 is_write,
>>> +			   void *context)
>>> +{
>>> +	struct cfi_flash_device *sfdev = context;
>>> +	u64 faddr = addr - sfdev->base_addr;
>>> +	u32 value;
>>> +
>>> +	if (!is_write) {
>>> +		u16 cfi_value = 0;
>>> +
>>> +		mutex_lock(&sfdev->mutex);
>>> +
>>> +		switch (sfdev->read_mode) {
>>> +		case READ_ARRAY:
>>> +			/* just copy the requested bytes from the array */
>>> +			memcpy(data, sfdev->flash_memory + faddr, len);
>>> +			goto out_unlock;
>>> +		case READ_STATUS:
>>> +			cfi_value = sfdev->sr;
>>> +			break;
>>> +		case READ_DEVICE_ID:
>>> +			cfi_value = read_dev_id(sfdev, faddr);
>>> +			break;
>>> +		case READ_QUERY:
>>> +			cfi_value = read_cfi(sfdev, faddr / CFI_BUS_WIDTH);
>>> +			break;
>>> +		}
>>> +		switch (len) {
>>> +		case 1:
>>> +			*data = cfi_value;
>>> +			break;
>>> +		case 8: memset(data + 4, 0, 4);
>>> +			/* fall-through */
>>> +		case 4:
>>> +			if (CFI_NR_FLASH_CHIPS == 2)
>>> +				memcpy(data + 2, &cfi_value, 2);
>>> +			else
>>> +				memset(data + 2, 0, 2);
>>> +			/* fall-through */
>>> +		case 2:
>>> +			memcpy(data, &cfi_value, 2);
>>> +			break;
>>> +		default:
>>> +			pr_debug("CFI flash: illegal access length %d for read mode %d",
>>> +				 len, sfdev->read_mode);
>>> +			break;
>>> +		}
>>> +
>>> +		goto out_unlock;
>>> +	}
>>> +
>>> +	if (len > 4) {
>>> +		pr_info("CFI flash: MMIO %d-bit write access not supported",
>>> +			 len * 8);
>>> +		return;
>>> +	}
>>> +
>>> +	memcpy(&value, data, len);
>>> +
>>> +	mutex_lock(&sfdev->mutex);
>>> +
>>> +	switch (sfdev->state) {
>>> +	case READY:			/* handled below */
>>> +		break;
>>> +
>>> +	case LOCK_SETUP:
>>> +		switch (value & 0xff) {
>>> +		case CFI_CMD_LOCK_BLOCK:
>>> +			lock_block(sfdev, faddr, true);
>>> +			sfdev->read_mode = READ_STATUS;
>>> +			break;
>>> +		case CFI_CMD_UNLOCK_BLOCK:
>>> +			lock_block(sfdev, faddr, false);
>>> +			sfdev->read_mode = READ_STATUS;
>>> +			break;
>>> +		default:
>>> +			sfdev->sr |= 0x30;
>>> +			break;
>>> +		}
>>> +		sfdev->state = READY;
>>> +		goto out_unlock;
>>> +
>>> +	case WP_SETUP:
>>> +		word_program(sfdev, faddr, data, len);
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		sfdev->state = READY;
>>> +		goto out_unlock;
>>> +
>>> +	case BP_LOAD:
>>> +		if (buffer_program(sfdev, faddr, data, len))
>>> +			goto out_unlock;
>>> +
>>> +		if ((value & 0xFF) == CFI_CMD_BUFFERED_PROGRAM_CONFIRM) {
>>> +			buffer_confirm(sfdev);
>>> +			sfdev->read_mode = READ_STATUS;
>>> +		} else {
>>> +			pr_debug("CFI flash: BP_LOAD: expected CONFIRM(0xd0), got 0x%x @ 0x%llx",
>>> +				 value, faddr);
>>> +			sfdev->sr |= 0x10;
>>> +		}
>>> +		sfdev->state = READY;
>>> +		goto out_unlock;
>>> +
>>> +	case BP_SETUP:
>>> +		sfdev->program_length = (value & 0xffff) + 1;
>>> +		if (sfdev->program_length > PROGRAM_BUFF_SIZE / 4)
>>> +			sfdev->program_length = PROGRAM_BUFF_SIZE / 4;
>>> +		sfdev->state = BP_LOAD;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		goto out_unlock;
>>> +
>>> +	case ERASE_SETUP:
>>> +		if ((value & 0xff) == CFI_CMD_BLOCK_ERASE_CONFIRM)
>>> +			block_erase_confirm(sfdev, faddr);
>>> +		else
>>> +			sfdev->sr |= 0x30;
>>> +
>>> +		sfdev->state = READY;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		goto out_unlock;
>>> +	}
>>> +
>>> +	/* write commands in READY state */
>>> +	switch (value & 0xFF) {
>>> +	case CFI_CMD_READ_JEDEC:
>>> +		sfdev->read_mode = READ_DEVICE_ID;
>>> +		break;
>>> +	case CFI_CMD_READ_STATUS_REGISTER:
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		break;
>>> +	case CFI_CMD_READ_CFI_QUERY:
>>> +		sfdev->read_mode = READ_QUERY;
>>> +		break;
>>> +	case CFI_CMD_CLEAR_STATUS_REGISTER:
>>> +		sfdev->sr = 0x80;
>>> +		break;
>>> +	case CFI_CMD_WORD_PROGRAM_SETUP:
>>> +	case CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP:
>>> +		sfdev->state = WP_SETUP;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		break;
>>> +	case CFI_CMD_LOCK_BLOCK_SETUP:
>>> +		sfdev->state = LOCK_SETUP;
>>> +		break;
>>> +	case CFI_CMD_BLOCK_ERASE_SETUP:
>>> +		sfdev->state = ERASE_SETUP;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		break;
>>> +	case CFI_CMD_BUFFERED_PROGRAM_SETUP:
>>> +		buffer_setup(sfdev);
>>> +		sfdev->state = BP_SETUP;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		break;
>>> +	case CFI_CMD_BUFFERED_PROGRAM_CONFIRM:
>>> +		pr_debug("CFI flash: unexpected confirm command 0xD0");
>>> +		break;
>>> +	default:
>>> +		pr_debug("CFI flash: unknown command 0x%x", value);
>>> +		/* fall through */  
>> Above (in the read case), you wrote it "fall-through".
> GCC has a list of allowed spellings, and both versions are in it ;-)
> But sure will fix this ...

I was commenting on the consistency. I don't have a preference for a particular
spelling.

>  
>>> +	case CFI_CMD_READ_ARRAY:
>>> +		sfdev->read_mode = READ_ARRAY;
>>> +		break;
>>> +	}
>>> +
>>> +out_unlock:
>>> +	mutex_unlock(&sfdev->mutex);
>>> +}  
>> The function is huge and complicated. How about splitting it into a read and write
>> function, at the very least?
> Good point. Looks like "write command in READY state" should be separate as well, since it's only doing state transitions.

Good idea.

Thanks,
Alex
>
>>> +
>>> +#ifdef CONFIG_HAS_LIBFDT
>>> +static void generate_cfi_flash_fdt_node(void *fdt,
>>> +					struct device_header *dev_hdr,
>>> +					void (*generate_irq_prop)(void *fdt,
>>> +								  u8 irq,
>>> +								enum irq_type))
>>> +{
>>> +	struct cfi_flash_device *sfdev;
>>> +	u64 reg_prop[2];
>>> +
>>> +	sfdev = container_of(dev_hdr, struct cfi_flash_device, dev_hdr);
>>> +	reg_prop[0] = cpu_to_fdt64(sfdev->base_addr);
>>> +	reg_prop[1] = cpu_to_fdt64(sfdev->size);
>>> +
>>> +	_FDT(fdt_begin_node(fdt, "flash"));
>>> +	_FDT(fdt_property_cell(fdt, "bank-width", CFI_BUS_WIDTH));
>>> +	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
>>> +	_FDT(fdt_property_cell(fdt, "#size-cells", 0x1));
>>> +	_FDT(fdt_property_string(fdt, "compatible", "cfi-flash"));
>>> +	_FDT(fdt_property_string(fdt, "label", "System-firmware"));
>>> +	_FDT(fdt_property(fdt, "reg", &reg_prop, sizeof(reg_prop)));
>>> +	_FDT(fdt_end_node(fdt));
>>> +}
>>> +#else
>>> +#define generate_cfi_flash_fdt_node NULL
>>> +#endif
>>> +
>>> +static struct cfi_flash_device *create_flash_device_file(struct kvm *kvm,
>>> +							 const char *filename)
>>> +{
>>> +	struct cfi_flash_device *sfdev;
>>> +	struct stat statbuf;  
>> Here you're using "buf" as shorthand for "buffer", but at the top of the file
>> (PROGRAM_BUFF_*) you use "buff".
> I guess because one was written by me, the other by Raphael ;-)
> Will consolidate this.
>
>>> +	unsigned int value;
>>> +	int ret;
>>> +	int fd;
>>> +
>>> +	fd = open(filename, O_RDWR);
>>> +	if (fd < 0)
>>> +		return ERR_PTR(-errno);
>>> +	if (fstat(fd, &statbuf) < 0) {
>>> +		close(fd);
>>> +		return ERR_PTR(-errno);
>>> +	}
>>> +
>>> +	sfdev = malloc(sizeof(struct cfi_flash_device));
>>> +	if (!sfdev) {
>>> +		close(fd);
>>> +		return ERR_PTR(-ENOMEM);
>>> +	}
>>> +
>>> +	sfdev->size = (statbuf.st_size + 4095) & ~0xfffUL;
>>> +	sfdev->flash_memory = mmap(NULL, statbuf.st_size,
>>> +				   PROT_READ | PROT_WRITE, MAP_SHARED,
>>> +				   fd, 0);
>>> +	if (sfdev->flash_memory == MAP_FAILED) {
>>> +		close(fd);
>>> +		free(sfdev);
>>> +		return ERR_PTR(-errno);
>>> +	}
>>> +	sfdev->base_addr = KVM_FLASH_MMIO_BASE;
>>> +	sfdev->state = READY;
>>> +	sfdev->read_mode = READ_ARRAY;
>>> +	sfdev->sr = 0x80;
>>> +	sfdev->rcr = 0xbfcf;
>>> +
>>> +	value = roundup(nr_erase_blocks(sfdev), BITS_PER_LONG) / 8;
>>> +	sfdev->lock_bm = malloc(value);
>>> +	memset(sfdev->lock_bm, 0, value);
>>> +
>>> +	sfdev->dev_hdr.bus_type = DEVICE_BUS_MMIO;
>>> +	sfdev->dev_hdr.data = generate_cfi_flash_fdt_node;
>>> +	mutex_init(&sfdev->mutex);
>>> +	ret = device__register(&sfdev->dev_hdr);
>>> +	if (ret) {
>>> +		free(sfdev->flash_memory);  
>> That's strange, I wrote a quick test for this and free'ing a a file-backed mmap'ed
>> memory resulted in a segmentation fault. Did you mean munmap?
> Ouch, indeed. Looks like a leftover from the original code, which was using malloc().
>
>> Also, if mmap'ing
>> the flash fails, you close the file descriptor, which you don't do here. To be
>> honest, I think the best approach would be to add all the cleaning up  after the
>> return statement and a series of labels to jump to depending where you got an
>> error (similar to virtio__pci_init).
> Yeah, it looks much better now that way.
>
>>> +		free(sfdev);
>>> +		return ERR_PTR(ret);
>>> +	}
>>> +
>>> +	ret = kvm__register_mmio(kvm,
>>> +				 sfdev->base_addr, sfdev->size,
>>> +				 false, cfi_flash_mmio, sfdev);
>>> +	if (ret) {
>>> +		device__unregister(&sfdev->dev_hdr);
>>> +		free(sfdev->flash_memory);
>>> +		free(sfdev);
>>> +		return ERR_PTR(ret);
>>> +	}
>>> +
>>> +	return sfdev;
>>> +}
>>> +
>>> +static int flash__init(struct kvm *kvm)
>>> +{
>>> +	struct cfi_flash_device *sfdev;
>>> +
>>> +	if (!kvm->cfg.flash_filename)
>>> +		return 0;
>>> +
>>> +	sfdev = create_flash_device_file(kvm, kvm->cfg.flash_filename);
>>> +	if (IS_ERR(sfdev))
>>> +		return PTR_ERR(sfdev);
>>> +
>>> +	return 0;
>>> +}
>>> +dev_init(flash__init);
>>> diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
>>> index a052b0bc..f4a8b831 100644
>>> --- a/include/kvm/kvm-config.h
>>> +++ b/include/kvm/kvm-config.h
>>> @@ -35,6 +35,7 @@ struct kvm_config {
>>>  	const char *vmlinux_filename;
>>>  	const char *initrd_filename;
>>>  	const char *firmware_filename;
>>> +	const char *flash_filename;
>>>  	const char *console;
>>>  	const char *dev;
>>>  	const char *network;
>>> diff --git a/include/kvm/util.h b/include/kvm/util.h
>>> index 4ca7aa93..5c37f0b7 100644
>>> --- a/include/kvm/util.h
>>> +++ b/include/kvm/util.h
>>> @@ -104,6 +104,11 @@ static inline unsigned long roundup_pow_of_two(unsigned long x)
>>>  	return x ? 1UL << fls_long(x - 1) : 0;
>>>  }
>>>  
>>> +static inline int pow2_size(unsigned long x)
>>> +{
>>> +	return (sizeof(x) * 8) - __builtin_clzl(x - 1);
>>> +}  
>> For the life of me I can't understand what this function is supposed to do. Also,
>> from the gcc online docs:
> The idea is to determine the "number of address bits needed to cover x bytes of memory", which is something that is well known on actual hardware. I will add a comment.
>  
>> "Returns the number of leading 0-bits in x, starting at the most significant bit
>> position. If xis 0, the result is undefined."
>>
>> you might want to add a special case for x == 1.
> Good point, although in our case the input value is always at least 2048. But 0 isn't covered as well and also I moved this to generic code, so will fix it.
>
> Cheers,
> Andre
>
>> Thanks,
>> Alex
>>> +
>>>  struct kvm;
>>>  void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
>>>  void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);  
_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH kvmtool v2] Add emulation for CFI compatible flash memory
  2020-02-07 12:19 [PATCH kvmtool v2] Add emulation for CFI compatible flash memory Andre Przywara
  2020-02-07 17:34 ` Alexandru Elisei
@ 2020-02-17 17:20 ` Alexandru Elisei
  2020-02-19 17:26   ` Andre Przywara
  1 sibling, 1 reply; 8+ messages in thread
From: Alexandru Elisei @ 2020-02-17 17:20 UTC (permalink / raw)
  To: Andre Przywara, Will Deacon, Julien Thierry
  Cc: Raphael Gault, Sami Mujawar, linux-arm-kernel, kvmarm

Hi,

I guess the device hasn't been tested with Linux. This is what I'm getting when
trying to boot a Linux guest using the command:

$ ./lkvm run -c4 -m4096 -k /path/to/kernel -d /path/to/disk -p root="/dev/vda2" -F
flash.img

[    0.659167] physmap-flash 2000000.flash: physmap platform flash device: [mem
0x02000000-0x029fffff]
[    0.660444] Number of erase regions: 1
[    0.661036] Primary Vendor Command Set: 0001 (Intel/Sharp Extended)
[    0.661688] Primary Algorithm Table at 0031
[    0.662168] Alternative Vendor Command Set: 0000 (None)
[    0.662711] No Alternate Algorithm Table
[    0.663120] Vcc Minimum:  4.5 V
[    0.663450] Vcc Maximum:  5.5 V
[    0.663779] No Vpp line
[    0.664039] Typical byte/word write timeout: 2 µs
[    0.664590] Maximum byte/word write timeout: 2 µs
[    0.665240] Typical full buffer write timeout: 2 µs
[    0.665775] Maximum full buffer write timeout: 2 µs
[    0.666373] Typical block erase timeout: 2 ms
[    0.666828] Maximum block erase timeout: 2 ms
[    0.667282] Chip erase not supported
[    0.667659] Device size: 0x800000 bytes (8 MiB)
[    0.668137] Flash Device Interface description: 0x0006
[    0.668697]   - Unknown
[    0.668963] Max. bytes in buffer write: 0x40
[    0.669407] Number of Erase Block Regions: 1
[    0.669865]   Erase Region #0: BlockSize 0x8000 bytes, 160 blocks
[    0.672299] 2000000.flash: Found 2 x16 devices at 0x0 in 32-bit bank.
Manufacturer ID 0x000000 Chip ID 0x00ffff
[    0.681328] NOR chip too large to fit in mapping. Attempting to cope...
[    0.682046] Intel/Sharp Extended Query Table at 0x0031
[    0.682645] Using buffer write method
[    0.683031] Sum of regions (a00000) != total size of set of interleaved chips
(1000000)
[    0.683854] gen_probe: No supported Vendor Command Set found
[    0.684441] physmap-flash 2000000.flash: map_probe failed

I also defined DEBUG_CFI in drivers/mtd/chips/cfi_probe.c.

The Flash Device Interface description that we provide is wrong, it should 0x05.
More details below.

On 2/7/20 12:19 PM, Andre Przywara wrote:
> From: Raphael Gault <raphael.gault@arm.com>
>
> The EDK II UEFI firmware implementation requires some storage for the EFI
> variables, which is typically some flash storage.
> Since this is already supported on the EDK II side, we add a CFI flash
> emulation to kvmtool.
> This is backed by a file, specified via the --flash or -F command line
> option. Any flash writes done by the guest will immediately be reflected
> into this file (kvmtool mmap's the file).
>
> This implements a CFI flash using the "Intel/Sharp extended command
> set", as specified in:
> - JEDEC JESD68.01
> - JEDEC JEP137B
> - Intel Application Note 646
> Some gaps in those specs have been filled by looking at real devices and
> other implementations (QEMU, Linux kernel driver).
>
> At the moment this relies on DT to advertise the base address of the
> flash memory (mapped into the MMIO address space) and is only enabled
> for ARM/ARM64. The emulation itself is architecture agnostic, though.
>
> This is one missing piece toward a working UEFI boot with kvmtool on
> ARM guests, the other is to provide writable PCI BARs, which is WIP.
>
> Signed-off-by: Raphael Gault <raphael.gault@arm.com>
> [Andre: rewriting and fixing]
> Signed-off-by: Andre Przywra <andre.przywara@arm.com>
> ---
> Hi,
>
> an update addressing Will's comments. I added coarse grained locking
> to the MMIO handler, to prevent concurrent vCPU accesses from messing up
> the internal CFI flash state machine.
> I also folded the actual flash array read access into the MMIO handler
> and fixed the other small issues.
>
> Cheers,
> Andre
>
>  Makefile                          |   6 +
>  arm/include/arm-common/kvm-arch.h |   3 +
>  builtin-run.c                     |   2 +
>  hw/cfi_flash.c                    | 546 ++++++++++++++++++++++++++++++
>  include/kvm/kvm-config.h          |   1 +
>  include/kvm/util.h                |   5 +
>  6 files changed, 563 insertions(+)
>  create mode 100644 hw/cfi_flash.c
>
> diff --git a/Makefile b/Makefile
> index 3862112c..7ed6fb5e 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -170,6 +170,7 @@ ifeq ($(ARCH), arm)
>  	CFLAGS		+= -march=armv7-a
>  
>  	ARCH_WANT_LIBFDT := y
> +	ARCH_HAS_FLASH_MEM := y
>  endif
>  
>  # ARM64
> @@ -182,6 +183,7 @@ ifeq ($(ARCH), arm64)
>  	ARCH_INCLUDE	+= -Iarm/aarch64/include
>  
>  	ARCH_WANT_LIBFDT := y
> +	ARCH_HAS_FLASH_MEM := y
>  endif
>  
>  ifeq ($(ARCH),mips)
> @@ -261,6 +263,10 @@ ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
>  	endif
>  endif
>  
> +ifeq (y,$(ARCH_HAS_FLASH_MEM))
> +	OBJS	+= hw/cfi_flash.o
> +endif
> +
>  ifeq ($(call try-build,$(SOURCE_ZLIB),$(CFLAGS),$(LDFLAGS) -lz),y)
>  	CFLAGS_DYNOPT	+= -DCONFIG_HAS_ZLIB
>  	LIBS_DYNOPT	+= -lz
> diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
> index b9d486d5..2bb085f4 100644
> --- a/arm/include/arm-common/kvm-arch.h
> +++ b/arm/include/arm-common/kvm-arch.h
> @@ -21,6 +21,9 @@
>  #define ARM_GIC_DIST_SIZE	0x10000
>  #define ARM_GIC_CPUI_SIZE	0x20000
>  
> +#define ARM_FLASH_MMIO_BASE	0x2000000		/* 32 MB */
> +#define KVM_FLASH_MMIO_BASE	ARM_FLASH_MMIO_BASE
> +
>  #define ARM_IOPORT_SIZE		(ARM_MMIO_AREA - ARM_IOPORT_AREA)
>  #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))
>  #define ARM_PCI_CFG_SIZE	(1ULL << 24)
> diff --git a/builtin-run.c b/builtin-run.c
> index f8dc6c72..df8c6741 100644
> --- a/builtin-run.c
> +++ b/builtin-run.c
> @@ -138,6 +138,8 @@ void kvm_run_set_wrapper_sandbox(void)
>  			"Kernel command line arguments"),		\
>  	OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\
>  			"Firmware image to boot in virtual machine"),	\
> +	OPT_STRING('F', "flash", &(cfg)->flash_filename, "flash",\
> +			"Flash image to present to virtual machine"),	\
>  									\
>  	OPT_GROUP("Networking options:"),				\
>  	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",	\
> diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
> new file mode 100644
> index 00000000..d7c0e7e8
> --- /dev/null
> +++ b/hw/cfi_flash.c
> @@ -0,0 +1,546 @@
> +#include <stdbool.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <linux/bitops.h>
> +#include <linux/err.h>
> +#include <linux/sizes.h>
> +#include <linux/types.h>
> +
> +#include "kvm/kvm.h"
> +#include "kvm/kvm-arch.h"
> +#include "kvm/devices.h"
> +#include "kvm/fdt.h"
> +#include "kvm/mutex.h"
> +#include "kvm/util.h"
> +
> +/* The EDK2 driver hardcodes two 16-bit chips on a 32-bit bus. */
> +#define CFI_NR_FLASH_CHIPS			2
> +
> +/* We always emulate a 32 bit bus width. */
> +#define CFI_BUS_WIDTH				4
> +
> +/* The *effective* size of an erase block (over all chips) */
> +#define FLASH_BLOCK_SIZE			SZ_64K
> +
> +#define PROGRAM_BUFF_SIZE_BITS			7
> +#define PROGRAM_BUFF_SIZE			(1U << PROGRAM_BUFF_SIZE_BITS)
> +
> +/* CFI commands */
> +#define CFI_CMD_LOCK_BLOCK			0x01
> +#define CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP	0x10
> +#define CFI_CMD_BLOCK_ERASE_SETUP		0x20
> +#define CFI_CMD_WORD_PROGRAM_SETUP		0x40
> +#define CFI_CMD_CLEAR_STATUS_REGISTER		0x50
> +#define CFI_CMD_LOCK_BLOCK_SETUP		0x60
> +#define CFI_CMD_READ_STATUS_REGISTER		0x70
> +#define CFI_CMD_READ_JEDEC			0x90
> +#define CFI_CMD_READ_CFI_QUERY			0x98
> +#define CFI_CMD_BUFFERED_PROGRAM_CONFIRM	0xd0
> +#define CFI_CMD_BLOCK_ERASE_CONFIRM		0xd0
> +#define CFI_CMD_UNLOCK_BLOCK			0xd0
> +#define CFI_CMD_BUFFERED_PROGRAM_SETUP		0xe8
> +#define CFI_CMD_READ_ARRAY			0xff
> +
> +/*
> + * CFI query table contents, as far as it is constant.
> + */
> +#define CFI_GEOM_OFFSET				0x27
> +static u8 cfi_query_table[] = {
> +		/* offset 0x10: CFI query identification string */
> +	'Q', 'R', 'Y',		/* ID string */
> +	0x01, 0x00,		/* primary command set: Intel/Sharp extended */
> +	0x31, 0x00,		/* address of primary extended query table */
> +	0x00, 0x00,		/* alternative command set: unused */
> +	0x00, 0x00,		/* address of alternative extended query table*/
> +		/* offset 0x1b: system interface information */
> +	0x45,			/* minimum Vcc voltage: 4.5V */
> +	0x55,			/* maximum Vcc voltage: 5.5V */
> +	0x00,			/* minimum Vpp voltage: 0.0V (unused) */
> +	0x00,			/* maximum Vpp voltage: 0.0V *(unused) */
> +	0x01,			/* timeout for single word program: 2 us */
> +	0x01,			/* timeout for multi-byte program: 2 us */
> +	0x01,			/* timeout for block erase: 2 ms */
> +	0x00,			/* timeout for full chip erase: not supported */
> +	0x00,			/* max timeout for single word program: 1x */
> +	0x00,			/* max timeout for mulit-byte program: 1x */
> +	0x00,			/* max timeout for block erase: 1x */
> +	0x00,			/* max timeout for chip erase: not supported */
> +		/* offset 0x27: flash geometry information */
> +	0x00,			/* size in power-of-2 bytes, filled later */
> +	0x06, 0x00,		/* interface description: 32 and 16 bits */

I don't think this is correct. From Intel StrataFlash Embedded Memory (P30)
Family, table 34:

""n" such that n+1 specifies the bit field that represents the flash device width
capabilities as described in the table".

If you want to advertise 32 and 16 bit write capabilities, it should be 5 because
5+1=6. This is also the value that the Linux kernel checks for (see
include/linux/mtd/cfi.h, define CFI_INTERFACE_X16_BY_X32_ASYNC"). 6 actually means
32, 16 and 8 bit accesses.

This begs another question: why do we support both 16 and 32 bit accesses instead
of supporting only 32 bit?

> +	PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
> +				/* number of multi-byte writes */

Shouldn't the comment be maximum number of bytes in the write buffer?

> +	0x01,			/* one erase block region */
> +	0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> +		/* offset 0x31: Intel primary algorithm extended query table */
> +	'P', 'R', 'I',
> +	'1', '0',		/* version 1.0 */
> +	0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
> +	0x00,			/* no functions after suspend */
> +	0x01, 0x00,		/* only lock bit supported */
> +	0x50,			/* best Vcc value: 5.0V */
> +	0x00,			/* best Vpp value: 0.0V (unused) */
> +	0x01,			/* number of protection register fields */
> +	0x00, 0x00, 0x00, 0x00,	/* protection field 1 description */
> +};

As an aside, I found it impossible to review the cfi_query_table array in its
current form. This is how I wrote the array so I could read it. I also took the
liberty to remove the offset when indexing the array, making read_cfi less error
prone, in my opinion:

diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
index d7c0e7e80d69..65a90e288be8 100644
--- a/hw/cfi_flash.c
+++ b/hw/cfi_flash.c
@@ -46,45 +46,43 @@
  */
 #define CFI_GEOM_OFFSET                                0x27
 static u8 cfi_query_table[] = {
-               /* offset 0x10: CFI query identification string */
-       'Q', 'R', 'Y',          /* ID string */
-       0x01, 0x00,             /* primary command set: Intel/Sharp extended */
-       0x31, 0x00,             /* address of primary extended query table */
-       0x00, 0x00,             /* alternative command set: unused */
-       0x00, 0x00,             /* address of alternative extended query table*/
-               /* offset 0x1b: system interface information */
-       0x45,                   /* minimum Vcc voltage: 4.5V */
-       0x55,                   /* maximum Vcc voltage: 5.5V */
-       0x00,                   /* minimum Vpp voltage: 0.0V (unused) */
-       0x00,                   /* maximum Vpp voltage: 0.0V *(unused) */
-       0x01,                   /* timeout for single word program: 2 us */
-       0x01,                   /* timeout for multi-byte program: 2 us */
-       0x01,                   /* timeout for block erase: 2 ms */
-       0x00,                   /* timeout for full chip erase: not supported */
-       0x00,                   /* max timeout for single word program: 1x */
-       0x00,                   /* max timeout for mulit-byte program: 1x */
-       0x00,                   /* max timeout for block erase: 1x */
-       0x00,                   /* max timeout for chip erase: not supported */
-               /* offset 0x27: flash geometry information */
-       0x00,                   /* size in power-of-2 bytes, filled later */
-       0x06, 0x00,             /* interface description: 32 and 16 bits */
-       PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
+       [0x10] = 'Q', 'R', 'Y', /* ID string */
+       [0x13] = 0x01, 0x00,    /* primary command set: Intel/Sharp extended */
+       [0x15] = 0x31, 0x00,    /* address of primary extended query table */
+       [0x17] = 0x00, 0x00,    /* alternative command set: unused */
+       [0x19] = 0x00, 0x00,    /* address of alternative extended query table*/
+       /* System interface information */
+       [0x1b] = 0x45,          /* minimum Vcc voltage: 4.5V */
+       [0x1c] = 0x55,          /* maximum Vcc voltage: 5.5V */
+       [0x1d] = 0x00,          /* minimum Vpp voltage: 0.0V (unused) */
+       [0x1e] = 0x00,          /* maximum Vpp voltage: 0.0V *(unused) */
+       [0x1f] = 0x01,          /* timeout for single word program: 2 us */
+       [0x20] = 0x01,          /* timeout for multi-byte program: 2 us */
+       [0x21] = 0x01,          /* timeout for block erase: 2 ms */
+       [0x22] = 0x00,          /* timeout for full chip erase: not supported */
+       [0x23] = 0x00,          /* max timeout for single word program: 1x */
+       [0x24] = 0x00,          /* max timeout for mulit-byte program: 1x */
+       [0x25] = 0x00,          /* max timeout for block erase: 1x */
+       [0x26] = 0x00,          /* max timeout for chip erase: not supported */
+       /* Flash geometry information */
+       [0x27] = 0x00,          /* size in power-of-2 bytes, filled later */
+       [0x28] = 0x06, 0x00,    /* interface description: 32 and 16 bits */
+       [0x2a] = PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
                                /* number of multi-byte writes */
-       0x01,                   /* one erase block region */
-       0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
-               /* offset 0x31: Intel primary algorithm extended query table */
-       'P', 'R', 'I',
-       '1', '0',               /* version 1.0 */
-       0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
-       0x00,                   /* no functions after suspend */
-       0x01, 0x00,             /* only lock bit supported */
-       0x50,                   /* best Vcc value: 5.0V */
-       0x00,                   /* best Vpp value: 0.0V (unused) */
-       0x01,                   /* number of protection register fields */
-       0x00, 0x00, 0x00, 0x00, /* protection field 1 description */
+       [0x2c] = 0x01,          /* one erase block region */
+       [0x2d] = 0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
+       /* Intel primary algorithm extended query table */
+       [0x31] = 'P', 'R', 'I', /* ID string */
+       [0x34] = '1', '0',      /* version 1.0 */
+       [0x36] = 0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock &
pm-read */
+       [0x40] = 0x00,          /* no functions after suspend */
+       [0x41] = 0x01, 0x00,    /* only lock bit supported */
...skipping...
+       [0x10] = 'Q', 'R', 'Y', /* ID string */
+       [0x13] = 0x01, 0x00,    /* primary command set: Intel/Sharp extended */
+       [0x15] = 0x31, 0x00,    /* address of primary extended query table */
+       [0x17] = 0x00, 0x00,    /* alternative command set: unused */
+       [0x19] = 0x00, 0x00,    /* address of alternative extended query table*/
+       /* System interface information */
+       [0x1b] = 0x45,          /* minimum Vcc voltage: 4.5V */
+       [0x1c] = 0x55,          /* maximum Vcc voltage: 5.5V */
+       [0x1d] = 0x00,          /* minimum Vpp voltage: 0.0V (unused) */
+       [0x1e] = 0x00,          /* maximum Vpp voltage: 0.0V *(unused) */
+       [0x1f] = 0x01,          /* timeout for single word program: 2 us */
+       [0x20] = 0x01,          /* timeout for multi-byte program: 2 us */
+       [0x21] = 0x01,          /* timeout for block erase: 2 ms */
+       [0x22] = 0x00,          /* timeout for full chip erase: not supported */
+       [0x23] = 0x00,          /* max timeout for single word program: 1x */
+       [0x24] = 0x00,          /* max timeout for mulit-byte program: 1x */
+       [0x25] = 0x00,          /* max timeout for block erase: 1x */
+       [0x26] = 0x00,          /* max timeout for chip erase: not supported */
+       /* Flash geometry information */
+       [0x27] = 0x00,          /* size in power-of-2 bytes, filled later */
+       [0x28] = 0x06, 0x00,    /* interface description: 32 and 16 bits */
+       [0x2a] = PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
                                /* number of multi-byte writes */
-       0x01,                   /* one erase block region */
-       0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
-               /* offset 0x31: Intel primary algorithm extended query table */
-       'P', 'R', 'I',
-       '1', '0',               /* version 1.0 */
-       0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
-       0x00,                   /* no functions after suspend */
-       0x01, 0x00,             /* only lock bit supported */
-       0x50,                   /* best Vcc value: 5.0V */
-       0x00,                   /* best Vpp value: 0.0V (unused) */
-       0x01,                   /* number of protection register fields */
-       0x00, 0x00, 0x00, 0x00, /* protection field 1 description */
+       [0x2c] = 0x01,          /* one erase block region */
+       [0x2d] = 0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
+       /* Intel primary algorithm extended query table */
+       [0x31] = 'P', 'R', 'I', /* ID string */
+       [0x34] = '1', '0',      /* version 1.0 */
+       [0x36] = 0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock &
pm-read */
+       [0x40] = 0x00,          /* no functions after suspend */
+       [0x41] = 0x01, 0x00,    /* only lock bit supported */
+       [0x43] = 0x50,          /* best Vcc value: 5.0V */
+       [0x43] = 0x00,          /* best Vpp value: 0.0V (unused) */
+       [0x44] = 0x01,          /* number of protection register fields */
+       [0x45] = 0x00, 0x00, 0x00, 0x00,/* protection field 1 description */
 };
 
-
 /*
  * Those states represent a subset of the CFI flash state machine.
  */
@@ -141,10 +139,7 @@ static int nr_erase_blocks(struct cfi_flash_device *sfdev)
  */
 static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
 {
-       if (addr < 0x10)                /* CFI information starts at 0x10 */
-               return 0;
-
-       if (addr - 0x10 > sizeof(cfi_query_table)) {
+       if (addr > sizeof(cfi_query_table)) {
                pr_debug("CFI query read access beyond the end of table");
                return 0;
        }
@@ -163,7 +158,7 @@ static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
                return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
        }
 
-       return cfi_query_table[addr - 0x10];
+       return cfi_query_table[addr];
 }
 
 static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)

Thanks,
Alex
> +
> +
> +/*
> + * Those states represent a subset of the CFI flash state machine.
> + */
> +enum cfi_flash_state {
> +	READY,
> +	LOCK_SETUP,
> +	WP_SETUP,
> +	BP_SETUP,
> +	BP_LOAD,
> +	ERASE_SETUP,
> +};
> +
> +/*
> + * The device can be in several **Read** modes.
> + * We don't implement the asynchronous burst mode.
> + */
> +enum cfi_read_mode {
> +	READ_ARRAY,
> +	READ_STATUS,
> +	READ_DEVICE_ID,
> +	READ_QUERY,
> +};
> +
> +struct cfi_flash_device {
> +	struct device_header	dev_hdr;
> +	/* Protects the CFI state machine variables in this data structure. */
> +	struct mutex		mutex;
> +	u64			base_addr;
> +	u32			size;
> +
> +	void			*flash_memory;
> +	u8			program_buffer[PROGRAM_BUFF_SIZE * 4];
> +	unsigned long		*lock_bm;
> +	u64			last_address;
> +	unsigned int		buff_written;
> +	unsigned int		program_length;
> +
> +	enum cfi_flash_state	state;
> +	enum cfi_read_mode	read_mode;
> +	u16			rcr;
> +	u8			sr;
> +};
> +
> +static int nr_erase_blocks(struct cfi_flash_device *sfdev)
> +{
> +	return sfdev->size / FLASH_BLOCK_SIZE;
> +}
> +
> +/*
> + * CFI queries always deal with one byte of information, possibly mirrored
> + * to other bytes on the bus. This is dealt with in the callers.
> + * The address provided is the one for 8-bit addressing, and would need to
> + * be adjusted for wider accesses.
> + */
> +static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
> +{
> +	if (addr < 0x10)		/* CFI information starts at 0x10 */
> +		return 0;
> +
> +	if (addr - 0x10 > sizeof(cfi_query_table)) {
> +		pr_debug("CFI query read access beyond the end of table");
> +		return 0;
> +	}
> +
> +	/* Fixup dynamic information in the geometry part of the table. */
> +	switch (addr) {
> +	case CFI_GEOM_OFFSET:		/* device size in bytes, power of two */
> +		return pow2_size(sfdev->size / CFI_NR_FLASH_CHIPS);
> +	case CFI_GEOM_OFFSET + 6:	/* number of erase blocks, minus one */
> +		return (nr_erase_blocks(sfdev) - 1) & 0xff;
> +	case CFI_GEOM_OFFSET + 7:
> +		return (nr_erase_blocks(sfdev) - 1) >> 8;
> +	case CFI_GEOM_OFFSET + 8:	/* erase block size, in units of 256 */
> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) & 0xff;
> +	case CFI_GEOM_OFFSET + 9:
> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
> +	}
> +
> +	return cfi_query_table[addr - 0x10];
> +}
> +
> +static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
> +{
> +	int block_nr = addr / FLASH_BLOCK_SIZE;
> +
> +	return test_bit(block_nr, sfdev->lock_bm);
> +}
> +
> +#define DEV_ID_MASK 0x7ff
> +static u16 read_dev_id(struct cfi_flash_device *sfdev, u64 addr)
> +{
> +	switch ((addr & DEV_ID_MASK) / CFI_BUS_WIDTH) {
> +	case 0x0:				/* vendor ID */
> +		return 0x0000;
> +	case 0x1:				/* device ID */
> +		return 0xffff;
> +	case 0x2:
> +		return block_is_locked(sfdev, addr & ~DEV_ID_MASK);
> +	case 0x5:
> +		return sfdev->rcr;
> +	default:			/* Ignore the other entries. */
> +		return 0;
> +	}
> +}
> +
> +static void lock_block(struct cfi_flash_device *sfdev, u64 addr, bool lock)
> +{
> +	int block_nr = addr / FLASH_BLOCK_SIZE;
> +
> +	if (lock)
> +		set_bit(block_nr, sfdev->lock_bm);
> +	else
> +		clear_bit(block_nr, sfdev->lock_bm);
> +}
> +
> +static void word_program(struct cfi_flash_device *sfdev,
> +			 u64 addr, void *data, int len)
> +{
> +	if (block_is_locked(sfdev, addr)) {
> +		sfdev->sr |= 0x12;
> +		return;
> +	}
> +
> +	memcpy(sfdev->flash_memory + addr, data, len);
> +}
> +
> +/* Reset the program buffer state to prepare for follow-up writes. */
> +static void buffer_setup(struct cfi_flash_device *sfdev)
> +{
> +	memset(sfdev->program_buffer, 0, sizeof(sfdev->program_buffer));
> +	sfdev->last_address = ~0ULL;
> +	sfdev->buff_written = 0;
> +}
> +
> +static bool buffer_program(struct cfi_flash_device *sfdev,
> +			   u64 addr, void *buffer, int len)
> +{
> +	unsigned int buf_addr;
> +
> +	if (sfdev->buff_written >= sfdev->program_length)
> +		return false;
> +
> +	/*
> +	 * The first word written into the buffer after the setup command
> +	 * happens to be the base address for the buffer.
> +	 * All subsequent writes need to be within this address and this
> +	 * address plus the buffer size, so keep this value around.
> +	 */
> +	if (sfdev->last_address == ~0ULL)
> +		sfdev->last_address = addr;
> +
> +	if (addr < sfdev->last_address)
> +		return false;
> +	buf_addr = addr - sfdev->last_address;
> +	if (buf_addr >= PROGRAM_BUFF_SIZE)
> +		return false;
> +
> +	memcpy(sfdev->program_buffer + buf_addr, buffer, len);
> +	sfdev->buff_written++;
> +
> +	return true;
> +}
> +
> +static void buffer_confirm(struct cfi_flash_device *sfdev)
> +{
> +	if (block_is_locked(sfdev, sfdev->last_address)) {
> +		sfdev->sr |= 0x12;
> +		return;
> +	}
> +	memcpy(sfdev->flash_memory + sfdev->last_address,
> +	       sfdev->program_buffer,
> +	       sfdev->buff_written * sizeof(u32));
> +}
> +
> +static void block_erase_confirm(struct cfi_flash_device *sfdev, u64 addr)
> +{
> +	if (block_is_locked(sfdev, addr)) {
> +		sfdev->sr |= 0x12;
> +		return;
> +	}
> +
> +	memset(sfdev->flash_memory + addr, 0xFF, FLASH_BLOCK_SIZE);
> +}
> +
> +static void cfi_flash_mmio(struct kvm_cpu *vcpu,
> +			   u64 addr, u8 *data, u32 len, u8 is_write,
> +			   void *context)
> +{
> +	struct cfi_flash_device *sfdev = context;
> +	u64 faddr = addr - sfdev->base_addr;
> +	u32 value;
> +
> +	if (!is_write) {
> +		u16 cfi_value = 0;
> +
> +		mutex_lock(&sfdev->mutex);
> +
> +		switch (sfdev->read_mode) {
> +		case READ_ARRAY:
> +			/* just copy the requested bytes from the array */
> +			memcpy(data, sfdev->flash_memory + faddr, len);
> +			goto out_unlock;
> +		case READ_STATUS:
> +			cfi_value = sfdev->sr;
> +			break;
> +		case READ_DEVICE_ID:
> +			cfi_value = read_dev_id(sfdev, faddr);
> +			break;
> +		case READ_QUERY:
> +			cfi_value = read_cfi(sfdev, faddr / CFI_BUS_WIDTH);
> +			break;
> +		}
> +		switch (len) {
> +		case 1:
> +			*data = cfi_value;
> +			break;
> +		case 8: memset(data + 4, 0, 4);
> +			/* fall-through */
> +		case 4:
> +			if (CFI_NR_FLASH_CHIPS == 2)
> +				memcpy(data + 2, &cfi_value, 2);
> +			else
> +				memset(data + 2, 0, 2);
> +			/* fall-through */
> +		case 2:
> +			memcpy(data, &cfi_value, 2);
> +			break;
> +		default:
> +			pr_debug("CFI flash: illegal access length %d for read mode %d",
> +				 len, sfdev->read_mode);
> +			break;
> +		}
> +
> +		goto out_unlock;
> +	}
> +
> +	if (len > 4) {
> +		pr_info("CFI flash: MMIO %d-bit write access not supported",
> +			 len * 8);
> +		return;
> +	}
> +
> +	memcpy(&value, data, len);
> +
> +	mutex_lock(&sfdev->mutex);
> +
> +	switch (sfdev->state) {
> +	case READY:			/* handled below */
> +		break;
> +
> +	case LOCK_SETUP:
> +		switch (value & 0xff) {
> +		case CFI_CMD_LOCK_BLOCK:
> +			lock_block(sfdev, faddr, true);
> +			sfdev->read_mode = READ_STATUS;
> +			break;
> +		case CFI_CMD_UNLOCK_BLOCK:
> +			lock_block(sfdev, faddr, false);
> +			sfdev->read_mode = READ_STATUS;
> +			break;
> +		default:
> +			sfdev->sr |= 0x30;
> +			break;
> +		}
> +		sfdev->state = READY;
> +		goto out_unlock;
> +
> +	case WP_SETUP:
> +		word_program(sfdev, faddr, data, len);
> +		sfdev->read_mode = READ_STATUS;
> +		sfdev->state = READY;
> +		goto out_unlock;
> +
> +	case BP_LOAD:
> +		if (buffer_program(sfdev, faddr, data, len))
> +			goto out_unlock;
> +
> +		if ((value & 0xFF) == CFI_CMD_BUFFERED_PROGRAM_CONFIRM) {
> +			buffer_confirm(sfdev);
> +			sfdev->read_mode = READ_STATUS;
> +		} else {
> +			pr_debug("CFI flash: BP_LOAD: expected CONFIRM(0xd0), got 0x%x @ 0x%llx",
> +				 value, faddr);
> +			sfdev->sr |= 0x10;
> +		}
> +		sfdev->state = READY;
> +		goto out_unlock;
> +
> +	case BP_SETUP:
> +		sfdev->program_length = (value & 0xffff) + 1;
> +		if (sfdev->program_length > PROGRAM_BUFF_SIZE / 4)
> +			sfdev->program_length = PROGRAM_BUFF_SIZE / 4;
> +		sfdev->state = BP_LOAD;
> +		sfdev->read_mode = READ_STATUS;
> +		goto out_unlock;
> +
> +	case ERASE_SETUP:
> +		if ((value & 0xff) == CFI_CMD_BLOCK_ERASE_CONFIRM)
> +			block_erase_confirm(sfdev, faddr);
> +		else
> +			sfdev->sr |= 0x30;
> +
> +		sfdev->state = READY;
> +		sfdev->read_mode = READ_STATUS;
> +		goto out_unlock;
> +	}
> +
> +	/* write commands in READY state */
> +	switch (value & 0xFF) {
> +	case CFI_CMD_READ_JEDEC:
> +		sfdev->read_mode = READ_DEVICE_ID;
> +		break;
> +	case CFI_CMD_READ_STATUS_REGISTER:
> +		sfdev->read_mode = READ_STATUS;
> +		break;
> +	case CFI_CMD_READ_CFI_QUERY:
> +		sfdev->read_mode = READ_QUERY;
> +		break;
> +	case CFI_CMD_CLEAR_STATUS_REGISTER:
> +		sfdev->sr = 0x80;
> +		break;
> +	case CFI_CMD_WORD_PROGRAM_SETUP:
> +	case CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP:
> +		sfdev->state = WP_SETUP;
> +		sfdev->read_mode = READ_STATUS;
> +		break;
> +	case CFI_CMD_LOCK_BLOCK_SETUP:
> +		sfdev->state = LOCK_SETUP;
> +		break;
> +	case CFI_CMD_BLOCK_ERASE_SETUP:
> +		sfdev->state = ERASE_SETUP;
> +		sfdev->read_mode = READ_STATUS;
> +		break;
> +	case CFI_CMD_BUFFERED_PROGRAM_SETUP:
> +		buffer_setup(sfdev);
> +		sfdev->state = BP_SETUP;
> +		sfdev->read_mode = READ_STATUS;
> +		break;
> +	case CFI_CMD_BUFFERED_PROGRAM_CONFIRM:
> +		pr_debug("CFI flash: unexpected confirm command 0xD0");
> +		break;
> +	default:
> +		pr_debug("CFI flash: unknown command 0x%x", value);
> +		/* fall through */
> +	case CFI_CMD_READ_ARRAY:
> +		sfdev->read_mode = READ_ARRAY;
> +		break;
> +	}
> +
> +out_unlock:
> +	mutex_unlock(&sfdev->mutex);
> +}
> +
> +#ifdef CONFIG_HAS_LIBFDT
> +static void generate_cfi_flash_fdt_node(void *fdt,
> +					struct device_header *dev_hdr,
> +					void (*generate_irq_prop)(void *fdt,
> +								  u8 irq,
> +								enum irq_type))
> +{
> +	struct cfi_flash_device *sfdev;
> +	u64 reg_prop[2];
> +
> +	sfdev = container_of(dev_hdr, struct cfi_flash_device, dev_hdr);
> +	reg_prop[0] = cpu_to_fdt64(sfdev->base_addr);
> +	reg_prop[1] = cpu_to_fdt64(sfdev->size);
> +
> +	_FDT(fdt_begin_node(fdt, "flash"));
> +	_FDT(fdt_property_cell(fdt, "bank-width", CFI_BUS_WIDTH));
> +	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
> +	_FDT(fdt_property_cell(fdt, "#size-cells", 0x1));
> +	_FDT(fdt_property_string(fdt, "compatible", "cfi-flash"));
> +	_FDT(fdt_property_string(fdt, "label", "System-firmware"));
> +	_FDT(fdt_property(fdt, "reg", &reg_prop, sizeof(reg_prop)));
> +	_FDT(fdt_end_node(fdt));
> +}
> +#else
> +#define generate_cfi_flash_fdt_node NULL
> +#endif
> +
> +static struct cfi_flash_device *create_flash_device_file(struct kvm *kvm,
> +							 const char *filename)
> +{
> +	struct cfi_flash_device *sfdev;
> +	struct stat statbuf;
> +	unsigned int value;
> +	int ret;
> +	int fd;
> +
> +	fd = open(filename, O_RDWR);
> +	if (fd < 0)
> +		return ERR_PTR(-errno);
> +	if (fstat(fd, &statbuf) < 0) {
> +		close(fd);
> +		return ERR_PTR(-errno);
> +	}
> +
> +	sfdev = malloc(sizeof(struct cfi_flash_device));
> +	if (!sfdev) {
> +		close(fd);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	sfdev->size = (statbuf.st_size + 4095) & ~0xfffUL;
> +	sfdev->flash_memory = mmap(NULL, statbuf.st_size,
> +				   PROT_READ | PROT_WRITE, MAP_SHARED,
> +				   fd, 0);
> +	if (sfdev->flash_memory == MAP_FAILED) {
> +		close(fd);
> +		free(sfdev);
> +		return ERR_PTR(-errno);
> +	}
> +	sfdev->base_addr = KVM_FLASH_MMIO_BASE;
> +	sfdev->state = READY;
> +	sfdev->read_mode = READ_ARRAY;
> +	sfdev->sr = 0x80;
> +	sfdev->rcr = 0xbfcf;
> +
> +	value = roundup(nr_erase_blocks(sfdev), BITS_PER_LONG) / 8;
> +	sfdev->lock_bm = malloc(value);
> +	memset(sfdev->lock_bm, 0, value);
> +
> +	sfdev->dev_hdr.bus_type = DEVICE_BUS_MMIO;
> +	sfdev->dev_hdr.data = generate_cfi_flash_fdt_node;
> +	mutex_init(&sfdev->mutex);
> +	ret = device__register(&sfdev->dev_hdr);
> +	if (ret) {
> +		free(sfdev->flash_memory);
> +		free(sfdev);
> +		return ERR_PTR(ret);
> +	}
> +
> +	ret = kvm__register_mmio(kvm,
> +				 sfdev->base_addr, sfdev->size,
> +				 false, cfi_flash_mmio, sfdev);
> +	if (ret) {
> +		device__unregister(&sfdev->dev_hdr);
> +		free(sfdev->flash_memory);
> +		free(sfdev);
> +		return ERR_PTR(ret);
> +	}
> +
> +	return sfdev;
> +}
> +
> +static int flash__init(struct kvm *kvm)
> +{
> +	struct cfi_flash_device *sfdev;
> +
> +	if (!kvm->cfg.flash_filename)
> +		return 0;
> +
> +	sfdev = create_flash_device_file(kvm, kvm->cfg.flash_filename);
> +	if (IS_ERR(sfdev))
> +		return PTR_ERR(sfdev);
> +
> +	return 0;
> +}
> +dev_init(flash__init);
> diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
> index a052b0bc..f4a8b831 100644
> --- a/include/kvm/kvm-config.h
> +++ b/include/kvm/kvm-config.h
> @@ -35,6 +35,7 @@ struct kvm_config {
>  	const char *vmlinux_filename;
>  	const char *initrd_filename;
>  	const char *firmware_filename;
> +	const char *flash_filename;
>  	const char *console;
>  	const char *dev;
>  	const char *network;
> diff --git a/include/kvm/util.h b/include/kvm/util.h
> index 4ca7aa93..5c37f0b7 100644
> --- a/include/kvm/util.h
> +++ b/include/kvm/util.h
> @@ -104,6 +104,11 @@ static inline unsigned long roundup_pow_of_two(unsigned long x)
>  	return x ? 1UL << fls_long(x - 1) : 0;
>  }
>  
> +static inline int pow2_size(unsigned long x)
> +{
> +	return (sizeof(x) * 8) - __builtin_clzl(x - 1);
> +}
> +
>  struct kvm;
>  void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
>  void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);
_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH kvmtool v2] Add emulation for CFI compatible flash memory
  2020-02-17 17:20 ` Alexandru Elisei
@ 2020-02-19 17:26   ` Andre Przywara
  2020-02-20 10:24     ` Alexandru Elisei
  0 siblings, 1 reply; 8+ messages in thread
From: Andre Przywara @ 2020-02-19 17:26 UTC (permalink / raw)
  To: Alexandru Elisei
  Cc: Raphael Gault, Sami Mujawar, Will Deacon, kvmarm, linux-arm-kernel

On Mon, 17 Feb 2020 17:20:43 +0000
Alexandru Elisei <alexandru.elisei@arm.com> wrote:

Hi,

> I guess the device hasn't been tested with Linux. This is what I'm getting when
> trying to boot a Linux guest using the command:

It was actually developed with a Linux guest, because that's more verbatim and easier to debug.

And I just tested this again with Linux and it worked for me:
[    2.164992] physmap-flash 20000.flash: physmap platform flash device: [mem 0x00020000-0x0021ffff]
[    2.166539] 20000.flash: Found 2 x16 devices at 0x0 in 32-bit bank. Manufacturer ID 0x000000 Chip ID 0x00ffff
...
# mtd_debug info /dev/mtd0
mtd.type = MTD_NORFLASH
mtd.flags = MTD_CAP_NORFLASH
mtd.size = 2097152 (2M)
mtd.erasesize = 65536 (64K)
mtd.writesize = 1 
mtd.oobsize = 0 
regions = 1

I think what you are seeing are problems when you give a non-power-of-2 sized flash image. The current patch does not really support this (since it's hardly a thing in the real world). I originally wanted to expand any "uneven" size to the next power-of-2, but this doesn't work easily with mmap.
So I now changed the code to downgrade, so you get 8MB with any file ranging from [8MB, 16MB(, for instance.
That fixed the Linux problems with those files for me.

> $ ./lkvm run -c4 -m4096 -k /path/to/kernel -d /path/to/disk -p root="/dev/vda2" -F
> flash.img
> 
> [    0.659167] physmap-flash 2000000.flash: physmap platform flash device: [mem
> 0x02000000-0x029fffff]
> [    0.660444] Number of erase regions: 1
> [    0.661036] Primary Vendor Command Set: 0001 (Intel/Sharp Extended)
> [    0.661688] Primary Algorithm Table at 0031
> [    0.662168] Alternative Vendor Command Set: 0000 (None)
> [    0.662711] No Alternate Algorithm Table
> [    0.663120] Vcc Minimum:  4.5 V
> [    0.663450] Vcc Maximum:  5.5 V
> [    0.663779] No Vpp line
> [    0.664039] Typical byte/word write timeout: 2 µs
> [    0.664590] Maximum byte/word write timeout: 2 µs
> [    0.665240] Typical full buffer write timeout: 2 µs
> [    0.665775] Maximum full buffer write timeout: 2 µs
> [    0.666373] Typical block erase timeout: 2 ms
> [    0.666828] Maximum block erase timeout: 2 ms
> [    0.667282] Chip erase not supported
> [    0.667659] Device size: 0x800000 bytes (8 MiB)
> [    0.668137] Flash Device Interface description: 0x0006
> [    0.668697]   - Unknown
> [    0.668963] Max. bytes in buffer write: 0x40
> [    0.669407] Number of Erase Block Regions: 1
> [    0.669865]   Erase Region #0: BlockSize 0x8000 bytes, 160 blocks
> [    0.672299] 2000000.flash: Found 2 x16 devices at 0x0 in 32-bit bank.
> Manufacturer ID 0x000000 Chip ID 0x00ffff
> [    0.681328] NOR chip too large to fit in mapping. Attempting to cope...
> [    0.682046] Intel/Sharp Extended Query Table at 0x0031
> [    0.682645] Using buffer write method
> [    0.683031] Sum of regions (a00000) != total size of set of interleaved chips
> (1000000)
> [    0.683854] gen_probe: No supported Vendor Command Set found
> [    0.684441] physmap-flash 2000000.flash: map_probe failed
> 
> I also defined DEBUG_CFI in drivers/mtd/chips/cfi_probe.c.
> 
> The Flash Device Interface description that we provide is wrong, it should 0x05.
> More details below.
> 
> On 2/7/20 12:19 PM, Andre Przywara wrote:
> > From: Raphael Gault <raphael.gault@arm.com>
> >
> > The EDK II UEFI firmware implementation requires some storage for the EFI
> > variables, which is typically some flash storage.
> > Since this is already supported on the EDK II side, we add a CFI flash
> > emulation to kvmtool.
> > This is backed by a file, specified via the --flash or -F command line
> > option. Any flash writes done by the guest will immediately be reflected
> > into this file (kvmtool mmap's the file).
> >
> > This implements a CFI flash using the "Intel/Sharp extended command
> > set", as specified in:
> > - JEDEC JESD68.01
> > - JEDEC JEP137B
> > - Intel Application Note 646
> > Some gaps in those specs have been filled by looking at real devices and
> > other implementations (QEMU, Linux kernel driver).
> >
> > At the moment this relies on DT to advertise the base address of the
> > flash memory (mapped into the MMIO address space) and is only enabled
> > for ARM/ARM64. The emulation itself is architecture agnostic, though.
> >
> > This is one missing piece toward a working UEFI boot with kvmtool on
> > ARM guests, the other is to provide writable PCI BARs, which is WIP.
> >
> > Signed-off-by: Raphael Gault <raphael.gault@arm.com>
> > [Andre: rewriting and fixing]
> > Signed-off-by: Andre Przywra <andre.przywara@arm.com>
> > ---
> > Hi,
> >
> > an update addressing Will's comments. I added coarse grained locking
> > to the MMIO handler, to prevent concurrent vCPU accesses from messing up
> > the internal CFI flash state machine.
> > I also folded the actual flash array read access into the MMIO handler
> > and fixed the other small issues.
> >
> > Cheers,
> > Andre
> >
> >  Makefile                          |   6 +
> >  arm/include/arm-common/kvm-arch.h |   3 +
> >  builtin-run.c                     |   2 +
> >  hw/cfi_flash.c                    | 546 ++++++++++++++++++++++++++++++
> >  include/kvm/kvm-config.h          |   1 +
> >  include/kvm/util.h                |   5 +
> >  6 files changed, 563 insertions(+)
> >  create mode 100644 hw/cfi_flash.c
> >
> > diff --git a/Makefile b/Makefile
> > index 3862112c..7ed6fb5e 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -170,6 +170,7 @@ ifeq ($(ARCH), arm)
> >  	CFLAGS		+= -march=armv7-a
> >  
> >  	ARCH_WANT_LIBFDT := y
> > +	ARCH_HAS_FLASH_MEM := y
> >  endif
> >  
> >  # ARM64
> > @@ -182,6 +183,7 @@ ifeq ($(ARCH), arm64)
> >  	ARCH_INCLUDE	+= -Iarm/aarch64/include
> >  
> >  	ARCH_WANT_LIBFDT := y
> > +	ARCH_HAS_FLASH_MEM := y
> >  endif
> >  
> >  ifeq ($(ARCH),mips)
> > @@ -261,6 +263,10 @@ ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
> >  	endif
> >  endif
> >  
> > +ifeq (y,$(ARCH_HAS_FLASH_MEM))
> > +	OBJS	+= hw/cfi_flash.o
> > +endif
> > +
> >  ifeq ($(call try-build,$(SOURCE_ZLIB),$(CFLAGS),$(LDFLAGS) -lz),y)
> >  	CFLAGS_DYNOPT	+= -DCONFIG_HAS_ZLIB
> >  	LIBS_DYNOPT	+= -lz
> > diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
> > index b9d486d5..2bb085f4 100644
> > --- a/arm/include/arm-common/kvm-arch.h
> > +++ b/arm/include/arm-common/kvm-arch.h
> > @@ -21,6 +21,9 @@
> >  #define ARM_GIC_DIST_SIZE	0x10000
> >  #define ARM_GIC_CPUI_SIZE	0x20000
> >  
> > +#define ARM_FLASH_MMIO_BASE	0x2000000		/* 32 MB */
> > +#define KVM_FLASH_MMIO_BASE	ARM_FLASH_MMIO_BASE
> > +
> >  #define ARM_IOPORT_SIZE		(ARM_MMIO_AREA - ARM_IOPORT_AREA)
> >  #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))
> >  #define ARM_PCI_CFG_SIZE	(1ULL << 24)
> > diff --git a/builtin-run.c b/builtin-run.c
> > index f8dc6c72..df8c6741 100644
> > --- a/builtin-run.c
> > +++ b/builtin-run.c
> > @@ -138,6 +138,8 @@ void kvm_run_set_wrapper_sandbox(void)
> >  			"Kernel command line arguments"),		\
> >  	OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\
> >  			"Firmware image to boot in virtual machine"),	\
> > +	OPT_STRING('F', "flash", &(cfg)->flash_filename, "flash",\
> > +			"Flash image to present to virtual machine"),	\
> >  									\
> >  	OPT_GROUP("Networking options:"),				\
> >  	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",	\
> > diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
> > new file mode 100644
> > index 00000000..d7c0e7e8
> > --- /dev/null
> > +++ b/hw/cfi_flash.c
> > @@ -0,0 +1,546 @@
> > +#include <stdbool.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <linux/bitops.h>
> > +#include <linux/err.h>
> > +#include <linux/sizes.h>
> > +#include <linux/types.h>
> > +
> > +#include "kvm/kvm.h"
> > +#include "kvm/kvm-arch.h"
> > +#include "kvm/devices.h"
> > +#include "kvm/fdt.h"
> > +#include "kvm/mutex.h"
> > +#include "kvm/util.h"
> > +
> > +/* The EDK2 driver hardcodes two 16-bit chips on a 32-bit bus. */
> > +#define CFI_NR_FLASH_CHIPS			2
> > +
> > +/* We always emulate a 32 bit bus width. */
> > +#define CFI_BUS_WIDTH				4
> > +
> > +/* The *effective* size of an erase block (over all chips) */
> > +#define FLASH_BLOCK_SIZE			SZ_64K
> > +
> > +#define PROGRAM_BUFF_SIZE_BITS			7
> > +#define PROGRAM_BUFF_SIZE			(1U << PROGRAM_BUFF_SIZE_BITS)
> > +
> > +/* CFI commands */
> > +#define CFI_CMD_LOCK_BLOCK			0x01
> > +#define CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP	0x10
> > +#define CFI_CMD_BLOCK_ERASE_SETUP		0x20
> > +#define CFI_CMD_WORD_PROGRAM_SETUP		0x40
> > +#define CFI_CMD_CLEAR_STATUS_REGISTER		0x50
> > +#define CFI_CMD_LOCK_BLOCK_SETUP		0x60
> > +#define CFI_CMD_READ_STATUS_REGISTER		0x70
> > +#define CFI_CMD_READ_JEDEC			0x90
> > +#define CFI_CMD_READ_CFI_QUERY			0x98
> > +#define CFI_CMD_BUFFERED_PROGRAM_CONFIRM	0xd0
> > +#define CFI_CMD_BLOCK_ERASE_CONFIRM		0xd0
> > +#define CFI_CMD_UNLOCK_BLOCK			0xd0
> > +#define CFI_CMD_BUFFERED_PROGRAM_SETUP		0xe8
> > +#define CFI_CMD_READ_ARRAY			0xff
> > +
> > +/*
> > + * CFI query table contents, as far as it is constant.
> > + */
> > +#define CFI_GEOM_OFFSET				0x27
> > +static u8 cfi_query_table[] = {
> > +		/* offset 0x10: CFI query identification string */
> > +	'Q', 'R', 'Y',		/* ID string */
> > +	0x01, 0x00,		/* primary command set: Intel/Sharp extended */
> > +	0x31, 0x00,		/* address of primary extended query table */
> > +	0x00, 0x00,		/* alternative command set: unused */
> > +	0x00, 0x00,		/* address of alternative extended query table*/
> > +		/* offset 0x1b: system interface information */
> > +	0x45,			/* minimum Vcc voltage: 4.5V */
> > +	0x55,			/* maximum Vcc voltage: 5.5V */
> > +	0x00,			/* minimum Vpp voltage: 0.0V (unused) */
> > +	0x00,			/* maximum Vpp voltage: 0.0V *(unused) */
> > +	0x01,			/* timeout for single word program: 2 us */
> > +	0x01,			/* timeout for multi-byte program: 2 us */
> > +	0x01,			/* timeout for block erase: 2 ms */
> > +	0x00,			/* timeout for full chip erase: not supported */
> > +	0x00,			/* max timeout for single word program: 1x */
> > +	0x00,			/* max timeout for mulit-byte program: 1x */
> > +	0x00,			/* max timeout for block erase: 1x */
> > +	0x00,			/* max timeout for chip erase: not supported */
> > +		/* offset 0x27: flash geometry information */
> > +	0x00,			/* size in power-of-2 bytes, filled later */
> > +	0x06, 0x00,		/* interface description: 32 and 16 bits */  
> 
> I don't think this is correct. From Intel StrataFlash Embedded Memory (P30)
> Family, table 34:
> 
> ""n" such that n+1 specifies the bit field that represents the flash device width
> capabilities as described in the table".

Yeah, seems to be correct, but it looks this Intel Strata document is the only place which details this encoding (which looks like being retrofit somehow).
And I didn't really use this document, because it's a manufacturer data sheet and not a specification.
I will change it to 0x5, but for the records Linux worked even with 0x6 for me.

> If you want to advertise 32 and 16 bit write capabilities, it should be 5 because
> 5+1=6. This is also the value that the Linux kernel checks for (see
> include/linux/mtd/cfi.h, define CFI_INTERFACE_X16_BY_X32_ASYNC"). 6 actually means
> 32, 16 and 8 bit accesses.
> 
> This begs another question: why do we support both 16 and 32 bit accesses instead
> of supporting only 32 bit?

Because we can, there is no reason to restrict this. I feel like we should be as capable as possible, especially since it's trivial to emulate.

> > +	PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
> > +				/* number of multi-byte writes */  
> 
> Shouldn't the comment be maximum number of bytes in the write buffer?

Yes, possibly.

> > +	0x01,			/* one erase block region */
> > +	0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> > +		/* offset 0x31: Intel primary algorithm extended query table */
> > +	'P', 'R', 'I',
> > +	'1', '0',		/* version 1.0 */
> > +	0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
> > +	0x00,			/* no functions after suspend */
> > +	0x01, 0x00,		/* only lock bit supported */
> > +	0x50,			/* best Vcc value: 5.0V */
> > +	0x00,			/* best Vpp value: 0.0V (unused) */
> > +	0x01,			/* number of protection register fields */
> > +	0x00, 0x00, 0x00, 0x00,	/* protection field 1 description */
> > +};  
> 
> As an aside, I found it impossible to review the cfi_query_table array in its
> current form. This is how I wrote the array so I could read it. I also took the
> liberty to remove the offset when indexing the array, making read_cfi less error
> prone, in my opinion:

Please don't post elaborate code sequences as a comment, especially not if it gets mangled (Thunderbird is annoyingly bad in this respect).
I think I would have got what you mean by showing just one line ;-)

Cheers,
Andre

> 
> diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
> index d7c0e7e80d69..65a90e288be8 100644
> --- a/hw/cfi_flash.c
> +++ b/hw/cfi_flash.c
> @@ -46,45 +46,43 @@
>   */
>  #define CFI_GEOM_OFFSET                                0x27
>  static u8 cfi_query_table[] = {
> -               /* offset 0x10: CFI query identification string */
> -       'Q', 'R', 'Y',          /* ID string */
> -       0x01, 0x00,             /* primary command set: Intel/Sharp extended */
> -       0x31, 0x00,             /* address of primary extended query table */
> -       0x00, 0x00,             /* alternative command set: unused */
> -       0x00, 0x00,             /* address of alternative extended query table*/
> -               /* offset 0x1b: system interface information */
> -       0x45,                   /* minimum Vcc voltage: 4.5V */
> -       0x55,                   /* maximum Vcc voltage: 5.5V */
> -       0x00,                   /* minimum Vpp voltage: 0.0V (unused) */
> -       0x00,                   /* maximum Vpp voltage: 0.0V *(unused) */
> -       0x01,                   /* timeout for single word program: 2 us */
> -       0x01,                   /* timeout for multi-byte program: 2 us */
> -       0x01,                   /* timeout for block erase: 2 ms */
> -       0x00,                   /* timeout for full chip erase: not supported */
> -       0x00,                   /* max timeout for single word program: 1x */
> -       0x00,                   /* max timeout for mulit-byte program: 1x */
> -       0x00,                   /* max timeout for block erase: 1x */
> -       0x00,                   /* max timeout for chip erase: not supported */
> -               /* offset 0x27: flash geometry information */
> -       0x00,                   /* size in power-of-2 bytes, filled later */
> -       0x06, 0x00,             /* interface description: 32 and 16 bits */
> -       PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
> +       [0x10] = 'Q', 'R', 'Y', /* ID string */
> +       [0x13] = 0x01, 0x00,    /* primary command set: Intel/Sharp extended */
> +       [0x15] = 0x31, 0x00,    /* address of primary extended query table */
> +       [0x17] = 0x00, 0x00,    /* alternative command set: unused */
> +       [0x19] = 0x00, 0x00,    /* address of alternative extended query table*/
> +       /* System interface information */
> +       [0x1b] = 0x45,          /* minimum Vcc voltage: 4.5V */
> +       [0x1c] = 0x55,          /* maximum Vcc voltage: 5.5V */
> +       [0x1d] = 0x00,          /* minimum Vpp voltage: 0.0V (unused) */
> +       [0x1e] = 0x00,          /* maximum Vpp voltage: 0.0V *(unused) */
> +       [0x1f] = 0x01,          /* timeout for single word program: 2 us */
> +       [0x20] = 0x01,          /* timeout for multi-byte program: 2 us */
> +       [0x21] = 0x01,          /* timeout for block erase: 2 ms */
> +       [0x22] = 0x00,          /* timeout for full chip erase: not supported */
> +       [0x23] = 0x00,          /* max timeout for single word program: 1x */
> +       [0x24] = 0x00,          /* max timeout for mulit-byte program: 1x */
> +       [0x25] = 0x00,          /* max timeout for block erase: 1x */
> +       [0x26] = 0x00,          /* max timeout for chip erase: not supported */
> +       /* Flash geometry information */
> +       [0x27] = 0x00,          /* size in power-of-2 bytes, filled later */
> +       [0x28] = 0x06, 0x00,    /* interface description: 32 and 16 bits */
> +       [0x2a] = PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
>                                 /* number of multi-byte writes */
> -       0x01,                   /* one erase block region */
> -       0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> -               /* offset 0x31: Intel primary algorithm extended query table */
> -       'P', 'R', 'I',
> -       '1', '0',               /* version 1.0 */
> -       0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
> -       0x00,                   /* no functions after suspend */
> -       0x01, 0x00,             /* only lock bit supported */
> -       0x50,                   /* best Vcc value: 5.0V */
> -       0x00,                   /* best Vpp value: 0.0V (unused) */
> -       0x01,                   /* number of protection register fields */
> -       0x00, 0x00, 0x00, 0x00, /* protection field 1 description */
> +       [0x2c] = 0x01,          /* one erase block region */
> +       [0x2d] = 0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> +       /* Intel primary algorithm extended query table */
> +       [0x31] = 'P', 'R', 'I', /* ID string */
> +       [0x34] = '1', '0',      /* version 1.0 */
> +       [0x36] = 0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock &
> pm-read */
> +       [0x40] = 0x00,          /* no functions after suspend */
> +       [0x41] = 0x01, 0x00,    /* only lock bit supported */
> ...skipping...
> +       [0x10] = 'Q', 'R', 'Y', /* ID string */
> +       [0x13] = 0x01, 0x00,    /* primary command set: Intel/Sharp extended */
> +       [0x15] = 0x31, 0x00,    /* address of primary extended query table */
> +       [0x17] = 0x00, 0x00,    /* alternative command set: unused */
> +       [0x19] = 0x00, 0x00,    /* address of alternative extended query table*/
> +       /* System interface information */
> +       [0x1b] = 0x45,          /* minimum Vcc voltage: 4.5V */
> +       [0x1c] = 0x55,          /* maximum Vcc voltage: 5.5V */
> +       [0x1d] = 0x00,          /* minimum Vpp voltage: 0.0V (unused) */
> +       [0x1e] = 0x00,          /* maximum Vpp voltage: 0.0V *(unused) */
> +       [0x1f] = 0x01,          /* timeout for single word program: 2 us */
> +       [0x20] = 0x01,          /* timeout for multi-byte program: 2 us */
> +       [0x21] = 0x01,          /* timeout for block erase: 2 ms */
> +       [0x22] = 0x00,          /* timeout for full chip erase: not supported */
> +       [0x23] = 0x00,          /* max timeout for single word program: 1x */
> +       [0x24] = 0x00,          /* max timeout for mulit-byte program: 1x */
> +       [0x25] = 0x00,          /* max timeout for block erase: 1x */
> +       [0x26] = 0x00,          /* max timeout for chip erase: not supported */
> +       /* Flash geometry information */
> +       [0x27] = 0x00,          /* size in power-of-2 bytes, filled later */
> +       [0x28] = 0x06, 0x00,    /* interface description: 32 and 16 bits */
> +       [0x2a] = PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
>                                 /* number of multi-byte writes */
> -       0x01,                   /* one erase block region */
> -       0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> -               /* offset 0x31: Intel primary algorithm extended query table */
> -       'P', 'R', 'I',
> -       '1', '0',               /* version 1.0 */
> -       0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
> -       0x00,                   /* no functions after suspend */
> -       0x01, 0x00,             /* only lock bit supported */
> -       0x50,                   /* best Vcc value: 5.0V */
> -       0x00,                   /* best Vpp value: 0.0V (unused) */
> -       0x01,                   /* number of protection register fields */
> -       0x00, 0x00, 0x00, 0x00, /* protection field 1 description */
> +       [0x2c] = 0x01,          /* one erase block region */
> +       [0x2d] = 0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> +       /* Intel primary algorithm extended query table */
> +       [0x31] = 'P', 'R', 'I', /* ID string */
> +       [0x34] = '1', '0',      /* version 1.0 */
> +       [0x36] = 0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock &
> pm-read */
> +       [0x40] = 0x00,          /* no functions after suspend */
> +       [0x41] = 0x01, 0x00,    /* only lock bit supported */
> +       [0x43] = 0x50,          /* best Vcc value: 5.0V */
> +       [0x43] = 0x00,          /* best Vpp value: 0.0V (unused) */
> +       [0x44] = 0x01,          /* number of protection register fields */
> +       [0x45] = 0x00, 0x00, 0x00, 0x00,/* protection field 1 description */
>  };
>  
> -
>  /*
>   * Those states represent a subset of the CFI flash state machine.
>   */
> @@ -141,10 +139,7 @@ static int nr_erase_blocks(struct cfi_flash_device *sfdev)
>   */
>  static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
>  {
> -       if (addr < 0x10)                /* CFI information starts at 0x10 */
> -               return 0;
> -
> -       if (addr - 0x10 > sizeof(cfi_query_table)) {
> +       if (addr > sizeof(cfi_query_table)) {
>                 pr_debug("CFI query read access beyond the end of table");
>                 return 0;
>         }
> @@ -163,7 +158,7 @@ static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
>                 return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
>         }
>  
> -       return cfi_query_table[addr - 0x10];
> +       return cfi_query_table[addr];
>  }
>  
>  static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
> 
> Thanks,
> Alex
> > +
> > +
> > +/*
> > + * Those states represent a subset of the CFI flash state machine.
> > + */
> > +enum cfi_flash_state {
> > +	READY,
> > +	LOCK_SETUP,
> > +	WP_SETUP,
> > +	BP_SETUP,
> > +	BP_LOAD,
> > +	ERASE_SETUP,
> > +};
> > +
> > +/*
> > + * The device can be in several **Read** modes.
> > + * We don't implement the asynchronous burst mode.
> > + */
> > +enum cfi_read_mode {
> > +	READ_ARRAY,
> > +	READ_STATUS,
> > +	READ_DEVICE_ID,
> > +	READ_QUERY,
> > +};
> > +
> > +struct cfi_flash_device {
> > +	struct device_header	dev_hdr;
> > +	/* Protects the CFI state machine variables in this data structure. */
> > +	struct mutex		mutex;
> > +	u64			base_addr;
> > +	u32			size;
> > +
> > +	void			*flash_memory;
> > +	u8			program_buffer[PROGRAM_BUFF_SIZE * 4];
> > +	unsigned long		*lock_bm;
> > +	u64			last_address;
> > +	unsigned int		buff_written;
> > +	unsigned int		program_length;
> > +
> > +	enum cfi_flash_state	state;
> > +	enum cfi_read_mode	read_mode;
> > +	u16			rcr;
> > +	u8			sr;
> > +};
> > +
> > +static int nr_erase_blocks(struct cfi_flash_device *sfdev)
> > +{
> > +	return sfdev->size / FLASH_BLOCK_SIZE;
> > +}
> > +
> > +/*
> > + * CFI queries always deal with one byte of information, possibly mirrored
> > + * to other bytes on the bus. This is dealt with in the callers.
> > + * The address provided is the one for 8-bit addressing, and would need to
> > + * be adjusted for wider accesses.
> > + */
> > +static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
> > +{
> > +	if (addr < 0x10)		/* CFI information starts at 0x10 */
> > +		return 0;
> > +
> > +	if (addr - 0x10 > sizeof(cfi_query_table)) {
> > +		pr_debug("CFI query read access beyond the end of table");
> > +		return 0;
> > +	}
> > +
> > +	/* Fixup dynamic information in the geometry part of the table. */
> > +	switch (addr) {
> > +	case CFI_GEOM_OFFSET:		/* device size in bytes, power of two */
> > +		return pow2_size(sfdev->size / CFI_NR_FLASH_CHIPS);
> > +	case CFI_GEOM_OFFSET + 6:	/* number of erase blocks, minus one */
> > +		return (nr_erase_blocks(sfdev) - 1) & 0xff;
> > +	case CFI_GEOM_OFFSET + 7:
> > +		return (nr_erase_blocks(sfdev) - 1) >> 8;
> > +	case CFI_GEOM_OFFSET + 8:	/* erase block size, in units of 256 */
> > +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) & 0xff;
> > +	case CFI_GEOM_OFFSET + 9:
> > +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
> > +	}
> > +
> > +	return cfi_query_table[addr - 0x10];
> > +}
> > +
> > +static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
> > +{
> > +	int block_nr = addr / FLASH_BLOCK_SIZE;
> > +
> > +	return test_bit(block_nr, sfdev->lock_bm);
> > +}
> > +
> > +#define DEV_ID_MASK 0x7ff
> > +static u16 read_dev_id(struct cfi_flash_device *sfdev, u64 addr)
> > +{
> > +	switch ((addr & DEV_ID_MASK) / CFI_BUS_WIDTH) {
> > +	case 0x0:				/* vendor ID */
> > +		return 0x0000;
> > +	case 0x1:				/* device ID */
> > +		return 0xffff;
> > +	case 0x2:
> > +		return block_is_locked(sfdev, addr & ~DEV_ID_MASK);
> > +	case 0x5:
> > +		return sfdev->rcr;
> > +	default:			/* Ignore the other entries. */
> > +		return 0;
> > +	}
> > +}
> > +
> > +static void lock_block(struct cfi_flash_device *sfdev, u64 addr, bool lock)
> > +{
> > +	int block_nr = addr / FLASH_BLOCK_SIZE;
> > +
> > +	if (lock)
> > +		set_bit(block_nr, sfdev->lock_bm);
> > +	else
> > +		clear_bit(block_nr, sfdev->lock_bm);
> > +}
> > +
> > +static void word_program(struct cfi_flash_device *sfdev,
> > +			 u64 addr, void *data, int len)
> > +{
> > +	if (block_is_locked(sfdev, addr)) {
> > +		sfdev->sr |= 0x12;
> > +		return;
> > +	}
> > +
> > +	memcpy(sfdev->flash_memory + addr, data, len);
> > +}
> > +
> > +/* Reset the program buffer state to prepare for follow-up writes. */
> > +static void buffer_setup(struct cfi_flash_device *sfdev)
> > +{
> > +	memset(sfdev->program_buffer, 0, sizeof(sfdev->program_buffer));
> > +	sfdev->last_address = ~0ULL;
> > +	sfdev->buff_written = 0;
> > +}
> > +
> > +static bool buffer_program(struct cfi_flash_device *sfdev,
> > +			   u64 addr, void *buffer, int len)
> > +{
> > +	unsigned int buf_addr;
> > +
> > +	if (sfdev->buff_written >= sfdev->program_length)
> > +		return false;
> > +
> > +	/*
> > +	 * The first word written into the buffer after the setup command
> > +	 * happens to be the base address for the buffer.
> > +	 * All subsequent writes need to be within this address and this
> > +	 * address plus the buffer size, so keep this value around.
> > +	 */
> > +	if (sfdev->last_address == ~0ULL)
> > +		sfdev->last_address = addr;
> > +
> > +	if (addr < sfdev->last_address)
> > +		return false;
> > +	buf_addr = addr - sfdev->last_address;
> > +	if (buf_addr >= PROGRAM_BUFF_SIZE)
> > +		return false;
> > +
> > +	memcpy(sfdev->program_buffer + buf_addr, buffer, len);
> > +	sfdev->buff_written++;
> > +
> > +	return true;
> > +}
> > +
> > +static void buffer_confirm(struct cfi_flash_device *sfdev)
> > +{
> > +	if (block_is_locked(sfdev, sfdev->last_address)) {
> > +		sfdev->sr |= 0x12;
> > +		return;
> > +	}
> > +	memcpy(sfdev->flash_memory + sfdev->last_address,
> > +	       sfdev->program_buffer,
> > +	       sfdev->buff_written * sizeof(u32));
> > +}
> > +
> > +static void block_erase_confirm(struct cfi_flash_device *sfdev, u64 addr)
> > +{
> > +	if (block_is_locked(sfdev, addr)) {
> > +		sfdev->sr |= 0x12;
> > +		return;
> > +	}
> > +
> > +	memset(sfdev->flash_memory + addr, 0xFF, FLASH_BLOCK_SIZE);
> > +}
> > +
> > +static void cfi_flash_mmio(struct kvm_cpu *vcpu,
> > +			   u64 addr, u8 *data, u32 len, u8 is_write,
> > +			   void *context)
> > +{
> > +	struct cfi_flash_device *sfdev = context;
> > +	u64 faddr = addr - sfdev->base_addr;
> > +	u32 value;
> > +
> > +	if (!is_write) {
> > +		u16 cfi_value = 0;
> > +
> > +		mutex_lock(&sfdev->mutex);
> > +
> > +		switch (sfdev->read_mode) {
> > +		case READ_ARRAY:
> > +			/* just copy the requested bytes from the array */
> > +			memcpy(data, sfdev->flash_memory + faddr, len);
> > +			goto out_unlock;
> > +		case READ_STATUS:
> > +			cfi_value = sfdev->sr;
> > +			break;
> > +		case READ_DEVICE_ID:
> > +			cfi_value = read_dev_id(sfdev, faddr);
> > +			break;
> > +		case READ_QUERY:
> > +			cfi_value = read_cfi(sfdev, faddr / CFI_BUS_WIDTH);
> > +			break;
> > +		}
> > +		switch (len) {
> > +		case 1:
> > +			*data = cfi_value;
> > +			break;
> > +		case 8: memset(data + 4, 0, 4);
> > +			/* fall-through */
> > +		case 4:
> > +			if (CFI_NR_FLASH_CHIPS == 2)
> > +				memcpy(data + 2, &cfi_value, 2);
> > +			else
> > +				memset(data + 2, 0, 2);
> > +			/* fall-through */
> > +		case 2:
> > +			memcpy(data, &cfi_value, 2);
> > +			break;
> > +		default:
> > +			pr_debug("CFI flash: illegal access length %d for read mode %d",
> > +				 len, sfdev->read_mode);
> > +			break;
> > +		}
> > +
> > +		goto out_unlock;
> > +	}
> > +
> > +	if (len > 4) {
> > +		pr_info("CFI flash: MMIO %d-bit write access not supported",
> > +			 len * 8);
> > +		return;
> > +	}
> > +
> > +	memcpy(&value, data, len);
> > +
> > +	mutex_lock(&sfdev->mutex);
> > +
> > +	switch (sfdev->state) {
> > +	case READY:			/* handled below */
> > +		break;
> > +
> > +	case LOCK_SETUP:
> > +		switch (value & 0xff) {
> > +		case CFI_CMD_LOCK_BLOCK:
> > +			lock_block(sfdev, faddr, true);
> > +			sfdev->read_mode = READ_STATUS;
> > +			break;
> > +		case CFI_CMD_UNLOCK_BLOCK:
> > +			lock_block(sfdev, faddr, false);
> > +			sfdev->read_mode = READ_STATUS;
> > +			break;
> > +		default:
> > +			sfdev->sr |= 0x30;
> > +			break;
> > +		}
> > +		sfdev->state = READY;
> > +		goto out_unlock;
> > +
> > +	case WP_SETUP:
> > +		word_program(sfdev, faddr, data, len);
> > +		sfdev->read_mode = READ_STATUS;
> > +		sfdev->state = READY;
> > +		goto out_unlock;
> > +
> > +	case BP_LOAD:
> > +		if (buffer_program(sfdev, faddr, data, len))
> > +			goto out_unlock;
> > +
> > +		if ((value & 0xFF) == CFI_CMD_BUFFERED_PROGRAM_CONFIRM) {
> > +			buffer_confirm(sfdev);
> > +			sfdev->read_mode = READ_STATUS;
> > +		} else {
> > +			pr_debug("CFI flash: BP_LOAD: expected CONFIRM(0xd0), got 0x%x @ 0x%llx",
> > +				 value, faddr);
> > +			sfdev->sr |= 0x10;
> > +		}
> > +		sfdev->state = READY;
> > +		goto out_unlock;
> > +
> > +	case BP_SETUP:
> > +		sfdev->program_length = (value & 0xffff) + 1;
> > +		if (sfdev->program_length > PROGRAM_BUFF_SIZE / 4)
> > +			sfdev->program_length = PROGRAM_BUFF_SIZE / 4;
> > +		sfdev->state = BP_LOAD;
> > +		sfdev->read_mode = READ_STATUS;
> > +		goto out_unlock;
> > +
> > +	case ERASE_SETUP:
> > +		if ((value & 0xff) == CFI_CMD_BLOCK_ERASE_CONFIRM)
> > +			block_erase_confirm(sfdev, faddr);
> > +		else
> > +			sfdev->sr |= 0x30;
> > +
> > +		sfdev->state = READY;
> > +		sfdev->read_mode = READ_STATUS;
> > +		goto out_unlock;
> > +	}
> > +
> > +	/* write commands in READY state */
> > +	switch (value & 0xFF) {
> > +	case CFI_CMD_READ_JEDEC:
> > +		sfdev->read_mode = READ_DEVICE_ID;
> > +		break;
> > +	case CFI_CMD_READ_STATUS_REGISTER:
> > +		sfdev->read_mode = READ_STATUS;
> > +		break;
> > +	case CFI_CMD_READ_CFI_QUERY:
> > +		sfdev->read_mode = READ_QUERY;
> > +		break;
> > +	case CFI_CMD_CLEAR_STATUS_REGISTER:
> > +		sfdev->sr = 0x80;
> > +		break;
> > +	case CFI_CMD_WORD_PROGRAM_SETUP:
> > +	case CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP:
> > +		sfdev->state = WP_SETUP;
> > +		sfdev->read_mode = READ_STATUS;
> > +		break;
> > +	case CFI_CMD_LOCK_BLOCK_SETUP:
> > +		sfdev->state = LOCK_SETUP;
> > +		break;
> > +	case CFI_CMD_BLOCK_ERASE_SETUP:
> > +		sfdev->state = ERASE_SETUP;
> > +		sfdev->read_mode = READ_STATUS;
> > +		break;
> > +	case CFI_CMD_BUFFERED_PROGRAM_SETUP:
> > +		buffer_setup(sfdev);
> > +		sfdev->state = BP_SETUP;
> > +		sfdev->read_mode = READ_STATUS;
> > +		break;
> > +	case CFI_CMD_BUFFERED_PROGRAM_CONFIRM:
> > +		pr_debug("CFI flash: unexpected confirm command 0xD0");
> > +		break;
> > +	default:
> > +		pr_debug("CFI flash: unknown command 0x%x", value);
> > +		/* fall through */
> > +	case CFI_CMD_READ_ARRAY:
> > +		sfdev->read_mode = READ_ARRAY;
> > +		break;
> > +	}
> > +
> > +out_unlock:
> > +	mutex_unlock(&sfdev->mutex);
> > +}
> > +
> > +#ifdef CONFIG_HAS_LIBFDT
> > +static void generate_cfi_flash_fdt_node(void *fdt,
> > +					struct device_header *dev_hdr,
> > +					void (*generate_irq_prop)(void *fdt,
> > +								  u8 irq,
> > +								enum irq_type))
> > +{
> > +	struct cfi_flash_device *sfdev;
> > +	u64 reg_prop[2];
> > +
> > +	sfdev = container_of(dev_hdr, struct cfi_flash_device, dev_hdr);
> > +	reg_prop[0] = cpu_to_fdt64(sfdev->base_addr);
> > +	reg_prop[1] = cpu_to_fdt64(sfdev->size);
> > +
> > +	_FDT(fdt_begin_node(fdt, "flash"));
> > +	_FDT(fdt_property_cell(fdt, "bank-width", CFI_BUS_WIDTH));
> > +	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
> > +	_FDT(fdt_property_cell(fdt, "#size-cells", 0x1));
> > +	_FDT(fdt_property_string(fdt, "compatible", "cfi-flash"));
> > +	_FDT(fdt_property_string(fdt, "label", "System-firmware"));
> > +	_FDT(fdt_property(fdt, "reg", &reg_prop, sizeof(reg_prop)));
> > +	_FDT(fdt_end_node(fdt));
> > +}
> > +#else
> > +#define generate_cfi_flash_fdt_node NULL
> > +#endif
> > +
> > +static struct cfi_flash_device *create_flash_device_file(struct kvm *kvm,
> > +							 const char *filename)
> > +{
> > +	struct cfi_flash_device *sfdev;
> > +	struct stat statbuf;
> > +	unsigned int value;
> > +	int ret;
> > +	int fd;
> > +
> > +	fd = open(filename, O_RDWR);
> > +	if (fd < 0)
> > +		return ERR_PTR(-errno);
> > +	if (fstat(fd, &statbuf) < 0) {
> > +		close(fd);
> > +		return ERR_PTR(-errno);
> > +	}
> > +
> > +	sfdev = malloc(sizeof(struct cfi_flash_device));
> > +	if (!sfdev) {
> > +		close(fd);
> > +		return ERR_PTR(-ENOMEM);
> > +	}
> > +
> > +	sfdev->size = (statbuf.st_size + 4095) & ~0xfffUL;
> > +	sfdev->flash_memory = mmap(NULL, statbuf.st_size,
> > +				   PROT_READ | PROT_WRITE, MAP_SHARED,
> > +				   fd, 0);
> > +	if (sfdev->flash_memory == MAP_FAILED) {
> > +		close(fd);
> > +		free(sfdev);
> > +		return ERR_PTR(-errno);
> > +	}
> > +	sfdev->base_addr = KVM_FLASH_MMIO_BASE;
> > +	sfdev->state = READY;
> > +	sfdev->read_mode = READ_ARRAY;
> > +	sfdev->sr = 0x80;
> > +	sfdev->rcr = 0xbfcf;
> > +
> > +	value = roundup(nr_erase_blocks(sfdev), BITS_PER_LONG) / 8;
> > +	sfdev->lock_bm = malloc(value);
> > +	memset(sfdev->lock_bm, 0, value);
> > +
> > +	sfdev->dev_hdr.bus_type = DEVICE_BUS_MMIO;
> > +	sfdev->dev_hdr.data = generate_cfi_flash_fdt_node;
> > +	mutex_init(&sfdev->mutex);
> > +	ret = device__register(&sfdev->dev_hdr);
> > +	if (ret) {
> > +		free(sfdev->flash_memory);
> > +		free(sfdev);
> > +		return ERR_PTR(ret);
> > +	}
> > +
> > +	ret = kvm__register_mmio(kvm,
> > +				 sfdev->base_addr, sfdev->size,
> > +				 false, cfi_flash_mmio, sfdev);
> > +	if (ret) {
> > +		device__unregister(&sfdev->dev_hdr);
> > +		free(sfdev->flash_memory);
> > +		free(sfdev);
> > +		return ERR_PTR(ret);
> > +	}
> > +
> > +	return sfdev;
> > +}
> > +
> > +static int flash__init(struct kvm *kvm)
> > +{
> > +	struct cfi_flash_device *sfdev;
> > +
> > +	if (!kvm->cfg.flash_filename)
> > +		return 0;
> > +
> > +	sfdev = create_flash_device_file(kvm, kvm->cfg.flash_filename);
> > +	if (IS_ERR(sfdev))
> > +		return PTR_ERR(sfdev);
> > +
> > +	return 0;
> > +}
> > +dev_init(flash__init);
> > diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
> > index a052b0bc..f4a8b831 100644
> > --- a/include/kvm/kvm-config.h
> > +++ b/include/kvm/kvm-config.h
> > @@ -35,6 +35,7 @@ struct kvm_config {
> >  	const char *vmlinux_filename;
> >  	const char *initrd_filename;
> >  	const char *firmware_filename;
> > +	const char *flash_filename;
> >  	const char *console;
> >  	const char *dev;
> >  	const char *network;
> > diff --git a/include/kvm/util.h b/include/kvm/util.h
> > index 4ca7aa93..5c37f0b7 100644
> > --- a/include/kvm/util.h
> > +++ b/include/kvm/util.h
> > @@ -104,6 +104,11 @@ static inline unsigned long roundup_pow_of_two(unsigned long x)
> >  	return x ? 1UL << fls_long(x - 1) : 0;
> >  }
> >  
> > +static inline int pow2_size(unsigned long x)
> > +{
> > +	return (sizeof(x) * 8) - __builtin_clzl(x - 1);
> > +}
> > +
> >  struct kvm;
> >  void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> >  void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);  

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH kvmtool v2] Add emulation for CFI compatible flash memory
  2020-02-19 17:26   ` Andre Przywara
@ 2020-02-20 10:24     ` Alexandru Elisei
  2020-02-20 18:00       ` Andre Przywara
  0 siblings, 1 reply; 8+ messages in thread
From: Alexandru Elisei @ 2020-02-20 10:24 UTC (permalink / raw)
  To: Andre Przywara
  Cc: Raphael Gault, Sami Mujawar, Will Deacon, kvmarm, linux-arm-kernel

Hi,

On 2/19/20 5:26 PM, Andre Przywara wrote:
> On Mon, 17 Feb 2020 17:20:43 +0000
> Alexandru Elisei <alexandru.elisei@arm.com> wrote:
>
> Hi,
>
>> I guess the device hasn't been tested with Linux. This is what I'm getting when
>> trying to boot a Linux guest using the command:
> It was actually developed with a Linux guest, because that's more verbatim and easier to debug.
>
> And I just tested this again with Linux and it worked for me:

The flash image you provided is 2 MB. The flash image that I used is 10 MB (it
shows in the log that I sent). I guess you ran a different test.

> [    2.164992] physmap-flash 20000.flash: physmap platform flash device: [mem 0x00020000-0x0021ffff]
> [    2.166539] 20000.flash: Found 2 x16 devices at 0x0 in 32-bit bank. Manufacturer ID 0x000000 Chip ID 0x00ffff
> ...
> # mtd_debug info /dev/mtd0
> mtd.type = MTD_NORFLASH
> mtd.flags = MTD_CAP_NORFLASH
> mtd.size = 2097152 (2M)
> mtd.erasesize = 65536 (64K)
> mtd.writesize = 1 
> mtd.oobsize = 0 
> regions = 1
>
> I think what you are seeing are problems when you give a non-power-of-2 sized flash image. The current patch does not really support this (since it's hardly a thing in the real world). I originally wanted to expand any "uneven" size to the next power-of-2, but this doesn't work easily with mmap.

I would expect that if kvmtool allows the user to specify a non-power-of-2 flash
image size, then it should know how to deal with it and not present a broken
device to a linux guest if that size is forbidden by the spec. Or it is allowed by
the specification and kvmtool doesn't know how to deal with it?

Instead of expanding the file provided by the user to fit a bigger flash, how
about you use the highest power of two size that is smaller than the flash size?

> So I now changed the code to downgrade, so you get 8MB with any file ranging from [8MB, 16MB(, for instance.
> That fixed the Linux problems with those files for me.
>
>> $ ./lkvm run -c4 -m4096 -k /path/to/kernel -d /path/to/disk -p root="/dev/vda2" -F
>> flash.img
>>
>> [    0.659167] physmap-flash 2000000.flash: physmap platform flash device: [mem
>> 0x02000000-0x029fffff]
>> [    0.660444] Number of erase regions: 1
>> [    0.661036] Primary Vendor Command Set: 0001 (Intel/Sharp Extended)
>> [    0.661688] Primary Algorithm Table at 0031
>> [    0.662168] Alternative Vendor Command Set: 0000 (None)
>> [    0.662711] No Alternate Algorithm Table
>> [    0.663120] Vcc Minimum:  4.5 V
>> [    0.663450] Vcc Maximum:  5.5 V
>> [    0.663779] No Vpp line
>> [    0.664039] Typical byte/word write timeout: 2 µs
>> [    0.664590] Maximum byte/word write timeout: 2 µs
>> [    0.665240] Typical full buffer write timeout: 2 µs
>> [    0.665775] Maximum full buffer write timeout: 2 µs
>> [    0.666373] Typical block erase timeout: 2 ms
>> [    0.666828] Maximum block erase timeout: 2 ms
>> [    0.667282] Chip erase not supported
>> [    0.667659] Device size: 0x800000 bytes (8 MiB)
>> [    0.668137] Flash Device Interface description: 0x0006
>> [    0.668697]   - Unknown
>> [    0.668963] Max. bytes in buffer write: 0x40
>> [    0.669407] Number of Erase Block Regions: 1
>> [    0.669865]   Erase Region #0: BlockSize 0x8000 bytes, 160 blocks
>> [    0.672299] 2000000.flash: Found 2 x16 devices at 0x0 in 32-bit bank.
>> Manufacturer ID 0x000000 Chip ID 0x00ffff
>> [    0.681328] NOR chip too large to fit in mapping. Attempting to cope...
>> [    0.682046] Intel/Sharp Extended Query Table at 0x0031
>> [    0.682645] Using buffer write method
>> [    0.683031] Sum of regions (a00000) != total size of set of interleaved chips
>> (1000000)
>> [    0.683854] gen_probe: No supported Vendor Command Set found
>> [    0.684441] physmap-flash 2000000.flash: map_probe failed
>>
>> I also defined DEBUG_CFI in drivers/mtd/chips/cfi_probe.c.
>>
>> The Flash Device Interface description that we provide is wrong, it should 0x05.
>> More details below.
>>
>> On 2/7/20 12:19 PM, Andre Przywara wrote:
>>> From: Raphael Gault <raphael.gault@arm.com>
>>>
>>> The EDK II UEFI firmware implementation requires some storage for the EFI
>>> variables, which is typically some flash storage.
>>> Since this is already supported on the EDK II side, we add a CFI flash
>>> emulation to kvmtool.
>>> This is backed by a file, specified via the --flash or -F command line
>>> option. Any flash writes done by the guest will immediately be reflected
>>> into this file (kvmtool mmap's the file).
>>>
>>> This implements a CFI flash using the "Intel/Sharp extended command
>>> set", as specified in:
>>> - JEDEC JESD68.01
>>> - JEDEC JEP137B
>>> - Intel Application Note 646
>>> Some gaps in those specs have been filled by looking at real devices and
>>> other implementations (QEMU, Linux kernel driver).
>>>
>>> At the moment this relies on DT to advertise the base address of the
>>> flash memory (mapped into the MMIO address space) and is only enabled
>>> for ARM/ARM64. The emulation itself is architecture agnostic, though.
>>>
>>> This is one missing piece toward a working UEFI boot with kvmtool on
>>> ARM guests, the other is to provide writable PCI BARs, which is WIP.
>>>
>>> Signed-off-by: Raphael Gault <raphael.gault@arm.com>
>>> [Andre: rewriting and fixing]
>>> Signed-off-by: Andre Przywra <andre.przywara@arm.com>
>>> ---
>>> Hi,
>>>
>>> an update addressing Will's comments. I added coarse grained locking
>>> to the MMIO handler, to prevent concurrent vCPU accesses from messing up
>>> the internal CFI flash state machine.
>>> I also folded the actual flash array read access into the MMIO handler
>>> and fixed the other small issues.
>>>
>>> Cheers,
>>> Andre
>>>
>>>  Makefile                          |   6 +
>>>  arm/include/arm-common/kvm-arch.h |   3 +
>>>  builtin-run.c                     |   2 +
>>>  hw/cfi_flash.c                    | 546 ++++++++++++++++++++++++++++++
>>>  include/kvm/kvm-config.h          |   1 +
>>>  include/kvm/util.h                |   5 +
>>>  6 files changed, 563 insertions(+)
>>>  create mode 100644 hw/cfi_flash.c
>>>
>>> diff --git a/Makefile b/Makefile
>>> index 3862112c..7ed6fb5e 100644
>>> --- a/Makefile
>>> +++ b/Makefile
>>> @@ -170,6 +170,7 @@ ifeq ($(ARCH), arm)
>>>  	CFLAGS		+= -march=armv7-a
>>>  
>>>  	ARCH_WANT_LIBFDT := y
>>> +	ARCH_HAS_FLASH_MEM := y
>>>  endif
>>>  
>>>  # ARM64
>>> @@ -182,6 +183,7 @@ ifeq ($(ARCH), arm64)
>>>  	ARCH_INCLUDE	+= -Iarm/aarch64/include
>>>  
>>>  	ARCH_WANT_LIBFDT := y
>>> +	ARCH_HAS_FLASH_MEM := y
>>>  endif
>>>  
>>>  ifeq ($(ARCH),mips)
>>> @@ -261,6 +263,10 @@ ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
>>>  	endif
>>>  endif
>>>  
>>> +ifeq (y,$(ARCH_HAS_FLASH_MEM))
>>> +	OBJS	+= hw/cfi_flash.o
>>> +endif
>>> +
>>>  ifeq ($(call try-build,$(SOURCE_ZLIB),$(CFLAGS),$(LDFLAGS) -lz),y)
>>>  	CFLAGS_DYNOPT	+= -DCONFIG_HAS_ZLIB
>>>  	LIBS_DYNOPT	+= -lz
>>> diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
>>> index b9d486d5..2bb085f4 100644
>>> --- a/arm/include/arm-common/kvm-arch.h
>>> +++ b/arm/include/arm-common/kvm-arch.h
>>> @@ -21,6 +21,9 @@
>>>  #define ARM_GIC_DIST_SIZE	0x10000
>>>  #define ARM_GIC_CPUI_SIZE	0x20000
>>>  
>>> +#define ARM_FLASH_MMIO_BASE	0x2000000		/* 32 MB */
>>> +#define KVM_FLASH_MMIO_BASE	ARM_FLASH_MMIO_BASE
>>> +
>>>  #define ARM_IOPORT_SIZE		(ARM_MMIO_AREA - ARM_IOPORT_AREA)
>>>  #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))
>>>  #define ARM_PCI_CFG_SIZE	(1ULL << 24)
>>> diff --git a/builtin-run.c b/builtin-run.c
>>> index f8dc6c72..df8c6741 100644
>>> --- a/builtin-run.c
>>> +++ b/builtin-run.c
>>> @@ -138,6 +138,8 @@ void kvm_run_set_wrapper_sandbox(void)
>>>  			"Kernel command line arguments"),		\
>>>  	OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\
>>>  			"Firmware image to boot in virtual machine"),	\
>>> +	OPT_STRING('F', "flash", &(cfg)->flash_filename, "flash",\
>>> +			"Flash image to present to virtual machine"),	\
>>>  									\
>>>  	OPT_GROUP("Networking options:"),				\
>>>  	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",	\
>>> diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
>>> new file mode 100644
>>> index 00000000..d7c0e7e8
>>> --- /dev/null
>>> +++ b/hw/cfi_flash.c
>>> @@ -0,0 +1,546 @@
>>> +#include <stdbool.h>
>>> +#include <stdlib.h>
>>> +#include <string.h>
>>> +#include <linux/bitops.h>
>>> +#include <linux/err.h>
>>> +#include <linux/sizes.h>
>>> +#include <linux/types.h>
>>> +
>>> +#include "kvm/kvm.h"
>>> +#include "kvm/kvm-arch.h"
>>> +#include "kvm/devices.h"
>>> +#include "kvm/fdt.h"
>>> +#include "kvm/mutex.h"
>>> +#include "kvm/util.h"
>>> +
>>> +/* The EDK2 driver hardcodes two 16-bit chips on a 32-bit bus. */
>>> +#define CFI_NR_FLASH_CHIPS			2
>>> +
>>> +/* We always emulate a 32 bit bus width. */
>>> +#define CFI_BUS_WIDTH				4
>>> +
>>> +/* The *effective* size of an erase block (over all chips) */
>>> +#define FLASH_BLOCK_SIZE			SZ_64K
>>> +
>>> +#define PROGRAM_BUFF_SIZE_BITS			7
>>> +#define PROGRAM_BUFF_SIZE			(1U << PROGRAM_BUFF_SIZE_BITS)
>>> +
>>> +/* CFI commands */
>>> +#define CFI_CMD_LOCK_BLOCK			0x01
>>> +#define CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP	0x10
>>> +#define CFI_CMD_BLOCK_ERASE_SETUP		0x20
>>> +#define CFI_CMD_WORD_PROGRAM_SETUP		0x40
>>> +#define CFI_CMD_CLEAR_STATUS_REGISTER		0x50
>>> +#define CFI_CMD_LOCK_BLOCK_SETUP		0x60
>>> +#define CFI_CMD_READ_STATUS_REGISTER		0x70
>>> +#define CFI_CMD_READ_JEDEC			0x90
>>> +#define CFI_CMD_READ_CFI_QUERY			0x98
>>> +#define CFI_CMD_BUFFERED_PROGRAM_CONFIRM	0xd0
>>> +#define CFI_CMD_BLOCK_ERASE_CONFIRM		0xd0
>>> +#define CFI_CMD_UNLOCK_BLOCK			0xd0
>>> +#define CFI_CMD_BUFFERED_PROGRAM_SETUP		0xe8
>>> +#define CFI_CMD_READ_ARRAY			0xff
>>> +
>>> +/*
>>> + * CFI query table contents, as far as it is constant.
>>> + */
>>> +#define CFI_GEOM_OFFSET				0x27
>>> +static u8 cfi_query_table[] = {
>>> +		/* offset 0x10: CFI query identification string */
>>> +	'Q', 'R', 'Y',		/* ID string */
>>> +	0x01, 0x00,		/* primary command set: Intel/Sharp extended */
>>> +	0x31, 0x00,		/* address of primary extended query table */
>>> +	0x00, 0x00,		/* alternative command set: unused */
>>> +	0x00, 0x00,		/* address of alternative extended query table*/
>>> +		/* offset 0x1b: system interface information */
>>> +	0x45,			/* minimum Vcc voltage: 4.5V */
>>> +	0x55,			/* maximum Vcc voltage: 5.5V */
>>> +	0x00,			/* minimum Vpp voltage: 0.0V (unused) */
>>> +	0x00,			/* maximum Vpp voltage: 0.0V *(unused) */
>>> +	0x01,			/* timeout for single word program: 2 us */
>>> +	0x01,			/* timeout for multi-byte program: 2 us */
>>> +	0x01,			/* timeout for block erase: 2 ms */
>>> +	0x00,			/* timeout for full chip erase: not supported */
>>> +	0x00,			/* max timeout for single word program: 1x */
>>> +	0x00,			/* max timeout for mulit-byte program: 1x */
>>> +	0x00,			/* max timeout for block erase: 1x */
>>> +	0x00,			/* max timeout for chip erase: not supported */
>>> +		/* offset 0x27: flash geometry information */
>>> +	0x00,			/* size in power-of-2 bytes, filled later */
>>> +	0x06, 0x00,		/* interface description: 32 and 16 bits */  
>> I don't think this is correct. From Intel StrataFlash Embedded Memory (P30)
>> Family, table 34:
>>
>> ""n" such that n+1 specifies the bit field that represents the flash device width
>> capabilities as described in the table".
> Yeah, seems to be correct, but it looks this Intel Strata document is the only place which details this encoding (which looks like being retrofit somehow).
> And I didn't really use this document, because it's a manufacturer data sheet and not a specification.

The device is in the list of specification you provided in the commit message. I
think it would make the reviewers' life a lot easier if you posted all the
documentation that you used, and drop documentation that you didn't. Where did you
get the value 0x06 from? That was the only document from where I could infer what
it means, maybe I didn't dig deep enough.

> I will change it to 0x5, but for the records Linux worked even with 0x6 for me.

I would say that in this case working != correct, because Linux 5.6-rc2 defines
0x05 as a x32/x16 interface, and 0x06 is undefined in the file that I mentioned in
the previous reply. Did you check to see if the Linux driver recognized that
interface type? Maybe it changed between versions. I also tried 0x00,0x00 for the
interface description and Linux also worked.

Either way, I followed the trail of breadcrumbs starting at the comment for the
define and I found this in Common Flash Memory Interface Specification release
2.0, Appendix:

"Note: April 2000 -x16/x32 devices will be represented by hex value 0005h as
requested by Intel in order to make them more software friendly. Changes will be
made to the CFI drivers so that a bit-wise switch is created to represent
different data widths. [..] For example, if we take the description for an x16/x32
device (0005h) and we convert that to binary we get 0101b. If we add one to this
value we get a bit pattern that looks like this: 0110b. This bit-wise switch
indicates that the device a x16/x32 device".

I guess the reason for the inconsistencies is that at some point it used to be
different, but it was changed because Intel requested it.

>> If you want to advertise 32 and 16 bit write capabilities, it should be 5 because
>> 5+1=6. This is also the value that the Linux kernel checks for (see
>> include/linux/mtd/cfi.h, define CFI_INTERFACE_X16_BY_X32_ASYNC"). 6 actually means
>> 32, 16 and 8 bit accesses.
>>
>> This begs another question: why do we support both 16 and 32 bit accesses instead
>> of supporting only 32 bit?
> Because we can, there is no reason to restrict this. I feel like we should be as capable as possible, especially since it's trivial to emulate.

Makes sense.

>
>>> +	PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
>>> +				/* number of multi-byte writes */  
>> Shouldn't the comment be maximum number of bytes in the write buffer?
> Yes, possibly.
>
>>> +	0x01,			/* one erase block region */
>>> +	0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
>>> +		/* offset 0x31: Intel primary algorithm extended query table */
>>> +	'P', 'R', 'I',
>>> +	'1', '0',		/* version 1.0 */
>>> +	0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
>>> +	0x00,			/* no functions after suspend */
>>> +	0x01, 0x00,		/* only lock bit supported */
>>> +	0x50,			/* best Vcc value: 5.0V */
>>> +	0x00,			/* best Vpp value: 0.0V (unused) */
>>> +	0x01,			/* number of protection register fields */
>>> +	0x00, 0x00, 0x00, 0x00,	/* protection field 1 description */
>>> +};  
>> As an aside, I found it impossible to review the cfi_query_table array in its
>> current form. This is how I wrote the array so I could read it. I also took the
>> liberty to remove the offset when indexing the array, making read_cfi less error
>> prone, in my opinion:
> Please don't post elaborate code sequences as a comment, especially not if it gets mangled (Thunderbird is annoyingly bad in this respect).

I use Thunderbird and it showed fine for me, and I have sent large diffs before in
replies and I got no complaints. I use the settings from
Documentation/process/email-clients.rst.

Thanks,
Alex
> I think I would have got what you mean by showing just one line ;-)
>
> Cheers,
> Andre
>
>> diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
>> index d7c0e7e80d69..65a90e288be8 100644
>> --- a/hw/cfi_flash.c
>> +++ b/hw/cfi_flash.c
>> @@ -46,45 +46,43 @@
>>   */
>>  #define CFI_GEOM_OFFSET                                0x27
>>  static u8 cfi_query_table[] = {
>> -               /* offset 0x10: CFI query identification string */
>> -       'Q', 'R', 'Y',          /* ID string */
>> -       0x01, 0x00,             /* primary command set: Intel/Sharp extended */
>> -       0x31, 0x00,             /* address of primary extended query table */
>> -       0x00, 0x00,             /* alternative command set: unused */
>> -       0x00, 0x00,             /* address of alternative extended query table*/
>> -               /* offset 0x1b: system interface information */
>> -       0x45,                   /* minimum Vcc voltage: 4.5V */
>> -       0x55,                   /* maximum Vcc voltage: 5.5V */
>> -       0x00,                   /* minimum Vpp voltage: 0.0V (unused) */
>> -       0x00,                   /* maximum Vpp voltage: 0.0V *(unused) */
>> -       0x01,                   /* timeout for single word program: 2 us */
>> -       0x01,                   /* timeout for multi-byte program: 2 us */
>> -       0x01,                   /* timeout for block erase: 2 ms */
>> -       0x00,                   /* timeout for full chip erase: not supported */
>> -       0x00,                   /* max timeout for single word program: 1x */
>> -       0x00,                   /* max timeout for mulit-byte program: 1x */
>> -       0x00,                   /* max timeout for block erase: 1x */
>> -       0x00,                   /* max timeout for chip erase: not supported */
>> -               /* offset 0x27: flash geometry information */
>> -       0x00,                   /* size in power-of-2 bytes, filled later */
>> -       0x06, 0x00,             /* interface description: 32 and 16 bits */
>> -       PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
>> +       [0x10] = 'Q', 'R', 'Y', /* ID string */
>> +       [0x13] = 0x01, 0x00,    /* primary command set: Intel/Sharp extended */
>> +       [0x15] = 0x31, 0x00,    /* address of primary extended query table */
>> +       [0x17] = 0x00, 0x00,    /* alternative command set: unused */
>> +       [0x19] = 0x00, 0x00,    /* address of alternative extended query table*/
>> +       /* System interface information */
>> +       [0x1b] = 0x45,          /* minimum Vcc voltage: 4.5V */
>> +       [0x1c] = 0x55,          /* maximum Vcc voltage: 5.5V */
>> +       [0x1d] = 0x00,          /* minimum Vpp voltage: 0.0V (unused) */
>> +       [0x1e] = 0x00,          /* maximum Vpp voltage: 0.0V *(unused) */
>> +       [0x1f] = 0x01,          /* timeout for single word program: 2 us */
>> +       [0x20] = 0x01,          /* timeout for multi-byte program: 2 us */
>> +       [0x21] = 0x01,          /* timeout for block erase: 2 ms */
>> +       [0x22] = 0x00,          /* timeout for full chip erase: not supported */
>> +       [0x23] = 0x00,          /* max timeout for single word program: 1x */
>> +       [0x24] = 0x00,          /* max timeout for mulit-byte program: 1x */
>> +       [0x25] = 0x00,          /* max timeout for block erase: 1x */
>> +       [0x26] = 0x00,          /* max timeout for chip erase: not supported */
>> +       /* Flash geometry information */
>> +       [0x27] = 0x00,          /* size in power-of-2 bytes, filled later */
>> +       [0x28] = 0x06, 0x00,    /* interface description: 32 and 16 bits */
>> +       [0x2a] = PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
>>                                 /* number of multi-byte writes */
>> -       0x01,                   /* one erase block region */
>> -       0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
>> -               /* offset 0x31: Intel primary algorithm extended query table */
>> -       'P', 'R', 'I',
>> -       '1', '0',               /* version 1.0 */
>> -       0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
>> -       0x00,                   /* no functions after suspend */
>> -       0x01, 0x00,             /* only lock bit supported */
>> -       0x50,                   /* best Vcc value: 5.0V */
>> -       0x00,                   /* best Vpp value: 0.0V (unused) */
>> -       0x01,                   /* number of protection register fields */
>> -       0x00, 0x00, 0x00, 0x00, /* protection field 1 description */
>> +       [0x2c] = 0x01,          /* one erase block region */
>> +       [0x2d] = 0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
>> +       /* Intel primary algorithm extended query table */
>> +       [0x31] = 'P', 'R', 'I', /* ID string */
>> +       [0x34] = '1', '0',      /* version 1.0 */
>> +       [0x36] = 0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock &
>> pm-read */
>> +       [0x40] = 0x00,          /* no functions after suspend */
>> +       [0x41] = 0x01, 0x00,    /* only lock bit supported */
>> ...skipping...
>> +       [0x10] = 'Q', 'R', 'Y', /* ID string */
>> +       [0x13] = 0x01, 0x00,    /* primary command set: Intel/Sharp extended */
>> +       [0x15] = 0x31, 0x00,    /* address of primary extended query table */
>> +       [0x17] = 0x00, 0x00,    /* alternative command set: unused */
>> +       [0x19] = 0x00, 0x00,    /* address of alternative extended query table*/
>> +       /* System interface information */
>> +       [0x1b] = 0x45,          /* minimum Vcc voltage: 4.5V */
>> +       [0x1c] = 0x55,          /* maximum Vcc voltage: 5.5V */
>> +       [0x1d] = 0x00,          /* minimum Vpp voltage: 0.0V (unused) */
>> +       [0x1e] = 0x00,          /* maximum Vpp voltage: 0.0V *(unused) */
>> +       [0x1f] = 0x01,          /* timeout for single word program: 2 us */
>> +       [0x20] = 0x01,          /* timeout for multi-byte program: 2 us */
>> +       [0x21] = 0x01,          /* timeout for block erase: 2 ms */
>> +       [0x22] = 0x00,          /* timeout for full chip erase: not supported */
>> +       [0x23] = 0x00,          /* max timeout for single word program: 1x */
>> +       [0x24] = 0x00,          /* max timeout for mulit-byte program: 1x */
>> +       [0x25] = 0x00,          /* max timeout for block erase: 1x */
>> +       [0x26] = 0x00,          /* max timeout for chip erase: not supported */
>> +       /* Flash geometry information */
>> +       [0x27] = 0x00,          /* size in power-of-2 bytes, filled later */
>> +       [0x28] = 0x06, 0x00,    /* interface description: 32 and 16 bits */
>> +       [0x2a] = PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
>>                                 /* number of multi-byte writes */
>> -       0x01,                   /* one erase block region */
>> -       0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
>> -               /* offset 0x31: Intel primary algorithm extended query table */
>> -       'P', 'R', 'I',
>> -       '1', '0',               /* version 1.0 */
>> -       0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
>> -       0x00,                   /* no functions after suspend */
>> -       0x01, 0x00,             /* only lock bit supported */
>> -       0x50,                   /* best Vcc value: 5.0V */
>> -       0x00,                   /* best Vpp value: 0.0V (unused) */
>> -       0x01,                   /* number of protection register fields */
>> -       0x00, 0x00, 0x00, 0x00, /* protection field 1 description */
>> +       [0x2c] = 0x01,          /* one erase block region */
>> +       [0x2d] = 0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
>> +       /* Intel primary algorithm extended query table */
>> +       [0x31] = 'P', 'R', 'I', /* ID string */
>> +       [0x34] = '1', '0',      /* version 1.0 */
>> +       [0x36] = 0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock &
>> pm-read */
>> +       [0x40] = 0x00,          /* no functions after suspend */
>> +       [0x41] = 0x01, 0x00,    /* only lock bit supported */
>> +       [0x43] = 0x50,          /* best Vcc value: 5.0V */
>> +       [0x43] = 0x00,          /* best Vpp value: 0.0V (unused) */
>> +       [0x44] = 0x01,          /* number of protection register fields */
>> +       [0x45] = 0x00, 0x00, 0x00, 0x00,/* protection field 1 description */
>>  };
>>  
>> -
>>  /*
>>   * Those states represent a subset of the CFI flash state machine.
>>   */
>> @@ -141,10 +139,7 @@ static int nr_erase_blocks(struct cfi_flash_device *sfdev)
>>   */
>>  static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
>>  {
>> -       if (addr < 0x10)                /* CFI information starts at 0x10 */
>> -               return 0;
>> -
>> -       if (addr - 0x10 > sizeof(cfi_query_table)) {
>> +       if (addr > sizeof(cfi_query_table)) {
>>                 pr_debug("CFI query read access beyond the end of table");
>>                 return 0;
>>         }
>> @@ -163,7 +158,7 @@ static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
>>                 return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
>>         }
>>  
>> -       return cfi_query_table[addr - 0x10];
>> +       return cfi_query_table[addr];
>>  }
>>  
>>  static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
>>
>> Thanks,
>> Alex
>>> +
>>> +
>>> +/*
>>> + * Those states represent a subset of the CFI flash state machine.
>>> + */
>>> +enum cfi_flash_state {
>>> +	READY,
>>> +	LOCK_SETUP,
>>> +	WP_SETUP,
>>> +	BP_SETUP,
>>> +	BP_LOAD,
>>> +	ERASE_SETUP,
>>> +};
>>> +
>>> +/*
>>> + * The device can be in several **Read** modes.
>>> + * We don't implement the asynchronous burst mode.
>>> + */
>>> +enum cfi_read_mode {
>>> +	READ_ARRAY,
>>> +	READ_STATUS,
>>> +	READ_DEVICE_ID,
>>> +	READ_QUERY,
>>> +};
>>> +
>>> +struct cfi_flash_device {
>>> +	struct device_header	dev_hdr;
>>> +	/* Protects the CFI state machine variables in this data structure. */
>>> +	struct mutex		mutex;
>>> +	u64			base_addr;
>>> +	u32			size;
>>> +
>>> +	void			*flash_memory;
>>> +	u8			program_buffer[PROGRAM_BUFF_SIZE * 4];
>>> +	unsigned long		*lock_bm;
>>> +	u64			last_address;
>>> +	unsigned int		buff_written;
>>> +	unsigned int		program_length;
>>> +
>>> +	enum cfi_flash_state	state;
>>> +	enum cfi_read_mode	read_mode;
>>> +	u16			rcr;
>>> +	u8			sr;
>>> +};
>>> +
>>> +static int nr_erase_blocks(struct cfi_flash_device *sfdev)
>>> +{
>>> +	return sfdev->size / FLASH_BLOCK_SIZE;
>>> +}
>>> +
>>> +/*
>>> + * CFI queries always deal with one byte of information, possibly mirrored
>>> + * to other bytes on the bus. This is dealt with in the callers.
>>> + * The address provided is the one for 8-bit addressing, and would need to
>>> + * be adjusted for wider accesses.
>>> + */
>>> +static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
>>> +{
>>> +	if (addr < 0x10)		/* CFI information starts at 0x10 */
>>> +		return 0;
>>> +
>>> +	if (addr - 0x10 > sizeof(cfi_query_table)) {
>>> +		pr_debug("CFI query read access beyond the end of table");
>>> +		return 0;
>>> +	}
>>> +
>>> +	/* Fixup dynamic information in the geometry part of the table. */
>>> +	switch (addr) {
>>> +	case CFI_GEOM_OFFSET:		/* device size in bytes, power of two */
>>> +		return pow2_size(sfdev->size / CFI_NR_FLASH_CHIPS);
>>> +	case CFI_GEOM_OFFSET + 6:	/* number of erase blocks, minus one */
>>> +		return (nr_erase_blocks(sfdev) - 1) & 0xff;
>>> +	case CFI_GEOM_OFFSET + 7:
>>> +		return (nr_erase_blocks(sfdev) - 1) >> 8;
>>> +	case CFI_GEOM_OFFSET + 8:	/* erase block size, in units of 256 */
>>> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) & 0xff;
>>> +	case CFI_GEOM_OFFSET + 9:
>>> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
>>> +	}
>>> +
>>> +	return cfi_query_table[addr - 0x10];
>>> +}
>>> +
>>> +static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
>>> +{
>>> +	int block_nr = addr / FLASH_BLOCK_SIZE;
>>> +
>>> +	return test_bit(block_nr, sfdev->lock_bm);
>>> +}
>>> +
>>> +#define DEV_ID_MASK 0x7ff
>>> +static u16 read_dev_id(struct cfi_flash_device *sfdev, u64 addr)
>>> +{
>>> +	switch ((addr & DEV_ID_MASK) / CFI_BUS_WIDTH) {
>>> +	case 0x0:				/* vendor ID */
>>> +		return 0x0000;
>>> +	case 0x1:				/* device ID */
>>> +		return 0xffff;
>>> +	case 0x2:
>>> +		return block_is_locked(sfdev, addr & ~DEV_ID_MASK);
>>> +	case 0x5:
>>> +		return sfdev->rcr;
>>> +	default:			/* Ignore the other entries. */
>>> +		return 0;
>>> +	}
>>> +}
>>> +
>>> +static void lock_block(struct cfi_flash_device *sfdev, u64 addr, bool lock)
>>> +{
>>> +	int block_nr = addr / FLASH_BLOCK_SIZE;
>>> +
>>> +	if (lock)
>>> +		set_bit(block_nr, sfdev->lock_bm);
>>> +	else
>>> +		clear_bit(block_nr, sfdev->lock_bm);
>>> +}
>>> +
>>> +static void word_program(struct cfi_flash_device *sfdev,
>>> +			 u64 addr, void *data, int len)
>>> +{
>>> +	if (block_is_locked(sfdev, addr)) {
>>> +		sfdev->sr |= 0x12;
>>> +		return;
>>> +	}
>>> +
>>> +	memcpy(sfdev->flash_memory + addr, data, len);
>>> +}
>>> +
>>> +/* Reset the program buffer state to prepare for follow-up writes. */
>>> +static void buffer_setup(struct cfi_flash_device *sfdev)
>>> +{
>>> +	memset(sfdev->program_buffer, 0, sizeof(sfdev->program_buffer));
>>> +	sfdev->last_address = ~0ULL;
>>> +	sfdev->buff_written = 0;
>>> +}
>>> +
>>> +static bool buffer_program(struct cfi_flash_device *sfdev,
>>> +			   u64 addr, void *buffer, int len)
>>> +{
>>> +	unsigned int buf_addr;
>>> +
>>> +	if (sfdev->buff_written >= sfdev->program_length)
>>> +		return false;
>>> +
>>> +	/*
>>> +	 * The first word written into the buffer after the setup command
>>> +	 * happens to be the base address for the buffer.
>>> +	 * All subsequent writes need to be within this address and this
>>> +	 * address plus the buffer size, so keep this value around.
>>> +	 */
>>> +	if (sfdev->last_address == ~0ULL)
>>> +		sfdev->last_address = addr;
>>> +
>>> +	if (addr < sfdev->last_address)
>>> +		return false;
>>> +	buf_addr = addr - sfdev->last_address;
>>> +	if (buf_addr >= PROGRAM_BUFF_SIZE)
>>> +		return false;
>>> +
>>> +	memcpy(sfdev->program_buffer + buf_addr, buffer, len);
>>> +	sfdev->buff_written++;
>>> +
>>> +	return true;
>>> +}
>>> +
>>> +static void buffer_confirm(struct cfi_flash_device *sfdev)
>>> +{
>>> +	if (block_is_locked(sfdev, sfdev->last_address)) {
>>> +		sfdev->sr |= 0x12;
>>> +		return;
>>> +	}
>>> +	memcpy(sfdev->flash_memory + sfdev->last_address,
>>> +	       sfdev->program_buffer,
>>> +	       sfdev->buff_written * sizeof(u32));
>>> +}
>>> +
>>> +static void block_erase_confirm(struct cfi_flash_device *sfdev, u64 addr)
>>> +{
>>> +	if (block_is_locked(sfdev, addr)) {
>>> +		sfdev->sr |= 0x12;
>>> +		return;
>>> +	}
>>> +
>>> +	memset(sfdev->flash_memory + addr, 0xFF, FLASH_BLOCK_SIZE);
>>> +}
>>> +
>>> +static void cfi_flash_mmio(struct kvm_cpu *vcpu,
>>> +			   u64 addr, u8 *data, u32 len, u8 is_write,
>>> +			   void *context)
>>> +{
>>> +	struct cfi_flash_device *sfdev = context;
>>> +	u64 faddr = addr - sfdev->base_addr;
>>> +	u32 value;
>>> +
>>> +	if (!is_write) {
>>> +		u16 cfi_value = 0;
>>> +
>>> +		mutex_lock(&sfdev->mutex);
>>> +
>>> +		switch (sfdev->read_mode) {
>>> +		case READ_ARRAY:
>>> +			/* just copy the requested bytes from the array */
>>> +			memcpy(data, sfdev->flash_memory + faddr, len);
>>> +			goto out_unlock;
>>> +		case READ_STATUS:
>>> +			cfi_value = sfdev->sr;
>>> +			break;
>>> +		case READ_DEVICE_ID:
>>> +			cfi_value = read_dev_id(sfdev, faddr);
>>> +			break;
>>> +		case READ_QUERY:
>>> +			cfi_value = read_cfi(sfdev, faddr / CFI_BUS_WIDTH);
>>> +			break;
>>> +		}
>>> +		switch (len) {
>>> +		case 1:
>>> +			*data = cfi_value;
>>> +			break;
>>> +		case 8: memset(data + 4, 0, 4);
>>> +			/* fall-through */
>>> +		case 4:
>>> +			if (CFI_NR_FLASH_CHIPS == 2)
>>> +				memcpy(data + 2, &cfi_value, 2);
>>> +			else
>>> +				memset(data + 2, 0, 2);
>>> +			/* fall-through */
>>> +		case 2:
>>> +			memcpy(data, &cfi_value, 2);
>>> +			break;
>>> +		default:
>>> +			pr_debug("CFI flash: illegal access length %d for read mode %d",
>>> +				 len, sfdev->read_mode);
>>> +			break;
>>> +		}
>>> +
>>> +		goto out_unlock;
>>> +	}
>>> +
>>> +	if (len > 4) {
>>> +		pr_info("CFI flash: MMIO %d-bit write access not supported",
>>> +			 len * 8);
>>> +		return;
>>> +	}
>>> +
>>> +	memcpy(&value, data, len);
>>> +
>>> +	mutex_lock(&sfdev->mutex);
>>> +
>>> +	switch (sfdev->state) {
>>> +	case READY:			/* handled below */
>>> +		break;
>>> +
>>> +	case LOCK_SETUP:
>>> +		switch (value & 0xff) {
>>> +		case CFI_CMD_LOCK_BLOCK:
>>> +			lock_block(sfdev, faddr, true);
>>> +			sfdev->read_mode = READ_STATUS;
>>> +			break;
>>> +		case CFI_CMD_UNLOCK_BLOCK:
>>> +			lock_block(sfdev, faddr, false);
>>> +			sfdev->read_mode = READ_STATUS;
>>> +			break;
>>> +		default:
>>> +			sfdev->sr |= 0x30;
>>> +			break;
>>> +		}
>>> +		sfdev->state = READY;
>>> +		goto out_unlock;
>>> +
>>> +	case WP_SETUP:
>>> +		word_program(sfdev, faddr, data, len);
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		sfdev->state = READY;
>>> +		goto out_unlock;
>>> +
>>> +	case BP_LOAD:
>>> +		if (buffer_program(sfdev, faddr, data, len))
>>> +			goto out_unlock;
>>> +
>>> +		if ((value & 0xFF) == CFI_CMD_BUFFERED_PROGRAM_CONFIRM) {
>>> +			buffer_confirm(sfdev);
>>> +			sfdev->read_mode = READ_STATUS;
>>> +		} else {
>>> +			pr_debug("CFI flash: BP_LOAD: expected CONFIRM(0xd0), got 0x%x @ 0x%llx",
>>> +				 value, faddr);
>>> +			sfdev->sr |= 0x10;
>>> +		}
>>> +		sfdev->state = READY;
>>> +		goto out_unlock;
>>> +
>>> +	case BP_SETUP:
>>> +		sfdev->program_length = (value & 0xffff) + 1;
>>> +		if (sfdev->program_length > PROGRAM_BUFF_SIZE / 4)
>>> +			sfdev->program_length = PROGRAM_BUFF_SIZE / 4;
>>> +		sfdev->state = BP_LOAD;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		goto out_unlock;
>>> +
>>> +	case ERASE_SETUP:
>>> +		if ((value & 0xff) == CFI_CMD_BLOCK_ERASE_CONFIRM)
>>> +			block_erase_confirm(sfdev, faddr);
>>> +		else
>>> +			sfdev->sr |= 0x30;
>>> +
>>> +		sfdev->state = READY;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		goto out_unlock;
>>> +	}
>>> +
>>> +	/* write commands in READY state */
>>> +	switch (value & 0xFF) {
>>> +	case CFI_CMD_READ_JEDEC:
>>> +		sfdev->read_mode = READ_DEVICE_ID;
>>> +		break;
>>> +	case CFI_CMD_READ_STATUS_REGISTER:
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		break;
>>> +	case CFI_CMD_READ_CFI_QUERY:
>>> +		sfdev->read_mode = READ_QUERY;
>>> +		break;
>>> +	case CFI_CMD_CLEAR_STATUS_REGISTER:
>>> +		sfdev->sr = 0x80;
>>> +		break;
>>> +	case CFI_CMD_WORD_PROGRAM_SETUP:
>>> +	case CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP:
>>> +		sfdev->state = WP_SETUP;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		break;
>>> +	case CFI_CMD_LOCK_BLOCK_SETUP:
>>> +		sfdev->state = LOCK_SETUP;
>>> +		break;
>>> +	case CFI_CMD_BLOCK_ERASE_SETUP:
>>> +		sfdev->state = ERASE_SETUP;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		break;
>>> +	case CFI_CMD_BUFFERED_PROGRAM_SETUP:
>>> +		buffer_setup(sfdev);
>>> +		sfdev->state = BP_SETUP;
>>> +		sfdev->read_mode = READ_STATUS;
>>> +		break;
>>> +	case CFI_CMD_BUFFERED_PROGRAM_CONFIRM:
>>> +		pr_debug("CFI flash: unexpected confirm command 0xD0");
>>> +		break;
>>> +	default:
>>> +		pr_debug("CFI flash: unknown command 0x%x", value);
>>> +		/* fall through */
>>> +	case CFI_CMD_READ_ARRAY:
>>> +		sfdev->read_mode = READ_ARRAY;
>>> +		break;
>>> +	}
>>> +
>>> +out_unlock:
>>> +	mutex_unlock(&sfdev->mutex);
>>> +}
>>> +
>>> +#ifdef CONFIG_HAS_LIBFDT
>>> +static void generate_cfi_flash_fdt_node(void *fdt,
>>> +					struct device_header *dev_hdr,
>>> +					void (*generate_irq_prop)(void *fdt,
>>> +								  u8 irq,
>>> +								enum irq_type))
>>> +{
>>> +	struct cfi_flash_device *sfdev;
>>> +	u64 reg_prop[2];
>>> +
>>> +	sfdev = container_of(dev_hdr, struct cfi_flash_device, dev_hdr);
>>> +	reg_prop[0] = cpu_to_fdt64(sfdev->base_addr);
>>> +	reg_prop[1] = cpu_to_fdt64(sfdev->size);
>>> +
>>> +	_FDT(fdt_begin_node(fdt, "flash"));
>>> +	_FDT(fdt_property_cell(fdt, "bank-width", CFI_BUS_WIDTH));
>>> +	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
>>> +	_FDT(fdt_property_cell(fdt, "#size-cells", 0x1));
>>> +	_FDT(fdt_property_string(fdt, "compatible", "cfi-flash"));
>>> +	_FDT(fdt_property_string(fdt, "label", "System-firmware"));
>>> +	_FDT(fdt_property(fdt, "reg", &reg_prop, sizeof(reg_prop)));
>>> +	_FDT(fdt_end_node(fdt));
>>> +}
>>> +#else
>>> +#define generate_cfi_flash_fdt_node NULL
>>> +#endif
>>> +
>>> +static struct cfi_flash_device *create_flash_device_file(struct kvm *kvm,
>>> +							 const char *filename)
>>> +{
>>> +	struct cfi_flash_device *sfdev;
>>> +	struct stat statbuf;
>>> +	unsigned int value;
>>> +	int ret;
>>> +	int fd;
>>> +
>>> +	fd = open(filename, O_RDWR);
>>> +	if (fd < 0)
>>> +		return ERR_PTR(-errno);
>>> +	if (fstat(fd, &statbuf) < 0) {
>>> +		close(fd);
>>> +		return ERR_PTR(-errno);
>>> +	}
>>> +
>>> +	sfdev = malloc(sizeof(struct cfi_flash_device));
>>> +	if (!sfdev) {
>>> +		close(fd);
>>> +		return ERR_PTR(-ENOMEM);
>>> +	}
>>> +
>>> +	sfdev->size = (statbuf.st_size + 4095) & ~0xfffUL;
>>> +	sfdev->flash_memory = mmap(NULL, statbuf.st_size,
>>> +				   PROT_READ | PROT_WRITE, MAP_SHARED,
>>> +				   fd, 0);
>>> +	if (sfdev->flash_memory == MAP_FAILED) {
>>> +		close(fd);
>>> +		free(sfdev);
>>> +		return ERR_PTR(-errno);
>>> +	}
>>> +	sfdev->base_addr = KVM_FLASH_MMIO_BASE;
>>> +	sfdev->state = READY;
>>> +	sfdev->read_mode = READ_ARRAY;
>>> +	sfdev->sr = 0x80;
>>> +	sfdev->rcr = 0xbfcf;
>>> +
>>> +	value = roundup(nr_erase_blocks(sfdev), BITS_PER_LONG) / 8;
>>> +	sfdev->lock_bm = malloc(value);
>>> +	memset(sfdev->lock_bm, 0, value);
>>> +
>>> +	sfdev->dev_hdr.bus_type = DEVICE_BUS_MMIO;
>>> +	sfdev->dev_hdr.data = generate_cfi_flash_fdt_node;
>>> +	mutex_init(&sfdev->mutex);
>>> +	ret = device__register(&sfdev->dev_hdr);
>>> +	if (ret) {
>>> +		free(sfdev->flash_memory);
>>> +		free(sfdev);
>>> +		return ERR_PTR(ret);
>>> +	}
>>> +
>>> +	ret = kvm__register_mmio(kvm,
>>> +				 sfdev->base_addr, sfdev->size,
>>> +				 false, cfi_flash_mmio, sfdev);
>>> +	if (ret) {
>>> +		device__unregister(&sfdev->dev_hdr);
>>> +		free(sfdev->flash_memory);
>>> +		free(sfdev);
>>> +		return ERR_PTR(ret);
>>> +	}
>>> +
>>> +	return sfdev;
>>> +}
>>> +
>>> +static int flash__init(struct kvm *kvm)
>>> +{
>>> +	struct cfi_flash_device *sfdev;
>>> +
>>> +	if (!kvm->cfg.flash_filename)
>>> +		return 0;
>>> +
>>> +	sfdev = create_flash_device_file(kvm, kvm->cfg.flash_filename);
>>> +	if (IS_ERR(sfdev))
>>> +		return PTR_ERR(sfdev);
>>> +
>>> +	return 0;
>>> +}
>>> +dev_init(flash__init);
>>> diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
>>> index a052b0bc..f4a8b831 100644
>>> --- a/include/kvm/kvm-config.h
>>> +++ b/include/kvm/kvm-config.h
>>> @@ -35,6 +35,7 @@ struct kvm_config {
>>>  	const char *vmlinux_filename;
>>>  	const char *initrd_filename;
>>>  	const char *firmware_filename;
>>> +	const char *flash_filename;
>>>  	const char *console;
>>>  	const char *dev;
>>>  	const char *network;
>>> diff --git a/include/kvm/util.h b/include/kvm/util.h
>>> index 4ca7aa93..5c37f0b7 100644
>>> --- a/include/kvm/util.h
>>> +++ b/include/kvm/util.h
>>> @@ -104,6 +104,11 @@ static inline unsigned long roundup_pow_of_two(unsigned long x)
>>>  	return x ? 1UL << fls_long(x - 1) : 0;
>>>  }
>>>  
>>> +static inline int pow2_size(unsigned long x)
>>> +{
>>> +	return (sizeof(x) * 8) - __builtin_clzl(x - 1);
>>> +}
>>> +
>>>  struct kvm;
>>>  void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
>>>  void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);  
_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH kvmtool v2] Add emulation for CFI compatible flash memory
  2020-02-20 10:24     ` Alexandru Elisei
@ 2020-02-20 18:00       ` Andre Przywara
  0 siblings, 0 replies; 8+ messages in thread
From: Andre Przywara @ 2020-02-20 18:00 UTC (permalink / raw)
  To: Alexandru Elisei
  Cc: Raphael Gault, Sami Mujawar, Will Deacon, kvmarm, linux-arm-kernel

On Thu, 20 Feb 2020 10:24:13 +0000
Alexandru Elisei <alexandru.elisei@arm.com> wrote:

Hi,

> On 2/19/20 5:26 PM, Andre Przywara wrote:
> > On Mon, 17 Feb 2020 17:20:43 +0000
> > Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> >
> > Hi,
> >  
> >> I guess the device hasn't been tested with Linux. This is what I'm getting when
> >> trying to boot a Linux guest using the command:  
> > It was actually developed with a Linux guest, because that's more verbatim and easier to debug.
> >
> > And I just tested this again with Linux and it worked for me:  
> 
> The flash image you provided is 2 MB. The flash image that I used is 10 MB (it
> shows in the log that I sent). I guess you ran a different test.

What I stated below ... I guess there is some miscommunication here: I tested with Linux, just not with odd flash sizes. Which I agree should be either handled correctly or denied by kvmtool.

> 
> > [    2.164992] physmap-flash 20000.flash: physmap platform flash device: [mem 0x00020000-0x0021ffff]
> > [    2.166539] 20000.flash: Found 2 x16 devices at 0x0 in 32-bit bank. Manufacturer ID 0x000000 Chip ID 0x00ffff
> > ...
> > # mtd_debug info /dev/mtd0
> > mtd.type = MTD_NORFLASH
> > mtd.flags = MTD_CAP_NORFLASH
> > mtd.size = 2097152 (2M)
> > mtd.erasesize = 65536 (64K)
> > mtd.writesize = 1 
> > mtd.oobsize = 0 
> > regions = 1
> >
> > I think what you are seeing are problems when you give a non-power-of-2 sized flash image. The current patch does not really support this (since it's hardly a thing in the real world). I originally wanted to expand any "uneven" size to the next power-of-2, but this doesn't work easily with mmap.  
> 
> I would expect that if kvmtool allows the user to specify a non-power-of-2 flash
> image size, then it should know how to deal with it and not present a broken
> device to a linux guest if that size is forbidden by the spec. Or it is allowed by
> the specification and kvmtool doesn't know how to deal with it?

I don't know, I would guess physical flash has always a power-of-2 size, at least on a per-chip base. So the "spec" doesn't even consider the other case.

> Instead of expanding the file provided by the user to fit a bigger flash, how
> about you use the highest power of two size that is smaller than the flash size?

The original idea was to avoid cutting off the flash file, but this doesn't really work easily, or at least is not worth the effort.
So I was suggesting the trimming you mentioned in the next sentence ;-)
                                                      vvvvvvvvvvvvv

> > So I now changed the code to downgrade, so you get 8MB with any file ranging from [8MB, 16MB(, for instance.
> > That fixed the Linux problems with those files for me.
> >  
> >> $ ./lkvm run -c4 -m4096 -k /path/to/kernel -d /path/to/disk -p root="/dev/vda2" -F
> >> flash.img
> >>
> >> [    0.659167] physmap-flash 2000000.flash: physmap platform flash device: [mem
> >> 0x02000000-0x029fffff]
> >> [    0.660444] Number of erase regions: 1
> >> [    0.661036] Primary Vendor Command Set: 0001 (Intel/Sharp Extended)
> >> [    0.661688] Primary Algorithm Table at 0031
> >> [    0.662168] Alternative Vendor Command Set: 0000 (None)
> >> [    0.662711] No Alternate Algorithm Table
> >> [    0.663120] Vcc Minimum:  4.5 V
> >> [    0.663450] Vcc Maximum:  5.5 V
> >> [    0.663779] No Vpp line
> >> [    0.664039] Typical byte/word write timeout: 2 µs
> >> [    0.664590] Maximum byte/word write timeout: 2 µs
> >> [    0.665240] Typical full buffer write timeout: 2 µs
> >> [    0.665775] Maximum full buffer write timeout: 2 µs
> >> [    0.666373] Typical block erase timeout: 2 ms
> >> [    0.666828] Maximum block erase timeout: 2 ms
> >> [    0.667282] Chip erase not supported
> >> [    0.667659] Device size: 0x800000 bytes (8 MiB)
> >> [    0.668137] Flash Device Interface description: 0x0006
> >> [    0.668697]   - Unknown
> >> [    0.668963] Max. bytes in buffer write: 0x40
> >> [    0.669407] Number of Erase Block Regions: 1
> >> [    0.669865]   Erase Region #0: BlockSize 0x8000 bytes, 160 blocks
> >> [    0.672299] 2000000.flash: Found 2 x16 devices at 0x0 in 32-bit bank.
> >> Manufacturer ID 0x000000 Chip ID 0x00ffff
> >> [    0.681328] NOR chip too large to fit in mapping. Attempting to cope...
> >> [    0.682046] Intel/Sharp Extended Query Table at 0x0031
> >> [    0.682645] Using buffer write method
> >> [    0.683031] Sum of regions (a00000) != total size of set of interleaved chips
> >> (1000000)
> >> [    0.683854] gen_probe: No supported Vendor Command Set found
> >> [    0.684441] physmap-flash 2000000.flash: map_probe failed
> >>
> >> I also defined DEBUG_CFI in drivers/mtd/chips/cfi_probe.c.
> >>
> >> The Flash Device Interface description that we provide is wrong, it should 0x05.
> >> More details below.
> >>
> >> On 2/7/20 12:19 PM, Andre Przywara wrote:  
> >>> From: Raphael Gault <raphael.gault@arm.com>
> >>>
> >>> The EDK II UEFI firmware implementation requires some storage for the EFI
> >>> variables, which is typically some flash storage.
> >>> Since this is already supported on the EDK II side, we add a CFI flash
> >>> emulation to kvmtool.
> >>> This is backed by a file, specified via the --flash or -F command line
> >>> option. Any flash writes done by the guest will immediately be reflected
> >>> into this file (kvmtool mmap's the file).
> >>>
> >>> This implements a CFI flash using the "Intel/Sharp extended command
> >>> set", as specified in:
> >>> - JEDEC JESD68.01
> >>> - JEDEC JEP137B
> >>> - Intel Application Note 646
> >>> Some gaps in those specs have been filled by looking at real devices and
> >>> other implementations (QEMU, Linux kernel driver).
> >>>
> >>> At the moment this relies on DT to advertise the base address of the
> >>> flash memory (mapped into the MMIO address space) and is only enabled
> >>> for ARM/ARM64. The emulation itself is architecture agnostic, though.
> >>>
> >>> This is one missing piece toward a working UEFI boot with kvmtool on
> >>> ARM guests, the other is to provide writable PCI BARs, which is WIP.
> >>>
> >>> Signed-off-by: Raphael Gault <raphael.gault@arm.com>
> >>> [Andre: rewriting and fixing]
> >>> Signed-off-by: Andre Przywra <andre.przywara@arm.com>
> >>> ---
> >>> Hi,
> >>>
> >>> an update addressing Will's comments. I added coarse grained locking
> >>> to the MMIO handler, to prevent concurrent vCPU accesses from messing up
> >>> the internal CFI flash state machine.
> >>> I also folded the actual flash array read access into the MMIO handler
> >>> and fixed the other small issues.
> >>>
> >>> Cheers,
> >>> Andre
> >>>
> >>>  Makefile                          |   6 +
> >>>  arm/include/arm-common/kvm-arch.h |   3 +
> >>>  builtin-run.c                     |   2 +
> >>>  hw/cfi_flash.c                    | 546 ++++++++++++++++++++++++++++++
> >>>  include/kvm/kvm-config.h          |   1 +
> >>>  include/kvm/util.h                |   5 +
> >>>  6 files changed, 563 insertions(+)
> >>>  create mode 100644 hw/cfi_flash.c
> >>>
> >>> diff --git a/Makefile b/Makefile
> >>> index 3862112c..7ed6fb5e 100644
> >>> --- a/Makefile
> >>> +++ b/Makefile
> >>> @@ -170,6 +170,7 @@ ifeq ($(ARCH), arm)
> >>>  	CFLAGS		+= -march=armv7-a
> >>>  
> >>>  	ARCH_WANT_LIBFDT := y
> >>> +	ARCH_HAS_FLASH_MEM := y
> >>>  endif
> >>>  
> >>>  # ARM64
> >>> @@ -182,6 +183,7 @@ ifeq ($(ARCH), arm64)
> >>>  	ARCH_INCLUDE	+= -Iarm/aarch64/include
> >>>  
> >>>  	ARCH_WANT_LIBFDT := y
> >>> +	ARCH_HAS_FLASH_MEM := y
> >>>  endif
> >>>  
> >>>  ifeq ($(ARCH),mips)
> >>> @@ -261,6 +263,10 @@ ifeq (y,$(ARCH_HAS_FRAMEBUFFER))
> >>>  	endif
> >>>  endif
> >>>  
> >>> +ifeq (y,$(ARCH_HAS_FLASH_MEM))
> >>> +	OBJS	+= hw/cfi_flash.o
> >>> +endif
> >>> +
> >>>  ifeq ($(call try-build,$(SOURCE_ZLIB),$(CFLAGS),$(LDFLAGS) -lz),y)
> >>>  	CFLAGS_DYNOPT	+= -DCONFIG_HAS_ZLIB
> >>>  	LIBS_DYNOPT	+= -lz
> >>> diff --git a/arm/include/arm-common/kvm-arch.h b/arm/include/arm-common/kvm-arch.h
> >>> index b9d486d5..2bb085f4 100644
> >>> --- a/arm/include/arm-common/kvm-arch.h
> >>> +++ b/arm/include/arm-common/kvm-arch.h
> >>> @@ -21,6 +21,9 @@
> >>>  #define ARM_GIC_DIST_SIZE	0x10000
> >>>  #define ARM_GIC_CPUI_SIZE	0x20000
> >>>  
> >>> +#define ARM_FLASH_MMIO_BASE	0x2000000		/* 32 MB */
> >>> +#define KVM_FLASH_MMIO_BASE	ARM_FLASH_MMIO_BASE
> >>> +
> >>>  #define ARM_IOPORT_SIZE		(ARM_MMIO_AREA - ARM_IOPORT_AREA)
> >>>  #define ARM_VIRTIO_MMIO_SIZE	(ARM_AXI_AREA - (ARM_MMIO_AREA + ARM_GIC_SIZE))
> >>>  #define ARM_PCI_CFG_SIZE	(1ULL << 24)
> >>> diff --git a/builtin-run.c b/builtin-run.c
> >>> index f8dc6c72..df8c6741 100644
> >>> --- a/builtin-run.c
> >>> +++ b/builtin-run.c
> >>> @@ -138,6 +138,8 @@ void kvm_run_set_wrapper_sandbox(void)
> >>>  			"Kernel command line arguments"),		\
> >>>  	OPT_STRING('f', "firmware", &(cfg)->firmware_filename, "firmware",\
> >>>  			"Firmware image to boot in virtual machine"),	\
> >>> +	OPT_STRING('F', "flash", &(cfg)->flash_filename, "flash",\
> >>> +			"Flash image to present to virtual machine"),	\
> >>>  									\
> >>>  	OPT_GROUP("Networking options:"),				\
> >>>  	OPT_CALLBACK_DEFAULT('n', "network", NULL, "network params",	\
> >>> diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
> >>> new file mode 100644
> >>> index 00000000..d7c0e7e8
> >>> --- /dev/null
> >>> +++ b/hw/cfi_flash.c
> >>> @@ -0,0 +1,546 @@
> >>> +#include <stdbool.h>
> >>> +#include <stdlib.h>
> >>> +#include <string.h>
> >>> +#include <linux/bitops.h>
> >>> +#include <linux/err.h>
> >>> +#include <linux/sizes.h>
> >>> +#include <linux/types.h>
> >>> +
> >>> +#include "kvm/kvm.h"
> >>> +#include "kvm/kvm-arch.h"
> >>> +#include "kvm/devices.h"
> >>> +#include "kvm/fdt.h"
> >>> +#include "kvm/mutex.h"
> >>> +#include "kvm/util.h"
> >>> +
> >>> +/* The EDK2 driver hardcodes two 16-bit chips on a 32-bit bus. */
> >>> +#define CFI_NR_FLASH_CHIPS			2
> >>> +
> >>> +/* We always emulate a 32 bit bus width. */
> >>> +#define CFI_BUS_WIDTH				4
> >>> +
> >>> +/* The *effective* size of an erase block (over all chips) */
> >>> +#define FLASH_BLOCK_SIZE			SZ_64K
> >>> +
> >>> +#define PROGRAM_BUFF_SIZE_BITS			7
> >>> +#define PROGRAM_BUFF_SIZE			(1U << PROGRAM_BUFF_SIZE_BITS)
> >>> +
> >>> +/* CFI commands */
> >>> +#define CFI_CMD_LOCK_BLOCK			0x01
> >>> +#define CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP	0x10
> >>> +#define CFI_CMD_BLOCK_ERASE_SETUP		0x20
> >>> +#define CFI_CMD_WORD_PROGRAM_SETUP		0x40
> >>> +#define CFI_CMD_CLEAR_STATUS_REGISTER		0x50
> >>> +#define CFI_CMD_LOCK_BLOCK_SETUP		0x60
> >>> +#define CFI_CMD_READ_STATUS_REGISTER		0x70
> >>> +#define CFI_CMD_READ_JEDEC			0x90
> >>> +#define CFI_CMD_READ_CFI_QUERY			0x98
> >>> +#define CFI_CMD_BUFFERED_PROGRAM_CONFIRM	0xd0
> >>> +#define CFI_CMD_BLOCK_ERASE_CONFIRM		0xd0
> >>> +#define CFI_CMD_UNLOCK_BLOCK			0xd0
> >>> +#define CFI_CMD_BUFFERED_PROGRAM_SETUP		0xe8
> >>> +#define CFI_CMD_READ_ARRAY			0xff
> >>> +
> >>> +/*
> >>> + * CFI query table contents, as far as it is constant.
> >>> + */
> >>> +#define CFI_GEOM_OFFSET				0x27
> >>> +static u8 cfi_query_table[] = {
> >>> +		/* offset 0x10: CFI query identification string */
> >>> +	'Q', 'R', 'Y',		/* ID string */
> >>> +	0x01, 0x00,		/* primary command set: Intel/Sharp extended */
> >>> +	0x31, 0x00,		/* address of primary extended query table */
> >>> +	0x00, 0x00,		/* alternative command set: unused */
> >>> +	0x00, 0x00,		/* address of alternative extended query table*/
> >>> +		/* offset 0x1b: system interface information */
> >>> +	0x45,			/* minimum Vcc voltage: 4.5V */
> >>> +	0x55,			/* maximum Vcc voltage: 5.5V */
> >>> +	0x00,			/* minimum Vpp voltage: 0.0V (unused) */
> >>> +	0x00,			/* maximum Vpp voltage: 0.0V *(unused) */
> >>> +	0x01,			/* timeout for single word program: 2 us */
> >>> +	0x01,			/* timeout for multi-byte program: 2 us */
> >>> +	0x01,			/* timeout for block erase: 2 ms */
> >>> +	0x00,			/* timeout for full chip erase: not supported */
> >>> +	0x00,			/* max timeout for single word program: 1x */
> >>> +	0x00,			/* max timeout for mulit-byte program: 1x */
> >>> +	0x00,			/* max timeout for block erase: 1x */
> >>> +	0x00,			/* max timeout for chip erase: not supported */
> >>> +		/* offset 0x27: flash geometry information */
> >>> +	0x00,			/* size in power-of-2 bytes, filled later */
> >>> +	0x06, 0x00,		/* interface description: 32 and 16 bits */    
> >> I don't think this is correct. From Intel StrataFlash Embedded Memory (P30)
> >> Family, table 34:
> >>
> >> ""n" such that n+1 specifies the bit field that represents the flash device width
> >> capabilities as described in the table".  
> > Yeah, seems to be correct, but it looks this Intel Strata document is the only place which details this encoding (which looks like being retrofit somehow).
> > And I didn't really use this document, because it's a manufacturer data sheet and not a specification.  
> 
> The device is in the list of specification you provided in the commit message.

Where? I only see JEP137B, JESD68-01 and Intel AN-646 in the list up there.

> I
> think it would make the reviewers' life a lot easier if you posted all the
> documentation that you used, and drop documentation that you didn't. Where did you
> get the value 0x06 from? That was the only document from where I could infer what
> it means, maybe I didn't dig deep enough.
> 
> > I will change it to 0x5, but for the records Linux worked even with 0x6 for me.  
> 
> I would say that in this case working != correct, because Linux 5.6-rc2 defines
> 0x05 as a x32/x16 interface, and 0x06 is undefined in the file that I mentioned in
> the previous reply. Did you check to see if the Linux driver recognized that
> interface type? Maybe it changed between versions. I also tried 0x00,0x00 for the
> interface description and Linux also worked.

I guess because the emulation doesn't really care about the access size (we use memcpy).
 
> Either way, I followed the trail of breadcrumbs starting at the comment for the
> define and I found this in Common Flash Memory Interface Specification release
> 2.0, Appendix:
> 
> "Note: April 2000 -x16/x32 devices will be represented by hex value 0005h as
> requested by Intel in order to make them more software friendly. Changes will be
> made to the CFI drivers so that a bit-wise switch is created to represent
> different data widths. [..] For example, if we take the description for an x16/x32
> device (0005h) and we convert that to binary we get 0101b. If we add one to this
> value we get a bit pattern that looks like this: 0110b. This bit-wise switch
> indicates that the device a x16/x32 device".
> 
> I guess the reason for the inconsistencies is that at some point it used to be
> different, but it was changed because Intel requested it.

Yeah, I found this one as well later.

> >> If you want to advertise 32 and 16 bit write capabilities, it should be 5 because
> >> 5+1=6. This is also the value that the Linux kernel checks for (see
> >> include/linux/mtd/cfi.h, define CFI_INTERFACE_X16_BY_X32_ASYNC"). 6 actually means
> >> 32, 16 and 8 bit accesses.
> >>
> >> This begs another question: why do we support both 16 and 32 bit accesses instead
> >> of supporting only 32 bit?  
> > Because we can, there is no reason to restrict this. I feel like we should be as capable as possible, especially since it's trivial to emulate.  
> 
> Makes sense.
> 
> >  
> >>> +	PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
> >>> +				/* number of multi-byte writes */    
> >> Shouldn't the comment be maximum number of bytes in the write buffer?  
> > Yes, possibly.
> >  
> >>> +	0x01,			/* one erase block region */
> >>> +	0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> >>> +		/* offset 0x31: Intel primary algorithm extended query table */
> >>> +	'P', 'R', 'I',
> >>> +	'1', '0',		/* version 1.0 */
> >>> +	0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
> >>> +	0x00,			/* no functions after suspend */
> >>> +	0x01, 0x00,		/* only lock bit supported */
> >>> +	0x50,			/* best Vcc value: 5.0V */
> >>> +	0x00,			/* best Vpp value: 0.0V (unused) */
> >>> +	0x01,			/* number of protection register fields */
> >>> +	0x00, 0x00, 0x00, 0x00,	/* protection field 1 description */
> >>> +};    
> >> As an aside, I found it impossible to review the cfi_query_table array in its
> >> current form. This is how I wrote the array so I could read it. I also took the
> >> liberty to remove the offset when indexing the array, making read_cfi less error
> >> prone, in my opinion:  
> > Please don't post elaborate code sequences as a comment, especially not if it gets mangled (Thunderbird is annoyingly bad in this respect).  
> 
> I use Thunderbird and it showed fine for me, and I have sent large diffs before in
> replies and I got no complaints. I use the settings from
> Documentation/process/email-clients.rst.

Maybe it was a problem because (I guess) you copy&pasted the diff in?
Because I see a "...skipping..." line in there, bogus line breaks and all tabs were converted into spaces. Or there was some cocky mail server in the queue ;-)

Eventually I gave up with sending diffs other than for demonstration purposes through the Thunderbird editor because of various problems.
As a workaround you could try to save the email, fix it up with a proper editor (or insert the patch there), then send it via git send-email.
Or avoid sending patches this way at all ;-)


I will send v3 tomorrow morning after double checking that I didn't miss a comment.

Cheers,
Andre

> Thanks,
> Alex
> > I think I would have got what you mean by showing just one line ;-)
> >
> > Cheers,
> > Andre
> >  
> >> diff --git a/hw/cfi_flash.c b/hw/cfi_flash.c
> >> index d7c0e7e80d69..65a90e288be8 100644
> >> --- a/hw/cfi_flash.c
> >> +++ b/hw/cfi_flash.c
> >> @@ -46,45 +46,43 @@
> >>   */
> >>  #define CFI_GEOM_OFFSET                                0x27
> >>  static u8 cfi_query_table[] = {
> >> -               /* offset 0x10: CFI query identification string */
> >> -       'Q', 'R', 'Y',          /* ID string */
> >> -       0x01, 0x00,             /* primary command set: Intel/Sharp extended */
> >> -       0x31, 0x00,             /* address of primary extended query table */
> >> -       0x00, 0x00,             /* alternative command set: unused */
> >> -       0x00, 0x00,             /* address of alternative extended query table*/
> >> -               /* offset 0x1b: system interface information */
> >> -       0x45,                   /* minimum Vcc voltage: 4.5V */
> >> -       0x55,                   /* maximum Vcc voltage: 5.5V */
> >> -       0x00,                   /* minimum Vpp voltage: 0.0V (unused) */
> >> -       0x00,                   /* maximum Vpp voltage: 0.0V *(unused) */
> >> -       0x01,                   /* timeout for single word program: 2 us */
> >> -       0x01,                   /* timeout for multi-byte program: 2 us */
> >> -       0x01,                   /* timeout for block erase: 2 ms */
> >> -       0x00,                   /* timeout for full chip erase: not supported */
> >> -       0x00,                   /* max timeout for single word program: 1x */
> >> -       0x00,                   /* max timeout for mulit-byte program: 1x */
> >> -       0x00,                   /* max timeout for block erase: 1x */
> >> -       0x00,                   /* max timeout for chip erase: not supported */
> >> -               /* offset 0x27: flash geometry information */
> >> -       0x00,                   /* size in power-of-2 bytes, filled later */
> >> -       0x06, 0x00,             /* interface description: 32 and 16 bits */
> >> -       PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
> >> +       [0x10] = 'Q', 'R', 'Y', /* ID string */
> >> +       [0x13] = 0x01, 0x00,    /* primary command set: Intel/Sharp extended */
> >> +       [0x15] = 0x31, 0x00,    /* address of primary extended query table */
> >> +       [0x17] = 0x00, 0x00,    /* alternative command set: unused */
> >> +       [0x19] = 0x00, 0x00,    /* address of alternative extended query table*/
> >> +       /* System interface information */
> >> +       [0x1b] = 0x45,          /* minimum Vcc voltage: 4.5V */
> >> +       [0x1c] = 0x55,          /* maximum Vcc voltage: 5.5V */
> >> +       [0x1d] = 0x00,          /* minimum Vpp voltage: 0.0V (unused) */
> >> +       [0x1e] = 0x00,          /* maximum Vpp voltage: 0.0V *(unused) */
> >> +       [0x1f] = 0x01,          /* timeout for single word program: 2 us */
> >> +       [0x20] = 0x01,          /* timeout for multi-byte program: 2 us */
> >> +       [0x21] = 0x01,          /* timeout for block erase: 2 ms */
> >> +       [0x22] = 0x00,          /* timeout for full chip erase: not supported */
> >> +       [0x23] = 0x00,          /* max timeout for single word program: 1x */
> >> +       [0x24] = 0x00,          /* max timeout for mulit-byte program: 1x */
> >> +       [0x25] = 0x00,          /* max timeout for block erase: 1x */
> >> +       [0x26] = 0x00,          /* max timeout for chip erase: not supported */
> >> +       /* Flash geometry information */
> >> +       [0x27] = 0x00,          /* size in power-of-2 bytes, filled later */
> >> +       [0x28] = 0x06, 0x00,    /* interface description: 32 and 16 bits */
> >> +       [0x2a] = PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
> >>                                 /* number of multi-byte writes */
> >> -       0x01,                   /* one erase block region */
> >> -       0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> >> -               /* offset 0x31: Intel primary algorithm extended query table */
> >> -       'P', 'R', 'I',
> >> -       '1', '0',               /* version 1.0 */
> >> -       0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
> >> -       0x00,                   /* no functions after suspend */
> >> -       0x01, 0x00,             /* only lock bit supported */
> >> -       0x50,                   /* best Vcc value: 5.0V */
> >> -       0x00,                   /* best Vpp value: 0.0V (unused) */
> >> -       0x01,                   /* number of protection register fields */
> >> -       0x00, 0x00, 0x00, 0x00, /* protection field 1 description */
> >> +       [0x2c] = 0x01,          /* one erase block region */
> >> +       [0x2d] = 0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> >> +       /* Intel primary algorithm extended query table */
> >> +       [0x31] = 'P', 'R', 'I', /* ID string */
> >> +       [0x34] = '1', '0',      /* version 1.0 */
> >> +       [0x36] = 0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock &
> >> pm-read */
> >> +       [0x40] = 0x00,          /* no functions after suspend */
> >> +       [0x41] = 0x01, 0x00,    /* only lock bit supported */
> >> ...skipping...
> >> +       [0x10] = 'Q', 'R', 'Y', /* ID string */
> >> +       [0x13] = 0x01, 0x00,    /* primary command set: Intel/Sharp extended */
> >> +       [0x15] = 0x31, 0x00,    /* address of primary extended query table */
> >> +       [0x17] = 0x00, 0x00,    /* alternative command set: unused */
> >> +       [0x19] = 0x00, 0x00,    /* address of alternative extended query table*/
> >> +       /* System interface information */
> >> +       [0x1b] = 0x45,          /* minimum Vcc voltage: 4.5V */
> >> +       [0x1c] = 0x55,          /* maximum Vcc voltage: 5.5V */
> >> +       [0x1d] = 0x00,          /* minimum Vpp voltage: 0.0V (unused) */
> >> +       [0x1e] = 0x00,          /* maximum Vpp voltage: 0.0V *(unused) */
> >> +       [0x1f] = 0x01,          /* timeout for single word program: 2 us */
> >> +       [0x20] = 0x01,          /* timeout for multi-byte program: 2 us */
> >> +       [0x21] = 0x01,          /* timeout for block erase: 2 ms */
> >> +       [0x22] = 0x00,          /* timeout for full chip erase: not supported */
> >> +       [0x23] = 0x00,          /* max timeout for single word program: 1x */
> >> +       [0x24] = 0x00,          /* max timeout for mulit-byte program: 1x */
> >> +       [0x25] = 0x00,          /* max timeout for block erase: 1x */
> >> +       [0x26] = 0x00,          /* max timeout for chip erase: not supported */
> >> +       /* Flash geometry information */
> >> +       [0x27] = 0x00,          /* size in power-of-2 bytes, filled later */
> >> +       [0x28] = 0x06, 0x00,    /* interface description: 32 and 16 bits */
> >> +       [0x2a] = PROGRAM_BUFF_SIZE_BITS + 1 - CFI_NR_FLASH_CHIPS, 0x00,
> >>                                 /* number of multi-byte writes */
> >> -       0x01,                   /* one erase block region */
> >> -       0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> >> -               /* offset 0x31: Intel primary algorithm extended query table */
> >> -       'P', 'R', 'I',
> >> -       '1', '0',               /* version 1.0 */
> >> -       0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock & pm-read */
> >> -       0x00,                   /* no functions after suspend */
> >> -       0x01, 0x00,             /* only lock bit supported */
> >> -       0x50,                   /* best Vcc value: 5.0V */
> >> -       0x00,                   /* best Vpp value: 0.0V (unused) */
> >> -       0x01,                   /* number of protection register fields */
> >> -       0x00, 0x00, 0x00, 0x00, /* protection field 1 description */
> >> +       [0x2c] = 0x01,          /* one erase block region */
> >> +       [0x2d] = 0x00, 0x00, 0x00, 0x00, /* number and size of erase blocks, filled */
> >> +       /* Intel primary algorithm extended query table */
> >> +       [0x31] = 'P', 'R', 'I', /* ID string */
> >> +       [0x34] = '1', '0',      /* version 1.0 */
> >> +       [0x36] = 0xa0, 0x00, 0x00, 0x00, /* optional features: instant lock &
> >> pm-read */
> >> +       [0x40] = 0x00,          /* no functions after suspend */
> >> +       [0x41] = 0x01, 0x00,    /* only lock bit supported */
> >> +       [0x43] = 0x50,          /* best Vcc value: 5.0V */
> >> +       [0x43] = 0x00,          /* best Vpp value: 0.0V (unused) */
> >> +       [0x44] = 0x01,          /* number of protection register fields */
> >> +       [0x45] = 0x00, 0x00, 0x00, 0x00,/* protection field 1 description */
> >>  };
> >>  
> >> -
> >>  /*
> >>   * Those states represent a subset of the CFI flash state machine.
> >>   */
> >> @@ -141,10 +139,7 @@ static int nr_erase_blocks(struct cfi_flash_device *sfdev)
> >>   */
> >>  static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
> >>  {
> >> -       if (addr < 0x10)                /* CFI information starts at 0x10 */
> >> -               return 0;
> >> -
> >> -       if (addr - 0x10 > sizeof(cfi_query_table)) {
> >> +       if (addr > sizeof(cfi_query_table)) {
> >>                 pr_debug("CFI query read access beyond the end of table");
> >>                 return 0;
> >>         }
> >> @@ -163,7 +158,7 @@ static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
> >>                 return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
> >>         }
> >>  
> >> -       return cfi_query_table[addr - 0x10];
> >> +       return cfi_query_table[addr];
> >>  }
> >>  
> >>  static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
> >>
> >> Thanks,
> >> Alex  
> >>> +
> >>> +
> >>> +/*
> >>> + * Those states represent a subset of the CFI flash state machine.
> >>> + */
> >>> +enum cfi_flash_state {
> >>> +	READY,
> >>> +	LOCK_SETUP,
> >>> +	WP_SETUP,
> >>> +	BP_SETUP,
> >>> +	BP_LOAD,
> >>> +	ERASE_SETUP,
> >>> +};
> >>> +
> >>> +/*
> >>> + * The device can be in several **Read** modes.
> >>> + * We don't implement the asynchronous burst mode.
> >>> + */
> >>> +enum cfi_read_mode {
> >>> +	READ_ARRAY,
> >>> +	READ_STATUS,
> >>> +	READ_DEVICE_ID,
> >>> +	READ_QUERY,
> >>> +};
> >>> +
> >>> +struct cfi_flash_device {
> >>> +	struct device_header	dev_hdr;
> >>> +	/* Protects the CFI state machine variables in this data structure. */
> >>> +	struct mutex		mutex;
> >>> +	u64			base_addr;
> >>> +	u32			size;
> >>> +
> >>> +	void			*flash_memory;
> >>> +	u8			program_buffer[PROGRAM_BUFF_SIZE * 4];
> >>> +	unsigned long		*lock_bm;
> >>> +	u64			last_address;
> >>> +	unsigned int		buff_written;
> >>> +	unsigned int		program_length;
> >>> +
> >>> +	enum cfi_flash_state	state;
> >>> +	enum cfi_read_mode	read_mode;
> >>> +	u16			rcr;
> >>> +	u8			sr;
> >>> +};
> >>> +
> >>> +static int nr_erase_blocks(struct cfi_flash_device *sfdev)
> >>> +{
> >>> +	return sfdev->size / FLASH_BLOCK_SIZE;
> >>> +}
> >>> +
> >>> +/*
> >>> + * CFI queries always deal with one byte of information, possibly mirrored
> >>> + * to other bytes on the bus. This is dealt with in the callers.
> >>> + * The address provided is the one for 8-bit addressing, and would need to
> >>> + * be adjusted for wider accesses.
> >>> + */
> >>> +static u8 read_cfi(struct cfi_flash_device *sfdev, u64 addr)
> >>> +{
> >>> +	if (addr < 0x10)		/* CFI information starts at 0x10 */
> >>> +		return 0;
> >>> +
> >>> +	if (addr - 0x10 > sizeof(cfi_query_table)) {
> >>> +		pr_debug("CFI query read access beyond the end of table");
> >>> +		return 0;
> >>> +	}
> >>> +
> >>> +	/* Fixup dynamic information in the geometry part of the table. */
> >>> +	switch (addr) {
> >>> +	case CFI_GEOM_OFFSET:		/* device size in bytes, power of two */
> >>> +		return pow2_size(sfdev->size / CFI_NR_FLASH_CHIPS);
> >>> +	case CFI_GEOM_OFFSET + 6:	/* number of erase blocks, minus one */
> >>> +		return (nr_erase_blocks(sfdev) - 1) & 0xff;
> >>> +	case CFI_GEOM_OFFSET + 7:
> >>> +		return (nr_erase_blocks(sfdev) - 1) >> 8;
> >>> +	case CFI_GEOM_OFFSET + 8:	/* erase block size, in units of 256 */
> >>> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) & 0xff;
> >>> +	case CFI_GEOM_OFFSET + 9:
> >>> +		return ((FLASH_BLOCK_SIZE / 256 ) / CFI_NR_FLASH_CHIPS) >> 8;
> >>> +	}
> >>> +
> >>> +	return cfi_query_table[addr - 0x10];
> >>> +}
> >>> +
> >>> +static bool block_is_locked(struct cfi_flash_device *sfdev, u64 addr)
> >>> +{
> >>> +	int block_nr = addr / FLASH_BLOCK_SIZE;
> >>> +
> >>> +	return test_bit(block_nr, sfdev->lock_bm);
> >>> +}
> >>> +
> >>> +#define DEV_ID_MASK 0x7ff
> >>> +static u16 read_dev_id(struct cfi_flash_device *sfdev, u64 addr)
> >>> +{
> >>> +	switch ((addr & DEV_ID_MASK) / CFI_BUS_WIDTH) {
> >>> +	case 0x0:				/* vendor ID */
> >>> +		return 0x0000;
> >>> +	case 0x1:				/* device ID */
> >>> +		return 0xffff;
> >>> +	case 0x2:
> >>> +		return block_is_locked(sfdev, addr & ~DEV_ID_MASK);
> >>> +	case 0x5:
> >>> +		return sfdev->rcr;
> >>> +	default:			/* Ignore the other entries. */
> >>> +		return 0;
> >>> +	}
> >>> +}
> >>> +
> >>> +static void lock_block(struct cfi_flash_device *sfdev, u64 addr, bool lock)
> >>> +{
> >>> +	int block_nr = addr / FLASH_BLOCK_SIZE;
> >>> +
> >>> +	if (lock)
> >>> +		set_bit(block_nr, sfdev->lock_bm);
> >>> +	else
> >>> +		clear_bit(block_nr, sfdev->lock_bm);
> >>> +}
> >>> +
> >>> +static void word_program(struct cfi_flash_device *sfdev,
> >>> +			 u64 addr, void *data, int len)
> >>> +{
> >>> +	if (block_is_locked(sfdev, addr)) {
> >>> +		sfdev->sr |= 0x12;
> >>> +		return;
> >>> +	}
> >>> +
> >>> +	memcpy(sfdev->flash_memory + addr, data, len);
> >>> +}
> >>> +
> >>> +/* Reset the program buffer state to prepare for follow-up writes. */
> >>> +static void buffer_setup(struct cfi_flash_device *sfdev)
> >>> +{
> >>> +	memset(sfdev->program_buffer, 0, sizeof(sfdev->program_buffer));
> >>> +	sfdev->last_address = ~0ULL;
> >>> +	sfdev->buff_written = 0;
> >>> +}
> >>> +
> >>> +static bool buffer_program(struct cfi_flash_device *sfdev,
> >>> +			   u64 addr, void *buffer, int len)
> >>> +{
> >>> +	unsigned int buf_addr;
> >>> +
> >>> +	if (sfdev->buff_written >= sfdev->program_length)
> >>> +		return false;
> >>> +
> >>> +	/*
> >>> +	 * The first word written into the buffer after the setup command
> >>> +	 * happens to be the base address for the buffer.
> >>> +	 * All subsequent writes need to be within this address and this
> >>> +	 * address plus the buffer size, so keep this value around.
> >>> +	 */
> >>> +	if (sfdev->last_address == ~0ULL)
> >>> +		sfdev->last_address = addr;
> >>> +
> >>> +	if (addr < sfdev->last_address)
> >>> +		return false;
> >>> +	buf_addr = addr - sfdev->last_address;
> >>> +	if (buf_addr >= PROGRAM_BUFF_SIZE)
> >>> +		return false;
> >>> +
> >>> +	memcpy(sfdev->program_buffer + buf_addr, buffer, len);
> >>> +	sfdev->buff_written++;
> >>> +
> >>> +	return true;
> >>> +}
> >>> +
> >>> +static void buffer_confirm(struct cfi_flash_device *sfdev)
> >>> +{
> >>> +	if (block_is_locked(sfdev, sfdev->last_address)) {
> >>> +		sfdev->sr |= 0x12;
> >>> +		return;
> >>> +	}
> >>> +	memcpy(sfdev->flash_memory + sfdev->last_address,
> >>> +	       sfdev->program_buffer,
> >>> +	       sfdev->buff_written * sizeof(u32));
> >>> +}
> >>> +
> >>> +static void block_erase_confirm(struct cfi_flash_device *sfdev, u64 addr)
> >>> +{
> >>> +	if (block_is_locked(sfdev, addr)) {
> >>> +		sfdev->sr |= 0x12;
> >>> +		return;
> >>> +	}
> >>> +
> >>> +	memset(sfdev->flash_memory + addr, 0xFF, FLASH_BLOCK_SIZE);
> >>> +}
> >>> +
> >>> +static void cfi_flash_mmio(struct kvm_cpu *vcpu,
> >>> +			   u64 addr, u8 *data, u32 len, u8 is_write,
> >>> +			   void *context)
> >>> +{
> >>> +	struct cfi_flash_device *sfdev = context;
> >>> +	u64 faddr = addr - sfdev->base_addr;
> >>> +	u32 value;
> >>> +
> >>> +	if (!is_write) {
> >>> +		u16 cfi_value = 0;
> >>> +
> >>> +		mutex_lock(&sfdev->mutex);
> >>> +
> >>> +		switch (sfdev->read_mode) {
> >>> +		case READ_ARRAY:
> >>> +			/* just copy the requested bytes from the array */
> >>> +			memcpy(data, sfdev->flash_memory + faddr, len);
> >>> +			goto out_unlock;
> >>> +		case READ_STATUS:
> >>> +			cfi_value = sfdev->sr;
> >>> +			break;
> >>> +		case READ_DEVICE_ID:
> >>> +			cfi_value = read_dev_id(sfdev, faddr);
> >>> +			break;
> >>> +		case READ_QUERY:
> >>> +			cfi_value = read_cfi(sfdev, faddr / CFI_BUS_WIDTH);
> >>> +			break;
> >>> +		}
> >>> +		switch (len) {
> >>> +		case 1:
> >>> +			*data = cfi_value;
> >>> +			break;
> >>> +		case 8: memset(data + 4, 0, 4);
> >>> +			/* fall-through */
> >>> +		case 4:
> >>> +			if (CFI_NR_FLASH_CHIPS == 2)
> >>> +				memcpy(data + 2, &cfi_value, 2);
> >>> +			else
> >>> +				memset(data + 2, 0, 2);
> >>> +			/* fall-through */
> >>> +		case 2:
> >>> +			memcpy(data, &cfi_value, 2);
> >>> +			break;
> >>> +		default:
> >>> +			pr_debug("CFI flash: illegal access length %d for read mode %d",
> >>> +				 len, sfdev->read_mode);
> >>> +			break;
> >>> +		}
> >>> +
> >>> +		goto out_unlock;
> >>> +	}
> >>> +
> >>> +	if (len > 4) {
> >>> +		pr_info("CFI flash: MMIO %d-bit write access not supported",
> >>> +			 len * 8);
> >>> +		return;
> >>> +	}
> >>> +
> >>> +	memcpy(&value, data, len);
> >>> +
> >>> +	mutex_lock(&sfdev->mutex);
> >>> +
> >>> +	switch (sfdev->state) {
> >>> +	case READY:			/* handled below */
> >>> +		break;
> >>> +
> >>> +	case LOCK_SETUP:
> >>> +		switch (value & 0xff) {
> >>> +		case CFI_CMD_LOCK_BLOCK:
> >>> +			lock_block(sfdev, faddr, true);
> >>> +			sfdev->read_mode = READ_STATUS;
> >>> +			break;
> >>> +		case CFI_CMD_UNLOCK_BLOCK:
> >>> +			lock_block(sfdev, faddr, false);
> >>> +			sfdev->read_mode = READ_STATUS;
> >>> +			break;
> >>> +		default:
> >>> +			sfdev->sr |= 0x30;
> >>> +			break;
> >>> +		}
> >>> +		sfdev->state = READY;
> >>> +		goto out_unlock;
> >>> +
> >>> +	case WP_SETUP:
> >>> +		word_program(sfdev, faddr, data, len);
> >>> +		sfdev->read_mode = READ_STATUS;
> >>> +		sfdev->state = READY;
> >>> +		goto out_unlock;
> >>> +
> >>> +	case BP_LOAD:
> >>> +		if (buffer_program(sfdev, faddr, data, len))
> >>> +			goto out_unlock;
> >>> +
> >>> +		if ((value & 0xFF) == CFI_CMD_BUFFERED_PROGRAM_CONFIRM) {
> >>> +			buffer_confirm(sfdev);
> >>> +			sfdev->read_mode = READ_STATUS;
> >>> +		} else {
> >>> +			pr_debug("CFI flash: BP_LOAD: expected CONFIRM(0xd0), got 0x%x @ 0x%llx",
> >>> +				 value, faddr);
> >>> +			sfdev->sr |= 0x10;
> >>> +		}
> >>> +		sfdev->state = READY;
> >>> +		goto out_unlock;
> >>> +
> >>> +	case BP_SETUP:
> >>> +		sfdev->program_length = (value & 0xffff) + 1;
> >>> +		if (sfdev->program_length > PROGRAM_BUFF_SIZE / 4)
> >>> +			sfdev->program_length = PROGRAM_BUFF_SIZE / 4;
> >>> +		sfdev->state = BP_LOAD;
> >>> +		sfdev->read_mode = READ_STATUS;
> >>> +		goto out_unlock;
> >>> +
> >>> +	case ERASE_SETUP:
> >>> +		if ((value & 0xff) == CFI_CMD_BLOCK_ERASE_CONFIRM)
> >>> +			block_erase_confirm(sfdev, faddr);
> >>> +		else
> >>> +			sfdev->sr |= 0x30;
> >>> +
> >>> +		sfdev->state = READY;
> >>> +		sfdev->read_mode = READ_STATUS;
> >>> +		goto out_unlock;
> >>> +	}
> >>> +
> >>> +	/* write commands in READY state */
> >>> +	switch (value & 0xFF) {
> >>> +	case CFI_CMD_READ_JEDEC:
> >>> +		sfdev->read_mode = READ_DEVICE_ID;
> >>> +		break;
> >>> +	case CFI_CMD_READ_STATUS_REGISTER:
> >>> +		sfdev->read_mode = READ_STATUS;
> >>> +		break;
> >>> +	case CFI_CMD_READ_CFI_QUERY:
> >>> +		sfdev->read_mode = READ_QUERY;
> >>> +		break;
> >>> +	case CFI_CMD_CLEAR_STATUS_REGISTER:
> >>> +		sfdev->sr = 0x80;
> >>> +		break;
> >>> +	case CFI_CMD_WORD_PROGRAM_SETUP:
> >>> +	case CFI_CMD_ALTERNATE_WORD_PROGRAM_SETUP:
> >>> +		sfdev->state = WP_SETUP;
> >>> +		sfdev->read_mode = READ_STATUS;
> >>> +		break;
> >>> +	case CFI_CMD_LOCK_BLOCK_SETUP:
> >>> +		sfdev->state = LOCK_SETUP;
> >>> +		break;
> >>> +	case CFI_CMD_BLOCK_ERASE_SETUP:
> >>> +		sfdev->state = ERASE_SETUP;
> >>> +		sfdev->read_mode = READ_STATUS;
> >>> +		break;
> >>> +	case CFI_CMD_BUFFERED_PROGRAM_SETUP:
> >>> +		buffer_setup(sfdev);
> >>> +		sfdev->state = BP_SETUP;
> >>> +		sfdev->read_mode = READ_STATUS;
> >>> +		break;
> >>> +	case CFI_CMD_BUFFERED_PROGRAM_CONFIRM:
> >>> +		pr_debug("CFI flash: unexpected confirm command 0xD0");
> >>> +		break;
> >>> +	default:
> >>> +		pr_debug("CFI flash: unknown command 0x%x", value);
> >>> +		/* fall through */
> >>> +	case CFI_CMD_READ_ARRAY:
> >>> +		sfdev->read_mode = READ_ARRAY;
> >>> +		break;
> >>> +	}
> >>> +
> >>> +out_unlock:
> >>> +	mutex_unlock(&sfdev->mutex);
> >>> +}
> >>> +
> >>> +#ifdef CONFIG_HAS_LIBFDT
> >>> +static void generate_cfi_flash_fdt_node(void *fdt,
> >>> +					struct device_header *dev_hdr,
> >>> +					void (*generate_irq_prop)(void *fdt,
> >>> +								  u8 irq,
> >>> +								enum irq_type))
> >>> +{
> >>> +	struct cfi_flash_device *sfdev;
> >>> +	u64 reg_prop[2];
> >>> +
> >>> +	sfdev = container_of(dev_hdr, struct cfi_flash_device, dev_hdr);
> >>> +	reg_prop[0] = cpu_to_fdt64(sfdev->base_addr);
> >>> +	reg_prop[1] = cpu_to_fdt64(sfdev->size);
> >>> +
> >>> +	_FDT(fdt_begin_node(fdt, "flash"));
> >>> +	_FDT(fdt_property_cell(fdt, "bank-width", CFI_BUS_WIDTH));
> >>> +	_FDT(fdt_property_cell(fdt, "#address-cells", 0x1));
> >>> +	_FDT(fdt_property_cell(fdt, "#size-cells", 0x1));
> >>> +	_FDT(fdt_property_string(fdt, "compatible", "cfi-flash"));
> >>> +	_FDT(fdt_property_string(fdt, "label", "System-firmware"));
> >>> +	_FDT(fdt_property(fdt, "reg", &reg_prop, sizeof(reg_prop)));
> >>> +	_FDT(fdt_end_node(fdt));
> >>> +}
> >>> +#else
> >>> +#define generate_cfi_flash_fdt_node NULL
> >>> +#endif
> >>> +
> >>> +static struct cfi_flash_device *create_flash_device_file(struct kvm *kvm,
> >>> +							 const char *filename)
> >>> +{
> >>> +	struct cfi_flash_device *sfdev;
> >>> +	struct stat statbuf;
> >>> +	unsigned int value;
> >>> +	int ret;
> >>> +	int fd;
> >>> +
> >>> +	fd = open(filename, O_RDWR);
> >>> +	if (fd < 0)
> >>> +		return ERR_PTR(-errno);
> >>> +	if (fstat(fd, &statbuf) < 0) {
> >>> +		close(fd);
> >>> +		return ERR_PTR(-errno);
> >>> +	}
> >>> +
> >>> +	sfdev = malloc(sizeof(struct cfi_flash_device));
> >>> +	if (!sfdev) {
> >>> +		close(fd);
> >>> +		return ERR_PTR(-ENOMEM);
> >>> +	}
> >>> +
> >>> +	sfdev->size = (statbuf.st_size + 4095) & ~0xfffUL;
> >>> +	sfdev->flash_memory = mmap(NULL, statbuf.st_size,
> >>> +				   PROT_READ | PROT_WRITE, MAP_SHARED,
> >>> +				   fd, 0);
> >>> +	if (sfdev->flash_memory == MAP_FAILED) {
> >>> +		close(fd);
> >>> +		free(sfdev);
> >>> +		return ERR_PTR(-errno);
> >>> +	}
> >>> +	sfdev->base_addr = KVM_FLASH_MMIO_BASE;
> >>> +	sfdev->state = READY;
> >>> +	sfdev->read_mode = READ_ARRAY;
> >>> +	sfdev->sr = 0x80;
> >>> +	sfdev->rcr = 0xbfcf;
> >>> +
> >>> +	value = roundup(nr_erase_blocks(sfdev), BITS_PER_LONG) / 8;
> >>> +	sfdev->lock_bm = malloc(value);
> >>> +	memset(sfdev->lock_bm, 0, value);
> >>> +
> >>> +	sfdev->dev_hdr.bus_type = DEVICE_BUS_MMIO;
> >>> +	sfdev->dev_hdr.data = generate_cfi_flash_fdt_node;
> >>> +	mutex_init(&sfdev->mutex);
> >>> +	ret = device__register(&sfdev->dev_hdr);
> >>> +	if (ret) {
> >>> +		free(sfdev->flash_memory);
> >>> +		free(sfdev);
> >>> +		return ERR_PTR(ret);
> >>> +	}
> >>> +
> >>> +	ret = kvm__register_mmio(kvm,
> >>> +				 sfdev->base_addr, sfdev->size,
> >>> +				 false, cfi_flash_mmio, sfdev);
> >>> +	if (ret) {
> >>> +		device__unregister(&sfdev->dev_hdr);
> >>> +		free(sfdev->flash_memory);
> >>> +		free(sfdev);
> >>> +		return ERR_PTR(ret);
> >>> +	}
> >>> +
> >>> +	return sfdev;
> >>> +}
> >>> +
> >>> +static int flash__init(struct kvm *kvm)
> >>> +{
> >>> +	struct cfi_flash_device *sfdev;
> >>> +
> >>> +	if (!kvm->cfg.flash_filename)
> >>> +		return 0;
> >>> +
> >>> +	sfdev = create_flash_device_file(kvm, kvm->cfg.flash_filename);
> >>> +	if (IS_ERR(sfdev))
> >>> +		return PTR_ERR(sfdev);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +dev_init(flash__init);
> >>> diff --git a/include/kvm/kvm-config.h b/include/kvm/kvm-config.h
> >>> index a052b0bc..f4a8b831 100644
> >>> --- a/include/kvm/kvm-config.h
> >>> +++ b/include/kvm/kvm-config.h
> >>> @@ -35,6 +35,7 @@ struct kvm_config {
> >>>  	const char *vmlinux_filename;
> >>>  	const char *initrd_filename;
> >>>  	const char *firmware_filename;
> >>> +	const char *flash_filename;
> >>>  	const char *console;
> >>>  	const char *dev;
> >>>  	const char *network;
> >>> diff --git a/include/kvm/util.h b/include/kvm/util.h
> >>> index 4ca7aa93..5c37f0b7 100644
> >>> --- a/include/kvm/util.h
> >>> +++ b/include/kvm/util.h
> >>> @@ -104,6 +104,11 @@ static inline unsigned long roundup_pow_of_two(unsigned long x)
> >>>  	return x ? 1UL << fls_long(x - 1) : 0;
> >>>  }
> >>>  
> >>> +static inline int pow2_size(unsigned long x)
> >>> +{
> >>> +	return (sizeof(x) * 8) - __builtin_clzl(x - 1);
> >>> +}
> >>> +
> >>>  struct kvm;
> >>>  void *mmap_hugetlbfs(struct kvm *kvm, const char *htlbfs_path, u64 size);
> >>>  void *mmap_anon_or_hugetlbfs(struct kvm *kvm, const char *hugetlbfs_path, u64 size);    

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2020-02-20 18:00 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-07 12:19 [PATCH kvmtool v2] Add emulation for CFI compatible flash memory Andre Przywara
2020-02-07 17:34 ` Alexandru Elisei
2020-02-14 13:47   ` Andre Przywara
2020-02-14 15:38     ` Alexandru Elisei
2020-02-17 17:20 ` Alexandru Elisei
2020-02-19 17:26   ` Andre Przywara
2020-02-20 10:24     ` Alexandru Elisei
2020-02-20 18:00       ` Andre Przywara

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).