All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Agner <stefan@agner.ch>
To: bmeng.cn@gmail.com
Cc: nsaenz@kernel.org, u-boot@lists.denx.de, mbrugger@suse.com,
	m.szyprowski@samsung.com, s.nawrocki@samsung.com
Subject: Re: [RFC PATCH 4/4] nvme: translate virtual addresses into the bus's address space
Date: Fri, 24 Sep 2021 01:22:54 +0200	[thread overview]
Message-ID: <a5a7f6ca87b0c6d49f463696d69e8b60@agner.ch> (raw)
In-Reply-To: <ad0644a33852f459dc32e77bd0452397ba95ab1d.1632439220.git.stefan@agner.ch>

On 2021-09-24 01:20, Stefan Agner wrote:
> So far we've been content with passing physical/CPU addresses when
> configuring memory addresses into NVMe controllers, but not all
> platforms have buses with transparent mappings. Specifically the
> Raspberry Pi 4 might introduce an offset to memory accesses incoming
> from its PCIe port.
> 
> Introduce nvme_virt_to_bus() and nvme_bus_to_virt() to cater with these
> limitations, and make sure we don't break non DM users.
> For devices where PCIe's view of host memory doesn't match the memory
> as seen by the CPU.
> 
> A similar change has been introduced for XHCI controller with
> commit 1a474559d90a ("xhci: translate virtual addresses into the bus's
> address space").
> 
> Signed-off-by: Stefan Agner <stefan@agner.ch>
> ---
> 
>  drivers/nvme/nvme.c | 32 ++++++++++++++++++--------------
>  drivers/nvme/nvme.h | 15 +++++++++++++++
>  2 files changed, 33 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c
> index 4c4dc7cc4d..0b7082d71b 100644
> --- a/drivers/nvme/nvme.c
> +++ b/drivers/nvme/nvme.c
> @@ -95,7 +95,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
>  		buffer += (page_size - offset);
>  
>  	if (length <= page_size) {
> -		*prp2 = (u64)buffer;
> +		*prp2 = nvme_virt_to_bus(dev, buffer);
>  		return 0;
>  	}
>  
> @@ -120,16 +120,16 @@ static int nvme_setup_prps(struct nvme_dev *dev,
> u64 *prp2,
>  	i = 0;
>  	while (nprps) {
>  		if (i == prps_per_page) {
> -			u64 next_prp_list = (u64)prp_pool + page_size;
> -			*(prp_pool + i) = cpu_to_le64(next_prp_list);
> +			u64 next = nvme_virt_to_bus(dev, prp_pool + page_size);
> +			*(prp_pool + i) = cpu_to_le64(next);
>  			i = 0;
>  			prp_pool += page_size;
>  		}
> -		*(prp_pool + i++) = cpu_to_le64((u64)buffer);
> +		*(prp_pool + i++) = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  		buffer += page_size;
>  		nprps--;
>  	}
> -	*prp2 = (u64)dev->prp_pool;
> +	*prp2 = nvme_virt_to_bus(dev, dev->prp_pool);
>  
>  	flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
>  			   dev->prp_entry_num * sizeof(u64));
> @@ -356,6 +356,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
>  	int result;
>  	u32 aqa;
>  	u64 cap = dev->cap;
> +	u64 dma_addr;
>  	struct nvme_queue *nvmeq;
>  	/* most architectures use 4KB as the page size */
>  	unsigned page_shift = 12;
> @@ -396,8 +397,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
>  	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
>  
>  	writel(aqa, &dev->bar->aqa);
> -	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
> -	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
> +	dma_addr = nvme_virt_to_bus(dev, nvmeq->sq_cmds);
> +	nvme_writeq(dma_addr, &dev->bar->asq);
> +	dma_addr = nvme_virt_to_bus(dev, nvmeq->cqes);
> +	nvme_writeq(dma_addr, &dev->bar->acq);
>  
>  	result = nvme_enable_ctrl(dev);
>  	if (result)
> @@ -423,7 +426,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
>  
>  	memset(&c, 0, sizeof(c));
>  	c.create_cq.opcode = nvme_admin_create_cq;
> -	c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
> +	c.create_cq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->cqes));
>  	c.create_cq.cqid = cpu_to_le16(qid);
>  	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
>  	c.create_cq.cq_flags = cpu_to_le16(flags);
> @@ -440,7 +443,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
>  
>  	memset(&c, 0, sizeof(c));
>  	c.create_sq.opcode = nvme_admin_create_sq;
> -	c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
> +	c.create_sq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->sq_cmds));
>  	c.create_sq.sqid = cpu_to_le16(qid);
>  	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
>  	c.create_sq.sq_flags = cpu_to_le16(flags);
> @@ -461,14 +464,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid,
>  	memset(&c, 0, sizeof(c));
>  	c.identify.opcode = nvme_admin_identify;
>  	c.identify.nsid = cpu_to_le32(nsid);
> -	c.identify.prp1 = cpu_to_le64((u64)buffer);
> +	c.identify.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  
>  	length -= (page_size - offset);
>  	if (length <= 0) {
>  		c.identify.prp2 = 0;
>  	} else {
>  		buffer += (page_size - offset);
> -		c.identify.prp2 = cpu_to_le64((u64)buffer);
> +		c.identify.prp2 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  	}
>  
>  	c.identify.cns = cpu_to_le32(cns);
> @@ -493,7 +496,7 @@ int nvme_get_features(struct nvme_dev *dev,
> unsigned fid, unsigned nsid,
>  	memset(&c, 0, sizeof(c));
>  	c.features.opcode = nvme_admin_get_features;
>  	c.features.nsid = cpu_to_le32(nsid);
> -	c.features.prp1 = cpu_to_le64((u64)buffer);
> +	c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  	c.features.fid = cpu_to_le32(fid);
>  
>  	ret = nvme_submit_admin_cmd(dev, &c, result);
> @@ -519,7 +522,7 @@ int nvme_set_features(struct nvme_dev *dev,
> unsigned fid, unsigned dword11,
>  
>  	memset(&c, 0, sizeof(c));
>  	c.features.opcode = nvme_admin_set_features;
> -	c.features.prp1 = cpu_to_le64((u64)buffer);
> +	c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  	c.features.fid = cpu_to_le32(fid);
>  	c.features.dword11 = cpu_to_le32(dword11);
>  
> @@ -775,7 +778,7 @@ static ulong nvme_blk_rw(struct udevice *udev,
> lbaint_t blknr,
>  		c.rw.slba = cpu_to_le64(slba);
>  		slba += lbas;
>  		c.rw.length = cpu_to_le16(lbas - 1);
> -		c.rw.prp1 = cpu_to_le64((ulong)buffer);
> +		c.rw.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  		c.rw.prp2 = cpu_to_le64(prp2);
>  		status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q],
>  				&c, NULL, IO_TIMEOUT);
> @@ -834,6 +837,7 @@ static int nvme_probe(struct udevice *udev)
>  	struct nvme_id_ns *id;
>  
>  	ndev->instance = trailing_strtol(udev->name);
> +	ndev->dev = udev->parent;

It only translates addresses correctly once I chose the parent device. I
am pretty sure that this is not the right way to fix it. Shouldn't the
child automatically assume that the same translation is required?

--
Stefan

>  
>  	INIT_LIST_HEAD(&ndev->namespaces);
>  	ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0,
> diff --git a/drivers/nvme/nvme.h b/drivers/nvme/nvme.h
> index c6aae4da5d..31e6899bca 100644
> --- a/drivers/nvme/nvme.h
> +++ b/drivers/nvme/nvme.h
> @@ -7,8 +7,15 @@
>  #ifndef __DRIVER_NVME_H__
>  #define __DRIVER_NVME_H__
>  
> +#include <phys2bus.h>
>  #include <asm/io.h>
>  
> +#if CONFIG_IS_ENABLED(DM_USB)
> +#define nvme_to_dev(_dev)     _dev->dev
> +#else
> +#define nvme_to_dev(_dev)     NULL
> +#endif
> +
>  struct nvme_id_power_state {
>  	__le16			max_power;	/* centiwatts */
>  	__u8			rsvd2;
> @@ -596,6 +603,9 @@ enum {
>  
>  /* Represents an NVM Express device. Each nvme_dev is a PCI function. */
>  struct nvme_dev {
> +#if CONFIG_IS_ENABLED(DM_USB)
> +	struct udevice *dev;
> +#endif
>  	struct list_head node;
>  	struct nvme_queue **queues;
>  	u32 __iomem *dbs;
> @@ -635,4 +645,9 @@ struct nvme_ns {
>  	u8 flbas;
>  };
>  
> +static inline dma_addr_t nvme_virt_to_bus(struct nvme_dev *dev, void *addr)
> +{
> +	return dev_phys_to_bus(nvme_to_dev(dev), virt_to_phys(addr));
> +}
> +
>  #endif /* __DRIVER_NVME_H__ */

      reply	other threads:[~2021-09-23 23:23 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-23 23:20 [RFC PATCH 1/4] Revert "nvme: Correct the prps per page calculation method" Stefan Agner
2021-09-23 23:20 ` [RFC PATCH 2/4] nvme: improve readability of nvme_setup_prps() Stefan Agner
2021-09-23 23:20 ` [RFC PATCH 3/4] nvme: Use pointer for CPU addressed buffers Stefan Agner
2021-09-23 23:20 ` [RFC PATCH 4/4] nvme: translate virtual addresses into the bus's address space Stefan Agner
2021-09-23 23:22   ` Stefan Agner [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a5a7f6ca87b0c6d49f463696d69e8b60@agner.ch \
    --to=stefan@agner.ch \
    --cc=bmeng.cn@gmail.com \
    --cc=m.szyprowski@samsung.com \
    --cc=mbrugger@suse.com \
    --cc=nsaenz@kernel.org \
    --cc=s.nawrocki@samsung.com \
    --cc=u-boot@lists.denx.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.