Hi Phil, Laurent, On Mon, 2020-05-04 at 12:26 +0300, Laurent Pinchart wrote: > From: Phil Elwell > > Conditional on a new compatible string, change the pagelist encoding > such that the top 24 bits are the pfn, leaving 8 bits for run length > (-1). > > Signed-off-by: Phil Elwell > Signed-off-by: Jacopo Mondi > --- > .../interface/vchiq_arm/vchiq_2835_arm.c | 88 ++++++++++++++----- > .../interface/vchiq_arm/vchiq_arm.c | 6 ++ > .../interface/vchiq_arm/vchiq_arm.h | 1 + > 3 files changed, 74 insertions(+), 21 deletions(-) > > diff --git > a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > index 3e422a7eb3f1..ecec84ad4345 100644 > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c > @@ -16,6 +16,8 @@ > #include > > #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) > +#define VC_SAFE(x) (g_use_36bit_addrs ? ((u32)(x) | 0xc0000000) : (u32)(x)) > +#define IS_VC_SAFE(x) (g_use_36bit_addrs ? !((x) & ~0x3fffffffull) : 1) As I commented earlier, this is a sign your dma-ranges are wrong, most of the code below reimplements what is already done by dma-direct (see kernel/dma/direct.c). Once properly setup, you should be able to use whatever phys address dmam_alloc_coherent() provides and drop g_use_36bit_addrs. Note that on arm32+LPAE, dma-direct/swiotlb are the default dma_ops, so this also applies there. Regards, Nicolas > #include "vchiq_arm.h" > #include "vchiq_connected.h" > @@ -62,6 +64,7 @@ static void __iomem *g_regs; > */ > static unsigned int g_cache_line_size = 32; > static struct dma_pool *g_dma_pool; > +static unsigned int g_use_36bit_addrs = 0; > static unsigned int g_fragments_size; > static char *g_fragments_base; > static char *g_free_fragments; > @@ -104,6 +107,8 @@ int vchiq_platform_init(struct platform_device *pdev, > struct vchiq_state *state) > g_cache_line_size = drvdata->cache_line_size; > g_fragments_size = 2 * g_cache_line_size; > > + g_use_36bit_addrs = (dev->dma_pfn_offset == 0); > + > /* Allocate space for the channels in coherent memory */ > slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE); > frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS); > @@ -115,14 +120,21 @@ int vchiq_platform_init(struct platform_device *pdev, > struct vchiq_state *state) > return -ENOMEM; > } > > + if (!IS_VC_SAFE(slot_phys)) { > + dev_err(dev, "allocated DMA memory %pad is not VC-safe\n", > + &slot_phys); > + return -ENOMEM; > + } > + > WARN_ON(((unsigned long)slot_mem & (PAGE_SIZE - 1)) != 0); > + channelbase = VC_SAFE(slot_phys); > > vchiq_slot_zero = vchiq_init_slots(slot_mem, slot_mem_size); > if (!vchiq_slot_zero) > return -EINVAL; > > vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_OFFSET_IDX] = > - (int)slot_phys + slot_mem_size; > + channelbase + slot_mem_size; > vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] = > MAX_FRAGMENTS; > > @@ -155,7 +167,6 @@ int vchiq_platform_init(struct platform_device *pdev, > struct vchiq_state *state) > } > > /* Send the base address of the slots to VideoCore */ > - channelbase = slot_phys; > err = rpi_firmware_property(fw, RPI_FIRMWARE_VCHIQ_INIT, > &channelbase, sizeof(channelbase)); > if (err || channelbase) { > @@ -241,7 +252,7 @@ vchiq_prepare_bulk_data(struct vchiq_bulk *bulk, void > *offset, int size, > if (!pagelistinfo) > return VCHIQ_ERROR; > > - bulk->data = (void *)(unsigned long)pagelistinfo->dma_addr; > + bulk->data = (void *)VC_SAFE(pagelistinfo->dma_addr); > > /* > * Store the pagelistinfo address in remote_data, > @@ -475,25 +486,60 @@ create_pagelist(char __user *buf, size_t count, unsigned > short type) > > /* Combine adjacent blocks for performance */ > k = 0; > - for_each_sg(scatterlist, sg, dma_buffers, i) { > - u32 len = sg_dma_len(sg); > - u32 addr = sg_dma_address(sg); > + if (g_use_36bit_addrs) { > + for_each_sg(scatterlist, sg, dma_buffers, i) { > + u32 len = sg_dma_len(sg); > + u64 addr = sg_dma_address(sg); > + u32 page_id = (u32)((addr >> 4) & ~0xff); > + u32 sg_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; > > - /* Note: addrs is the address + page_count - 1 > - * The firmware expects blocks after the first to be page- > - * aligned and a multiple of the page size > - */ > - WARN_ON(len == 0); > - WARN_ON(i && (i != (dma_buffers - 1)) && (len & ~PAGE_MASK)); > - WARN_ON(i && (addr & ~PAGE_MASK)); > - if (k > 0 && > - ((addrs[k - 1] & PAGE_MASK) + > - (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)) > - == (addr & PAGE_MASK)) > - addrs[k - 1] += ((len + PAGE_SIZE - 1) >> PAGE_SHIFT); > - else > - addrs[k++] = (addr & PAGE_MASK) | > - (((len + PAGE_SIZE - 1) >> PAGE_SHIFT) - 1); > + /* Note: addrs is the address + page_count - 1 > + * The firmware expects blocks after the first to be > page- > + * aligned and a multiple of the page size > + */ > + WARN_ON(len == 0); > + WARN_ON(i && > + (i != (dma_buffers - 1)) && (len & ~PAGE_MASK)); > + WARN_ON(i && (addr & ~PAGE_MASK)); > + WARN_ON(upper_32_bits(addr) > 0xf); > + if (k > 0 && > + ((addrs[k - 1] & ~0xff) + > + (((addrs[k - 1] & 0xff) + 1) << 8) > + == page_id)) { > + u32 inc_pages = min(sg_pages, > + 0xff - (addrs[k - 1] & > 0xff)); > + addrs[k - 1] += inc_pages; > + page_id += inc_pages << 8; > + sg_pages -= inc_pages; > + } > + while (sg_pages) { > + u32 inc_pages = min(sg_pages, 0x100u); > + addrs[k++] = page_id | (inc_pages - 1); > + page_id += inc_pages << 8; > + sg_pages -= inc_pages; > + } > + } > + } else { > + for_each_sg(scatterlist, sg, dma_buffers, i) { > + u32 len = sg_dma_len(sg); > + u32 addr = VC_SAFE(sg_dma_address(sg)); > + u32 new_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; > + > + /* Note: addrs is the address + page_count - 1 > + * The firmware expects blocks after the first to be > page- > + * aligned and a multiple of the page size > + */ > + WARN_ON(len == 0); > + WARN_ON(i && (i != (dma_buffers - 1)) && (len & > ~PAGE_MASK)); > + WARN_ON(i && (addr & ~PAGE_MASK)); > + if (k > 0 && > + ((addrs[k - 1] & PAGE_MASK) + > + (((addrs[k - 1] & ~PAGE_MASK) + 1) << PAGE_SHIFT)) > + == (addr & PAGE_MASK)) > + addrs[k - 1] += new_pages; > + else > + addrs[k++] = (addr & PAGE_MASK) | (new_pages - > 1); > + } > } > > /* Partial cache lines (fragments) require special measures */ > diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c > index d1a556f16499..dd3c8f829daa 100644 > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c > @@ -117,6 +117,11 @@ static struct vchiq_drvdata bcm2836_drvdata = { > .cache_line_size = 64, > }; > > +static struct vchiq_drvdata bcm2711_drvdata = { > + .cache_line_size = 64, > + .use_36bit_addrs = true, > +}; > + > static const char *const ioctl_names[] = { > "CONNECT", > "SHUTDOWN", > @@ -2710,6 +2715,7 @@ void vchiq_platform_conn_state_changed(struct > vchiq_state *state, > static const struct of_device_id vchiq_of_match[] = { > { .compatible = "brcm,bcm2835-vchiq", .data = &bcm2835_drvdata }, > { .compatible = "brcm,bcm2836-vchiq", .data = &bcm2836_drvdata }, > + { .compatible = "brcm,bcm2711-vchiq", .data = &bcm2711_drvdata }, > {}, > }; > MODULE_DEVICE_TABLE(of, vchiq_of_match); > diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h > b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h > index 0784c5002417..f8b1c005af62 100644 > --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h > +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h > @@ -52,6 +52,7 @@ struct vchiq_arm_state { > > struct vchiq_drvdata { > const unsigned int cache_line_size; > + const bool use_36bit_addrs; > struct rpi_firmware *fw; > }; >