On Thu, May 9, 2019 at 8:02 PM Arnaud Pouliquen wrote: > > Hello Xiang, > > This patch has the opposite effect on my platform as DMA allocation is > aligned on 4k page. > For instance i declared: > - in RX 6 buffers (of 512 bytes) > - in TX 4 buffers ( of 512 bytes) > Yes, dma_init_coherent_memory always allocates memory in 4KB units, but this limitation wastes too much memory for remoteproc/rpmsg. The attached patch fixes this problem by adding a new device tree option to customize the unit size. > The result is (kernel trace) > [ 41.915896] virtio_rpmsg_bus virtio0: rx buffers: va ebb5f5ca, dma > 0x0x10042000 > [ 41.915922] virtio_rpmsg_bus virtio0: tx buffers: va a7865153, dma > 0x0x10043000 > > The TX buffer memory is allocated on next 4k page... > > Anyway separate the RX and TX allocation makes sense. This could also > allow to allocate buffers in 2 different memories. > For time being, issue is that only one memory area can be attached to > the virtio device for DMA allocation... and PA/DA translations are missing. > This means that we probably need (in a first step) a new remoteproc API > for memory allocation. > These memories should be declared and mmaped in rproc platform drivers > (memory region) or in resource table (carveout). > This is partially done in the API for the platform driver > (rproc_mem_entry_init) but not available for rproc clients. > > Regards > Arnaud > > > On 1/31/19 4:41 PM, Xiang Xiao wrote: > > many dma allocator align the returned address with buffer size, > > so two small allocation could reduce the alignment requirement > > and save the memory space wasted by the potential alignment. 
> > > > Signed-off-by: Xiang Xiao > > --- > > drivers/rpmsg/virtio_rpmsg_bus.c | 58 +++++++++++++++++++++++----------------- > > 1 file changed, 34 insertions(+), 24 deletions(-) > > > > diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c > > index fb0d2eb..59c4554 100644 > > --- a/drivers/rpmsg/virtio_rpmsg_bus.c > > +++ b/drivers/rpmsg/virtio_rpmsg_bus.c > > @@ -40,7 +40,8 @@ > > * @num_sbufs: total number of buffers for tx > > * @buf_size: size of one rx or tx buffer > > * @last_sbuf: index of last tx buffer used > > - * @bufs_dma: dma base addr of the buffers > > + * @rbufs_dma: dma base addr of rx buffers > > + * @sbufs_dma: dma base addr of tx buffers > > * @tx_lock: protects svq, sbufs and sleepers, to allow concurrent senders. > > * sending a message might require waking up a dozing remote > > * processor, which involves sleeping, hence the mutex. > > @@ -62,7 +63,8 @@ struct virtproc_info { > > unsigned int num_sbufs; > > unsigned int buf_size; > > int last_sbuf; > > - dma_addr_t bufs_dma; > > + dma_addr_t rbufs_dma; > > + dma_addr_t sbufs_dma; > > struct mutex tx_lock; > > struct idr endpoints; > > struct mutex endpoints_lock; > > @@ -872,9 +874,7 @@ static int rpmsg_probe(struct virtio_device *vdev) > > static const char * const names[] = { "input", "output" }; > > struct virtqueue *vqs[2]; > > struct virtproc_info *vrp; > > - void *bufs_va; > > int err = 0, i; > > - size_t total_buf_space; > > bool notify; > > > > vrp = kzalloc(sizeof(*vrp), GFP_KERNEL); > > @@ -909,25 +909,28 @@ static int rpmsg_probe(struct virtio_device *vdev) > > > > vrp->buf_size = MAX_RPMSG_BUF_SIZE; > > > > - total_buf_space = (vrp->num_rbufs + vrp->num_sbufs) * vrp->buf_size; > > - > > /* allocate coherent memory for the buffers */ > > - bufs_va = dma_alloc_coherent(vdev->dev.parent->parent, > > - total_buf_space, &vrp->bufs_dma, > > - GFP_KERNEL); > > - if (!bufs_va) { > > + vrp->rbufs = dma_alloc_coherent(vdev->dev.parent->parent, > > + 
vrp->num_rbufs * vrp->buf_size, > > + &vrp->rbufs_dma, GFP_KERNEL); > > + if (!vrp->rbufs) { > > err = -ENOMEM; > > goto vqs_del; > > } > > > > - dev_dbg(&vdev->dev, "buffers: va %p, dma %pad\n", > > - bufs_va, &vrp->bufs_dma); > > + dev_dbg(&vdev->dev, "rx buffers: va %p, dma 0x%pad\n", > > + vrp->rbufs, &vrp->rbufs_dma); > > > > - /* first part of the buffers is dedicated for RX */ > > - vrp->rbufs = bufs_va; > > + vrp->sbufs = dma_alloc_coherent(vdev->dev.parent->parent, > > + vrp->num_sbufs * vrp->buf_size, > > + &vrp->sbufs_dma, GFP_KERNEL); > > + if (!vrp->sbufs) { > > + err = -ENOMEM; > > + goto free_rbufs; > > + } > > > > - /* and second part is dedicated for TX */ > > - vrp->sbufs = bufs_va + vrp->num_rbufs * vrp->buf_size; > > + dev_dbg(&vdev->dev, "tx buffers: va %p, dma 0x%pad\n", > > + vrp->sbufs, &vrp->sbufs_dma); > > > > /* set up the receive buffers */ > > for (i = 0; i < vrp->num_rbufs; i++) { > > @@ -954,7 +957,7 @@ static int rpmsg_probe(struct virtio_device *vdev) > > if (!vrp->ns_ept) { > > dev_err(&vdev->dev, "failed to create the ns ept\n"); > > err = -ENOMEM; > > - goto free_coherent; > > + goto free_sbufs; > > } > > } > > > > @@ -979,9 +982,14 @@ static int rpmsg_probe(struct virtio_device *vdev) > > > > return 0; > > > > -free_coherent: > > - dma_free_coherent(vdev->dev.parent->parent, total_buf_space, > > - bufs_va, vrp->bufs_dma); > > +free_sbufs: > > + dma_free_coherent(vdev->dev.parent->parent, > > + vrp->num_sbufs * vrp->buf_size, > > + vrp->sbufs, vrp->sbufs_dma); > > +free_rbufs: > > + dma_free_coherent(vdev->dev.parent->parent, > > + vrp->num_rbufs * vrp->buf_size, > > + vrp->rbufs, vrp->rbufs_dma); > > vqs_del: > > vdev->config->del_vqs(vrp->vdev); > > free_vrp: > > @@ -999,8 +1007,6 @@ static int rpmsg_remove_device(struct device *dev, void *data) > > static void rpmsg_remove(struct virtio_device *vdev) > > { > > struct virtproc_info *vrp = vdev->priv; > > - unsigned int num_bufs = vrp->num_rbufs + vrp->num_sbufs; > > - size_t 
total_buf_space = num_bufs * vrp->buf_size; > > int ret; > > > > vdev->config->reset(vdev); > > @@ -1016,8 +1022,12 @@ static void rpmsg_remove(struct virtio_device *vdev) > > > > vdev->config->del_vqs(vrp->vdev); > > > > - dma_free_coherent(vdev->dev.parent->parent, total_buf_space, > > - vrp->rbufs, vrp->bufs_dma); > > + dma_free_coherent(vdev->dev.parent->parent, > > + vrp->num_sbufs * vrp->buf_size, > > + vrp->sbufs, vrp->sbufs_dma); > > + dma_free_coherent(vdev->dev.parent->parent, > > + vrp->num_rbufs * vrp->buf_size, > > + vrp->rbufs, vrp->rbufs_dma); > > > > kfree(vrp); > > } > >