* [PATCH] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
@ 2020-01-07 15:30 Lorenzo Bianconi
2020-01-08 14:53 ` Ilias Apalodimas
0 siblings, 1 reply; 4+ messages in thread
From: Lorenzo Bianconi @ 2020-01-07 15:30 UTC (permalink / raw)
To: ilias.apalodimas; +Cc: netdev, brouer, davem, lorenzo.bianconi
Socionext driver can run on dma coherent and non-coherent devices.
Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
now the driver can let the page_pool API manage the needed DMA sync
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
drivers/net/ethernet/socionext/netsec.c | 45 +++++++++++++++----------
1 file changed, 28 insertions(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index b5a9e947a4a8..00404fef17e8 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -243,6 +243,7 @@
NET_IP_ALIGN)
#define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+#define NETSEC_RX_BUF_SIZE (PAGE_SIZE - NETSEC_RX_BUF_NON_DATA)
#define DESC_SZ sizeof(struct netsec_de)
@@ -719,7 +720,6 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
- enum dma_data_direction dma_dir;
struct page *page;
page = page_pool_dev_alloc_pages(dring->page_pool);
@@ -734,9 +734,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
/* Make sure the incoming payload fits in the page for XDP and non-XDP
* cases and reserve enough space for headroom + skb_shared_info
*/
- *desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA;
- dma_dir = page_pool_get_dma_dir(dring->page_pool);
- dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir);
+ *desc_len = NETSEC_RX_BUF_SIZE;
return page_address(page);
}
@@ -883,6 +881,7 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
struct xdp_buff *xdp)
{
+ struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
u32 ret = NETSEC_XDP_PASS;
int err;
u32 act;
@@ -896,7 +895,10 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
case XDP_TX:
ret = netsec_xdp_xmit_back(priv, xdp);
if (ret != NETSEC_XDP_TX)
- xdp_return_buff(xdp);
+ __page_pool_put_page(dring->page_pool,
+ virt_to_head_page(xdp->data),
+ xdp->data_end - xdp->data_hard_start,
+ true);
break;
case XDP_REDIRECT:
err = xdp_do_redirect(priv->ndev, xdp, prog);
@@ -904,7 +906,10 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
ret = NETSEC_XDP_REDIR;
} else {
ret = NETSEC_XDP_CONSUMED;
- xdp_return_buff(xdp);
+ __page_pool_put_page(dring->page_pool,
+ virt_to_head_page(xdp->data),
+ xdp->data_end - xdp->data_hard_start,
+ true);
}
break;
default:
@@ -915,7 +920,10 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
/* fall through -- handle aborts by dropping packet */
case XDP_DROP:
ret = NETSEC_XDP_CONSUMED;
- xdp_return_buff(xdp);
+ __page_pool_put_page(dring->page_pool,
+ virt_to_head_page(xdp->data),
+ xdp->data_end - xdp->data_hard_start,
+ true);
break;
}
@@ -1014,7 +1022,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
* cache state. Since we paid the allocation cost if
* building an skb fails try to put the page into cache
*/
- page_pool_recycle_direct(dring->page_pool, page);
+ __page_pool_put_page(dring->page_pool, page,
+ desc->len, true);
netif_err(priv, drv, priv->ndev,
"rx failed to build skb\n");
break;
@@ -1272,17 +1281,19 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
{
struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
- struct page_pool_params pp_params = { 0 };
+ struct page_pool_params pp_params = {
+ .order = 0,
+ /* internal DMA mapping in page_pool */
+ .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+ .pool_size = DESC_NUM,
+ .nid = NUMA_NO_NODE,
+ .dev = priv->dev,
+ .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+ .offset = NETSEC_RXBUF_HEADROOM,
+ .max_len = NETSEC_RX_BUF_SIZE,
+ };
int i, err;
- pp_params.order = 0;
- /* internal DMA mapping in page_pool */
- pp_params.flags = PP_FLAG_DMA_MAP;
- pp_params.pool_size = DESC_NUM;
- pp_params.nid = NUMA_NO_NODE;
- pp_params.dev = priv->dev;
- pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-
dring->page_pool = page_pool_create(&pp_params);
if (IS_ERR(dring->page_pool)) {
err = PTR_ERR(dring->page_pool);
--
2.21.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
2020-01-07 15:30 [PATCH] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data Lorenzo Bianconi
@ 2020-01-08 14:53 ` Ilias Apalodimas
2020-01-09 17:20 ` Jesper Dangaard Brouer
0 siblings, 1 reply; 4+ messages in thread
From: Ilias Apalodimas @ 2020-01-08 14:53 UTC (permalink / raw)
To: Lorenzo Bianconi; +Cc: netdev, brouer, davem, lorenzo.bianconi
Hi Lorenzo,
On Tue, Jan 07, 2020 at 04:30:32PM +0100, Lorenzo Bianconi wrote:
> Socionext driver can run on dma coherent and non-coherent devices.
> Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> now the driver can let page_pool API to managed needed DMA sync
>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
> drivers/net/ethernet/socionext/netsec.c | 45 +++++++++++++++----------
> 1 file changed, 28 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
> index b5a9e947a4a8..00404fef17e8 100644
> --- a/drivers/net/ethernet/socionext/netsec.c
> +++ b/drivers/net/ethernet/socionext/netsec.c
> @@ -243,6 +243,7 @@
> NET_IP_ALIGN)
> #define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \
> SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
> +#define NETSEC_RX_BUF_SIZE (PAGE_SIZE - NETSEC_RX_BUF_NON_DATA)
>
> #define DESC_SZ sizeof(struct netsec_de)
>
> @@ -719,7 +720,6 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
> {
>
> struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> - enum dma_data_direction dma_dir;
> struct page *page;
>
> page = page_pool_dev_alloc_pages(dring->page_pool);
> @@ -734,9 +734,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
> /* Make sure the incoming payload fits in the page for XDP and non-XDP
> * cases and reserve enough space for headroom + skb_shared_info
> */
> - *desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA;
> - dma_dir = page_pool_get_dma_dir(dring->page_pool);
> - dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir);
> + *desc_len = NETSEC_RX_BUF_SIZE;
>
> return page_address(page);
> }
> @@ -883,6 +881,7 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> struct xdp_buff *xdp)
> {
> + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> u32 ret = NETSEC_XDP_PASS;
> int err;
> u32 act;
> @@ -896,7 +895,10 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> case XDP_TX:
> ret = netsec_xdp_xmit_back(priv, xdp);
> if (ret != NETSEC_XDP_TX)
> - xdp_return_buff(xdp);
> + __page_pool_put_page(dring->page_pool,
> + virt_to_head_page(xdp->data),
> + xdp->data_end - xdp->data_hard_start,
Do we have to include data_hard_start?
@Jesper I know bpf programs can modify the packet, but isn't it safe
to only sync for xdp->data_end - xdp->data in this case since the DMA transfer
in this driver will always start *after* the XDP headroom?
> + true);
> break;
> case XDP_REDIRECT:
> err = xdp_do_redirect(priv->ndev, xdp, prog);
> @@ -904,7 +906,10 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> ret = NETSEC_XDP_REDIR;
> } else {
> ret = NETSEC_XDP_CONSUMED;
> - xdp_return_buff(xdp);
> + __page_pool_put_page(dring->page_pool,
> + virt_to_head_page(xdp->data),
> + xdp->data_end - xdp->data_hard_start,
Ditto
> + true);
> }
> break;
> default:
> @@ -915,7 +920,10 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> /* fall through -- handle aborts by dropping packet */
> case XDP_DROP:
> ret = NETSEC_XDP_CONSUMED;
> - xdp_return_buff(xdp);
> + __page_pool_put_page(dring->page_pool,
> + virt_to_head_page(xdp->data),
> + xdp->data_end - xdp->data_hard_start,
Ditto
> + true);
> break;
> }
>
> @@ -1014,7 +1022,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
> * cache state. Since we paid the allocation cost if
> * building an skb fails try to put the page into cache
> */
> - page_pool_recycle_direct(dring->page_pool, page);
> + __page_pool_put_page(dring->page_pool, page,
> + desc->len, true);
I think you can sync for pkt_len here only, since that's the area the DMA engine
touched. Syncing for desc->len will sync the entire area (like the older
version did).
Keep in mind that the driver always starts the DMA transfer after
NETSEC_RXBUF_HEADROOM, which is 192 or 256 bytes depending on whether XDP is running or not.
So in the default SKB case, if pkt_len = 60, then
xdp.data_end - xdp.data_hard_start = 316.
I think it's safe here to only sync pkt_len, since no one will touch the XDP
headroom.
> netif_err(priv, drv, priv->ndev,
> "rx failed to build skb\n");
> break;
> @@ -1272,17 +1281,19 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
> {
> struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
> - struct page_pool_params pp_params = { 0 };
> + struct page_pool_params pp_params = {
> + .order = 0,
> + /* internal DMA mapping in page_pool */
> + .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
> + .pool_size = DESC_NUM,
> + .nid = NUMA_NO_NODE,
> + .dev = priv->dev,
> + .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
> + .offset = NETSEC_RXBUF_HEADROOM,
> + .max_len = NETSEC_RX_BUF_SIZE,
> + };
> int i, err;
>
> - pp_params.order = 0;
> - /* internal DMA mapping in page_pool */
> - pp_params.flags = PP_FLAG_DMA_MAP;
> - pp_params.pool_size = DESC_NUM;
> - pp_params.nid = NUMA_NO_NODE;
> - pp_params.dev = priv->dev;
> - pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
> -
> dring->page_pool = page_pool_create(&pp_params);
> if (IS_ERR(dring->page_pool)) {
> err = PTR_ERR(dring->page_pool);
> --
> 2.21.1
>
Thanks
/Ilias
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
2020-01-08 14:53 ` Ilias Apalodimas
@ 2020-01-09 17:20 ` Jesper Dangaard Brouer
2020-01-09 17:29 ` Lorenzo Bianconi
0 siblings, 1 reply; 4+ messages in thread
From: Jesper Dangaard Brouer @ 2020-01-09 17:20 UTC (permalink / raw)
To: Ilias Apalodimas
Cc: Lorenzo Bianconi, netdev, davem, lorenzo.bianconi, brouer
On Wed, 8 Jan 2020 16:53:22 +0200
Ilias Apalodimas <ilias.apalodimas@linaro.org> wrote:
> Hi Lorenzo,
>
> On Tue, Jan 07, 2020 at 04:30:32PM +0100, Lorenzo Bianconi wrote:
> > Socionext driver can run on dma coherent and non-coherent devices.
> > Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> > now the driver can let page_pool API to managed needed DMA sync
> >
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> > drivers/net/ethernet/socionext/netsec.c | 45 +++++++++++++++----------
> > 1 file changed, 28 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
> > index b5a9e947a4a8..00404fef17e8 100644
> > --- a/drivers/net/ethernet/socionext/netsec.c
> > +++ b/drivers/net/ethernet/socionext/netsec.c
[...]
> > @@ -734,9 +734,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
> > /* Make sure the incoming payload fits in the page for XDP and non-XDP
> > * cases and reserve enough space for headroom + skb_shared_info
> > */
> > - *desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA;
> > - dma_dir = page_pool_get_dma_dir(dring->page_pool);
> > - dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir);
> > + *desc_len = NETSEC_RX_BUF_SIZE;
> >
> > return page_address(page);
> > }
> > @@ -883,6 +881,7 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> > static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > struct xdp_buff *xdp)
> > {
> > + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > u32 ret = NETSEC_XDP_PASS;
> > int err;
> > u32 act;
> > @@ -896,7 +895,10 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > case XDP_TX:
> > ret = netsec_xdp_xmit_back(priv, xdp);
> > if (ret != NETSEC_XDP_TX)
> > - xdp_return_buff(xdp);
> > + __page_pool_put_page(dring->page_pool,
> > + virt_to_head_page(xdp->data),
> > + xdp->data_end - xdp->data_hard_start,
>
> Do we have to include data_hard_start?
That does look wrong.
> @Jesper i know bpf programs can modify the packet, but isn't it safe
> to only sync for xdp->data_end - xdp->data in this case since the DMA transfer
> in this driver will always start *after* the XDP headroom?
I agree.
For performance it is actually important that we avoid "cache-flushing"
the headroom (which is what happens on these non-coherent devices), as the
headroom is used for e.g. storing xdp_frame.
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
LinkedIn: http://www.linkedin.com/in/brouer
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
2020-01-09 17:20 ` Jesper Dangaard Brouer
@ 2020-01-09 17:29 ` Lorenzo Bianconi
0 siblings, 0 replies; 4+ messages in thread
From: Lorenzo Bianconi @ 2020-01-09 17:29 UTC (permalink / raw)
To: Jesper Dangaard Brouer; +Cc: Ilias Apalodimas, Lorenzo Bianconi, netdev, davem
[-- Attachment #1: Type: text/plain, Size: 3101 bytes --]
> On Wed, 8 Jan 2020 16:53:22 +0200
> Ilias Apalodimas <ilias.apalodimas@linaro.org> wrote:
>
> > Hi Lorenzo,
> >
> > On Tue, Jan 07, 2020 at 04:30:32PM +0100, Lorenzo Bianconi wrote:
Hi Jesper and Ilias,
thx for the review :)
> > > Socionext driver can run on dma coherent and non-coherent devices.
> > > Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> > > now the driver can let page_pool API to managed needed DMA sync
> > >
> > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > ---
> > > drivers/net/ethernet/socionext/netsec.c | 45 +++++++++++++++----------
> > > 1 file changed, 28 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
> > > index b5a9e947a4a8..00404fef17e8 100644
> > > --- a/drivers/net/ethernet/socionext/netsec.c
> > > +++ b/drivers/net/ethernet/socionext/netsec.c
>
> [...]
> > > @@ -734,9 +734,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
> > > /* Make sure the incoming payload fits in the page for XDP and non-XDP
> > > * cases and reserve enough space for headroom + skb_shared_info
> > > */
> > > - *desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA;
> > > - dma_dir = page_pool_get_dma_dir(dring->page_pool);
> > > - dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir);
> > > + *desc_len = NETSEC_RX_BUF_SIZE;
> > >
> > > return page_address(page);
> > > }
> > > @@ -883,6 +881,7 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> > > static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > > struct xdp_buff *xdp)
> > > {
> > > + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > > u32 ret = NETSEC_XDP_PASS;
> > > int err;
> > > u32 act;
> > > @@ -896,7 +895,10 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > > case XDP_TX:
> > > ret = netsec_xdp_xmit_back(priv, xdp);
> > > if (ret != NETSEC_XDP_TX)
> > > - xdp_return_buff(xdp);
> > > + __page_pool_put_page(dring->page_pool,
> > > + virt_to_head_page(xdp->data),
> > > + xdp->data_end - xdp->data_hard_start,
> >
> > Do we have to include data_hard_start?
>
> That does look wrong.
ack, will fix it in v2
>
> > @Jesper i know bpf programs can modify the packet, but isn't it safe
> > to only sync for xdp->data_end - xdp->data in this case since the DMA transfer
> > in this driver will always start *after* the XDP headroom?
>
> I agree.
>
> For performance it is actually important that we avoid "cache-flushing"
> (which what happens on these non-coherent devices) the headroom. As the
> headroom is used for e.g. storing xdp_frame.
IIRC on mvneta there is the same issue. I will post a patch to fix it.
Regards,
Lorenzo
>
>
> --
> Best regards,
> Jesper Dangaard Brouer
> MSc.CS, Principal Kernel Engineer at Red Hat
> LinkedIn: http://www.linkedin.com/in/brouer
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-01-09 17:30 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-07 15:30 [PATCH] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data Lorenzo Bianconi
2020-01-08 14:53 ` Ilias Apalodimas
2020-01-09 17:20 ` Jesper Dangaard Brouer
2020-01-09 17:29 ` Lorenzo Bianconi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.