* [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
@ 2020-01-10 13:57 Lorenzo Bianconi
  2020-01-10 14:56 ` Ilias Apalodimas
  2020-01-14  2:11 ` Jakub Kicinski
  0 siblings, 2 replies; 10+ messages in thread
From: Lorenzo Bianconi @ 2020-01-10 13:57 UTC (permalink / raw)
  To: ilias.apalodimas; +Cc: netdev, brouer, davem, lorenzo.bianconi

The Socionext driver can run on DMA coherent and non-coherent devices.
Get rid of the huge dma_sync_single_for_device() in netsec_alloc_rx_data()
since the driver can now let the page_pool API manage the needed DMA syncs.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
Changes since v1:
- rely on original frame size for dma sync
---
 drivers/net/ethernet/socionext/netsec.c | 43 +++++++++++++++----------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index b5a9e947a4a8..45c76b437457 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -243,6 +243,7 @@
 			       NET_IP_ALIGN)
 #define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \
 				SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+#define NETSEC_RX_BUF_SIZE	(PAGE_SIZE - NETSEC_RX_BUF_NON_DATA)
 
 #define DESC_SZ	sizeof(struct netsec_de)
 
@@ -719,7 +720,6 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
 {
 
 	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
-	enum dma_data_direction dma_dir;
 	struct page *page;
 
 	page = page_pool_dev_alloc_pages(dring->page_pool);
@@ -734,9 +734,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
 	/* Make sure the incoming payload fits in the page for XDP and non-XDP
 	 * cases and reserve enough space for headroom + skb_shared_info
 	 */
-	*desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA;
-	dma_dir = page_pool_get_dma_dir(dring->page_pool);
-	dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir);
+	*desc_len = NETSEC_RX_BUF_SIZE;
 
 	return page_address(page);
 }
@@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
 static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
 			  struct xdp_buff *xdp)
 {
+	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
+	unsigned int len = xdp->data_end - xdp->data;
 	u32 ret = NETSEC_XDP_PASS;
 	int err;
 	u32 act;
@@ -896,7 +896,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
 	case XDP_TX:
 		ret = netsec_xdp_xmit_back(priv, xdp);
 		if (ret != NETSEC_XDP_TX)
-			xdp_return_buff(xdp);
+			__page_pool_put_page(dring->page_pool,
+				     virt_to_head_page(xdp->data),
+				     len, true);
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(priv->ndev, xdp, prog);
@@ -904,7 +906,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
 			ret = NETSEC_XDP_REDIR;
 		} else {
 			ret = NETSEC_XDP_CONSUMED;
-			xdp_return_buff(xdp);
+			__page_pool_put_page(dring->page_pool,
+				     virt_to_head_page(xdp->data),
+				     len, true);
 		}
 		break;
 	default:
@@ -915,7 +919,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
 		/* fall through -- handle aborts by dropping packet */
 	case XDP_DROP:
 		ret = NETSEC_XDP_CONSUMED;
-		xdp_return_buff(xdp);
+		__page_pool_put_page(dring->page_pool,
+				     virt_to_head_page(xdp->data),
+				     len, true);
 		break;
 	}
 
@@ -1014,7 +1020,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
 			 * cache state. Since we paid the allocation cost if
 			 * building an skb fails try to put the page into cache
 			 */
-			page_pool_recycle_direct(dring->page_pool, page);
+			__page_pool_put_page(dring->page_pool, page,
+					     pkt_len, true);
 			netif_err(priv, drv, priv->ndev,
 				  "rx failed to build skb\n");
 			break;
@@ -1272,17 +1279,19 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
 {
 	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
 	struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
-	struct page_pool_params pp_params = { 0 };
+	struct page_pool_params pp_params = {
+		.order = 0,
+		/* internal DMA mapping in page_pool */
+		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+		.pool_size = DESC_NUM,
+		.nid = NUMA_NO_NODE,
+		.dev = priv->dev,
+		.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
+		.offset = NETSEC_RXBUF_HEADROOM,
+		.max_len = NETSEC_RX_BUF_SIZE,
+	};
 	int i, err;
 
-	pp_params.order = 0;
-	/* internal DMA mapping in page_pool */
-	pp_params.flags = PP_FLAG_DMA_MAP;
-	pp_params.pool_size = DESC_NUM;
-	pp_params.nid = NUMA_NO_NODE;
-	pp_params.dev = priv->dev;
-	pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
-
 	dring->page_pool = page_pool_create(&pp_params);
 	if (IS_ERR(dring->page_pool)) {
 		err = PTR_ERR(dring->page_pool);
-- 
2.21.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
  2020-01-10 13:57 [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data Lorenzo Bianconi
@ 2020-01-10 14:56 ` Ilias Apalodimas
  2020-01-10 15:34   ` Lorenzo Bianconi
  2020-01-14  2:11 ` Jakub Kicinski
  1 sibling, 1 reply; 10+ messages in thread
From: Ilias Apalodimas @ 2020-01-10 14:56 UTC (permalink / raw)
  To: Lorenzo Bianconi; +Cc: netdev, brouer, davem, lorenzo.bianconi

On Fri, Jan 10, 2020 at 02:57:44PM +0100, Lorenzo Bianconi wrote:
> Socionext driver can run on dma coherent and non-coherent devices.
> Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> now the driver can let page_pool API to managed needed DMA sync
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
> Changes since v1:
> - rely on original frame size for dma sync
> ---
>  drivers/net/ethernet/socionext/netsec.c | 43 +++++++++++++++----------
>  1 file changed, 26 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
> index b5a9e947a4a8..45c76b437457 100644
> --- a/drivers/net/ethernet/socionext/netsec.c
> +++ b/drivers/net/ethernet/socionext/netsec.c
> @@ -243,6 +243,7 @@
>  			       NET_IP_ALIGN)
>  #define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \
>  				SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
> +#define NETSEC_RX_BUF_SIZE	(PAGE_SIZE - NETSEC_RX_BUF_NON_DATA)
>  
>  #define DESC_SZ	sizeof(struct netsec_de)
>  
> @@ -719,7 +720,6 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
>  {
>  
>  	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> -	enum dma_data_direction dma_dir;
>  	struct page *page;
>  
>  	page = page_pool_dev_alloc_pages(dring->page_pool);
> @@ -734,9 +734,7 @@ static void *netsec_alloc_rx_data(struct netsec_priv *priv,
>  	/* Make sure the incoming payload fits in the page for XDP and non-XDP
>  	 * cases and reserve enough space for headroom + skb_shared_info
>  	 */
> -	*desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA;
> -	dma_dir = page_pool_get_dma_dir(dring->page_pool);
> -	dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir);
> +	*desc_len = NETSEC_RX_BUF_SIZE;
>  
>  	return page_address(page);
>  }
> @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
>  static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
>  			  struct xdp_buff *xdp)
>  {
> +	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> +	unsigned int len = xdp->data_end - xdp->data;

We need to account for XDP expanding the headers as well here. 
So something like max(xdp->data_end(before bpf), xdp->data_end(after bpf)) -
xdp->data (original)
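
Something along these lines maybe (rough, untested sketch; the orig_*
locals are only illustrative):

	void *orig_data = xdp->data;
	void *orig_data_end = xdp->data_end;

	act = bpf_prog_run_xdp(prog, xdp);
	[...]
	/* cover what the NIC wrote plus anything the program grew into */
	len = max(orig_data_end, xdp->data_end) - orig_data;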

>  	u32 ret = NETSEC_XDP_PASS;
>  	int err;
>  	u32 act;
> @@ -896,7 +896,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
>  	case XDP_TX:
>  		ret = netsec_xdp_xmit_back(priv, xdp);
>  		if (ret != NETSEC_XDP_TX)
> -			xdp_return_buff(xdp);
> +			__page_pool_put_page(dring->page_pool,
> +				     virt_to_head_page(xdp->data),
> +				     len, true);
>  		break;
>  	case XDP_REDIRECT:
>  		err = xdp_do_redirect(priv->ndev, xdp, prog);
> @@ -904,7 +906,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
>  			ret = NETSEC_XDP_REDIR;
>  		} else {
>  			ret = NETSEC_XDP_CONSUMED;
> -			xdp_return_buff(xdp);
> +			__page_pool_put_page(dring->page_pool,
> +				     virt_to_head_page(xdp->data),
> +				     len, true);
>  		}
>  		break;
>  	default:
> @@ -915,7 +919,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
>  		/* fall through -- handle aborts by dropping packet */
>  	case XDP_DROP:
>  		ret = NETSEC_XDP_CONSUMED;
> -		xdp_return_buff(xdp);
> +		__page_pool_put_page(dring->page_pool,
> +				     virt_to_head_page(xdp->data),
> +				     len, true);
>  		break;
>  	}
>  
> @@ -1014,7 +1020,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
>  			 * cache state. Since we paid the allocation cost if
>  			 * building an skb fails try to put the page into cache
>  			 */
> -			page_pool_recycle_direct(dring->page_pool, page);
> +			__page_pool_put_page(dring->page_pool, page,
> +					     pkt_len, true);

Same here, a bpf prog with an XDP_PASS verdict might change the lengths

>  			netif_err(priv, drv, priv->ndev,
>  				  "rx failed to build skb\n");
>  			break;
> @@ -1272,17 +1279,19 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
>  {
>  	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
>  	struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
> -	struct page_pool_params pp_params = { 0 };
> +	struct page_pool_params pp_params = {
> +		.order = 0,
> +		/* internal DMA mapping in page_pool */
> +		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
> +		.pool_size = DESC_NUM,
> +		.nid = NUMA_NO_NODE,
> +		.dev = priv->dev,
> +		.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
> +		.offset = NETSEC_RXBUF_HEADROOM,
> +		.max_len = NETSEC_RX_BUF_SIZE,
> +	};
>  	int i, err;
>  
> -	pp_params.order = 0;
> -	/* internal DMA mapping in page_pool */
> -	pp_params.flags = PP_FLAG_DMA_MAP;
> -	pp_params.pool_size = DESC_NUM;
> -	pp_params.nid = NUMA_NO_NODE;
> -	pp_params.dev = priv->dev;
> -	pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
> -
>  	dring->page_pool = page_pool_create(&pp_params);
>  	if (IS_ERR(dring->page_pool)) {
>  		err = PTR_ERR(dring->page_pool);
> -- 
> 2.21.1
> 

Thanks
/Ilias

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
  2020-01-10 14:56 ` Ilias Apalodimas
@ 2020-01-10 15:34   ` Lorenzo Bianconi
  2020-01-10 17:33     ` Jesper Dangaard Brouer
  0 siblings, 1 reply; 10+ messages in thread
From: Lorenzo Bianconi @ 2020-01-10 15:34 UTC (permalink / raw)
  To: Ilias Apalodimas; +Cc: netdev, brouer, davem, lorenzo.bianconi

> On Fri, Jan 10, 2020 at 02:57:44PM +0100, Lorenzo Bianconi wrote:
> > Socionext driver can run on dma coherent and non-coherent devices.
> > Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> > now the driver can let page_pool API to managed needed DMA sync
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> > Changes since v1:
> > - rely on original frame size for dma sync
> > ---
> >  drivers/net/ethernet/socionext/netsec.c | 43 +++++++++++++++----------
> >  1 file changed, 26 insertions(+), 17 deletions(-)
> > 

[...]

> > @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> >  static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> >  			  struct xdp_buff *xdp)
> >  {
> > +	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > +	unsigned int len = xdp->data_end - xdp->data;
> 
> We need to account for XDP expanding the headers as well here. 
> So something like max(xdp->data_end(before bpf), xdp->data_end(after bpf)) -
> xdp->data (original)

correct, the corner case that is not covered at the moment is when data_end is
moved forward by the bpf program. I will fix it in v3. Thx

Regards,
Lorenzo

> 
> >  	u32 ret = NETSEC_XDP_PASS;
> >  	int err;
> >  	u32 act;
> > @@ -896,7 +896,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> >  	case XDP_TX:
> >  		ret = netsec_xdp_xmit_back(priv, xdp);
> >  		if (ret != NETSEC_XDP_TX)
> > -			xdp_return_buff(xdp);
> > +			__page_pool_put_page(dring->page_pool,
> > +				     virt_to_head_page(xdp->data),
> > +				     len, true);
> >  		break;
> >  	case XDP_REDIRECT:
> >  		err = xdp_do_redirect(priv->ndev, xdp, prog);
> > @@ -904,7 +906,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> >  			ret = NETSEC_XDP_REDIR;
> >  		} else {
> >  			ret = NETSEC_XDP_CONSUMED;
> > -			xdp_return_buff(xdp);
> > +			__page_pool_put_page(dring->page_pool,
> > +				     virt_to_head_page(xdp->data),
> > +				     len, true);
> >  		}
> >  		break;
> >  	default:
> > @@ -915,7 +919,9 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> >  		/* fall through -- handle aborts by dropping packet */
> >  	case XDP_DROP:
> >  		ret = NETSEC_XDP_CONSUMED;
> > -		xdp_return_buff(xdp);
> > +		__page_pool_put_page(dring->page_pool,
> > +				     virt_to_head_page(xdp->data),
> > +				     len, true);
> >  		break;
> >  	}
> >  
> > @@ -1014,7 +1020,8 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
> >  			 * cache state. Since we paid the allocation cost if
> >  			 * building an skb fails try to put the page into cache
> >  			 */
> > -			page_pool_recycle_direct(dring->page_pool, page);
> > +			__page_pool_put_page(dring->page_pool, page,
> > +					     pkt_len, true);
> 
> Same here, a bpf prog with XDP_PASS verdict might change lenghts
> 
> >  			netif_err(priv, drv, priv->ndev,
> >  				  "rx failed to build skb\n");
> >  			break;
> > @@ -1272,17 +1279,19 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
> >  {
> >  	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> >  	struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
> > -	struct page_pool_params pp_params = { 0 };
> > +	struct page_pool_params pp_params = {
> > +		.order = 0,
> > +		/* internal DMA mapping in page_pool */
> > +		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
> > +		.pool_size = DESC_NUM,
> > +		.nid = NUMA_NO_NODE,
> > +		.dev = priv->dev,
> > +		.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
> > +		.offset = NETSEC_RXBUF_HEADROOM,
> > +		.max_len = NETSEC_RX_BUF_SIZE,
> > +	};
> >  	int i, err;
> >  
> > -	pp_params.order = 0;
> > -	/* internal DMA mapping in page_pool */
> > -	pp_params.flags = PP_FLAG_DMA_MAP;
> > -	pp_params.pool_size = DESC_NUM;
> > -	pp_params.nid = NUMA_NO_NODE;
> > -	pp_params.dev = priv->dev;
> > -	pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
> > -
> >  	dring->page_pool = page_pool_create(&pp_params);
> >  	if (IS_ERR(dring->page_pool)) {
> >  		err = PTR_ERR(dring->page_pool);
> > -- 
> > 2.21.1
> > 
> 
> Thanks
> /Ilias

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
  2020-01-10 15:34   ` Lorenzo Bianconi
@ 2020-01-10 17:33     ` Jesper Dangaard Brouer
  2020-01-10 18:19       ` Lorenzo Bianconi
  0 siblings, 1 reply; 10+ messages in thread
From: Jesper Dangaard Brouer @ 2020-01-10 17:33 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Ilias Apalodimas, netdev, davem, lorenzo.bianconi, brouer

On Fri, 10 Jan 2020 16:34:13 +0100
Lorenzo Bianconi <lorenzo@kernel.org> wrote:

> > On Fri, Jan 10, 2020 at 02:57:44PM +0100, Lorenzo Bianconi wrote:  
> > > Socionext driver can run on dma coherent and non-coherent devices.
> > > Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> > > now the driver can let page_pool API to managed needed DMA sync
> > > 
> > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > ---
> > > Changes since v1:
> > > - rely on original frame size for dma sync
> > > ---
> > >  drivers/net/ethernet/socionext/netsec.c | 43 +++++++++++++++----------
> > >  1 file changed, 26 insertions(+), 17 deletions(-)
> > >   
> 
> [...]
> 
> > > @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> > >  static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > >  			  struct xdp_buff *xdp)
> > >  {
> > > +	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > > +	unsigned int len = xdp->data_end - xdp->data;  
> > 
> > We need to account for XDP expanding the headers as well here. 
> > So something like max(xdp->data_end(before bpf), xdp->data_end(after bpf)) -
> > xdp->data (original)  
> 
> correct, the corner case that is not covered at the moment is when data_end is
> moved forward by the bpf program. I will fix it in v3. Thx

Maybe we can simply do:

 void *data_start = NETSEC_RXBUF_HEADROOM + xdp->data_hard_start;
 unsigned int len = xdp->data_end - data_start;

The cache-lines that need to be flushed/synced for_device are the ones
covering the area used by the NIC DMA engine.  We know it will always
start at a certain point (given the driver configured the hardware that
way).
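
FWIW, with PP_FLAG_DMA_SYNC_DEV set the sync page_pool does on recycle
is roughly this (simplified, not the exact code):

	dma_sync_size = min(dma_sync_size, pool->p.max_len);
	dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
					 pool->p.offset, dma_sync_size,
					 pool->p.dma_dir);

so with .offset = NETSEC_RXBUF_HEADROOM and .max_len = NETSEC_RX_BUF_SIZE
only the part of the page the DMA engine can actually touch gets synced.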

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
  2020-01-10 17:33     ` Jesper Dangaard Brouer
@ 2020-01-10 18:19       ` Lorenzo Bianconi
  2020-01-10 19:01         ` Jesper Dangaard Brouer
  0 siblings, 1 reply; 10+ messages in thread
From: Lorenzo Bianconi @ 2020-01-10 18:19 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: Lorenzo Bianconi, Ilias Apalodimas, netdev, davem

> On Fri, 10 Jan 2020 16:34:13 +0100
> Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> 
> > > On Fri, Jan 10, 2020 at 02:57:44PM +0100, Lorenzo Bianconi wrote:  
> > > > Socionext driver can run on dma coherent and non-coherent devices.
> > > > Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> > > > now the driver can let page_pool API to managed needed DMA sync
> > > > 
> > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > ---
> > > > Changes since v1:
> > > > - rely on original frame size for dma sync
> > > > ---
> > > >  drivers/net/ethernet/socionext/netsec.c | 43 +++++++++++++++----------
> > > >  1 file changed, 26 insertions(+), 17 deletions(-)
> > > >   
> > 
> > [...]
> > 
> > > > @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> > > >  static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > > >  			  struct xdp_buff *xdp)
> > > >  {
> > > > +	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > > > +	unsigned int len = xdp->data_end - xdp->data;  
> > > 
> > > We need to account for XDP expanding the headers as well here. 
> > > So something like max(xdp->data_end(before bpf), xdp->data_end(after bpf)) -
> > > xdp->data (original)  
> > 
> > correct, the corner case that is not covered at the moment is when data_end is
> > moved forward by the bpf program. I will fix it in v3. Thx
> 
> Maybe we can simplify do:
> 
>  void *data_start = NETSEC_RXBUF_HEADROOM + xdp->data_hard_start;
>  unsigned int len = xdp->data_end - data_start;
> 

Hi Jesper,

please correct me if I am wrong but this seems to me the same as v2. The leftover
corner case is if xdp->data_end is moved 'forward' by the bpf program (I guess
it is possible, right?). In this case we will not sync xdp->data_end(new) - xdp->data_end(old)

Regards,
Lorenzo

> The cache-lines that need to be flushed/synced for_device is the area
> used by NIC DMA engine.  We know it will always start at a certain
> point (given driver configured hardware to this).
> 
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
  2020-01-10 18:19       ` Lorenzo Bianconi
@ 2020-01-10 19:01         ` Jesper Dangaard Brouer
  2020-01-10 19:19           ` Ilias Apalodimas
  2020-01-10 19:36           ` Lorenzo Bianconi
  0 siblings, 2 replies; 10+ messages in thread
From: Jesper Dangaard Brouer @ 2020-01-10 19:01 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Lorenzo Bianconi, Ilias Apalodimas, netdev, davem, brouer

On Fri, 10 Jan 2020 19:19:40 +0100
Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote:

> > On Fri, 10 Jan 2020 16:34:13 +0100
> > Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> >   
> > > > On Fri, Jan 10, 2020 at 02:57:44PM +0100, Lorenzo Bianconi wrote:    
> > > > > Socionext driver can run on dma coherent and non-coherent devices.
> > > > > Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> > > > > now the driver can let page_pool API to managed needed DMA sync
> > > > > 
> > > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > > ---
> > > > > Changes since v1:
> > > > > - rely on original frame size for dma sync
> > > > > ---
> > > > >  drivers/net/ethernet/socionext/netsec.c | 43 +++++++++++++++----------
> > > > >  1 file changed, 26 insertions(+), 17 deletions(-)
> > > > >     
> > > 
> > > [...]
> > >   
> > > > > @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> > > > >  static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > > > >  			  struct xdp_buff *xdp)
> > > > >  {
> > > > > +	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > > > > +	unsigned int len = xdp->data_end - xdp->data;    
> > > > 
> > > > We need to account for XDP expanding the headers as well here. 
> > > > So something like max(xdp->data_end(before bpf), xdp->data_end(after bpf)) -
> > > > xdp->data (original)    
> > > 
> > > correct, the corner case that is not covered at the moment is when data_end is
> > > moved forward by the bpf program. I will fix it in v3. Thx  
> > 
> > Maybe we can simplify do:
> > 
> >  void *data_start = NETSEC_RXBUF_HEADROOM + xdp->data_hard_start;
> >  unsigned int len = xdp->data_end - data_start;
> >   
> 
> Hi Jesper,
> 
> please correct me if I am wrong but this seems to me the same as v2.

No, this is v2, where you do:
   len = xdp->data_end - xdp->data;

Maybe you mean v1? where you calc len like:
   len = xdp->data_end - xdp->data_hard_start;
   

> The leftover corner case is if xdp->data_end is moved 'forward' by
> the bpf program (I guess it is possible, right?). In this case we
> will not sync xdp->data_end(new) - xdp->data_end(old)

Currently xdp->data_end can only shrink (but I plan to extend it). Yes,
this corner case is left, but I don't think we need to handle it.  When
a BPF prog shrinks xdp->data_end, then I believe it cannot change the
shrunk part any longer.
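
(Today that shrink can only come from bpf_xdp_adjust_tail() with a
negative delta, e.g.:

	/* in the BPF program: chop 4 bytes off the tail */
	bpf_xdp_adjust_tail(xdp, -4);

and after that the verifier won't let the program touch anything beyond
the new data_end.)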


> 
> > The cache-lines that need to be flushed/synced for_device is the area
> > used by NIC DMA engine.  We know it will always start at a certain
> > point (given driver configured hardware to this).


-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
  2020-01-10 19:01         ` Jesper Dangaard Brouer
@ 2020-01-10 19:19           ` Ilias Apalodimas
  2020-01-10 19:36           ` Lorenzo Bianconi
  1 sibling, 0 replies; 10+ messages in thread
From: Ilias Apalodimas @ 2020-01-10 19:19 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: Lorenzo Bianconi, Lorenzo Bianconi, netdev, davem

On Fri, Jan 10, 2020 at 08:01:56PM +0100, Jesper Dangaard Brouer wrote:
> On Fri, 10 Jan 2020 19:19:40 +0100
> Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote:
> 
> > > On Fri, 10 Jan 2020 16:34:13 +0100
> > > Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> > >   
> > > > > On Fri, Jan 10, 2020 at 02:57:44PM +0100, Lorenzo Bianconi wrote:    
> > > > > > Socionext driver can run on dma coherent and non-coherent devices.
> > > > > > Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> > > > > > now the driver can let page_pool API to managed needed DMA sync
> > > > > > 
> > > > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > > > ---
> > > > > > Changes since v1:
> > > > > > - rely on original frame size for dma sync
> > > > > > ---
> > > > > >  drivers/net/ethernet/socionext/netsec.c | 43 +++++++++++++++----------
> > > > > >  1 file changed, 26 insertions(+), 17 deletions(-)
> > > > > >     
> > > > 
> > > > [...]
> > > >   
> > > > > > @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> > > > > >  static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > > > > >  			  struct xdp_buff *xdp)
> > > > > >  {
> > > > > > +	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > > > > > +	unsigned int len = xdp->data_end - xdp->data;    
> > > > > 
> > > > > We need to account for XDP expanding the headers as well here. 
> > > > > So something like max(xdp->data_end(before bpf), xdp->data_end(after bpf)) -
> > > > > xdp->data (original)    
> > > > 
> > > > correct, the corner case that is not covered at the moment is when data_end is
> > > > moved forward by the bpf program. I will fix it in v3. Thx  
> > > 
> > > Maybe we can simplify do:
> > > 
> > >  void *data_start = NETSEC_RXBUF_HEADROOM + xdp->data_hard_start;
> > >  unsigned int len = xdp->data_end - data_start;
> > >   
> > 
> > Hi Jesper,
> > 
> > please correct me if I am wrong but this seems to me the same as v2.
> 
> No, this is v2, where you do:
>    len = xdp->data_end - xdp->data;
> 
> Maybe you mean v1? where you calc len like:
>    len = xdp->data_end - xdp->data_hard_start;
>    
> 
> > The leftover corner case is if xdp->data_end is moved 'forward' by
> > the bpf program (I guess it is possible, right?). In this case we
> > will not sync xdp->data_end(new) - xdp->data_end(old)
> 
> Currently xdp->data_end can only shrink (but I plan to extend it). Yes,
> this corner case is left, but I don't think we need to handle it.  When
> a BPF prog shrink xdp->data_end, then i believe it cannot change that
> part the shunk part any longer.
> 

What about a bpf prog that adds a vlan header, for example?
Won't that push extra bytes into the memory where the NIC will
potentially write the next packet, once the memory is recycled?
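
E.g. something like this in the program (just a sketch):

	/* grow the frame towards the headroom to make room for the tag */
	bpf_xdp_adjust_head(xdp, -(int)sizeof(struct vlan_hdr));
	/* ...then memmove the eth header back and write the tag */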

Regards
/Ilias
> 
> > 
> > > The cache-lines that need to be flushed/synced for_device is the area
> > > used by NIC DMA engine.  We know it will always start at a certain
> > > point (given driver configured hardware to this).
> 
> 
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
  2020-01-10 19:01         ` Jesper Dangaard Brouer
  2020-01-10 19:19           ` Ilias Apalodimas
@ 2020-01-10 19:36           ` Lorenzo Bianconi
  2020-01-13 10:39             ` Ilias Apalodimas
  1 sibling, 1 reply; 10+ messages in thread
From: Lorenzo Bianconi @ 2020-01-10 19:36 UTC (permalink / raw)
  To: Jesper Dangaard Brouer; +Cc: Lorenzo Bianconi, Ilias Apalodimas, netdev, davem

> On Fri, 10 Jan 2020 19:19:40 +0100
> Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote:
> 
> > > On Fri, 10 Jan 2020 16:34:13 +0100
> > > Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> > >   
> > > > > On Fri, Jan 10, 2020 at 02:57:44PM +0100, Lorenzo Bianconi wrote:    
> > > > > > Socionext driver can run on dma coherent and non-coherent devices.
> > > > > > Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> > > > > > now the driver can let page_pool API to managed needed DMA sync
> > > > > > 
> > > > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > > > ---
> > > > > > Changes since v1:
> > > > > > - rely on original frame size for dma sync
> > > > > > ---
> > > > > >  drivers/net/ethernet/socionext/netsec.c | 43 +++++++++++++++----------
> > > > > >  1 file changed, 26 insertions(+), 17 deletions(-)
> > > > > >     
> > > > 
> > > > [...]
> > > >   
> > > > > > @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> > > > > >  static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > > > > >  			  struct xdp_buff *xdp)
> > > > > >  {
> > > > > > +	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > > > > > +	unsigned int len = xdp->data_end - xdp->data;    
> > > > > 
> > > > > We need to account for XDP expanding the headers as well here. 
> > > > > So something like max(xdp->data_end(before bpf), xdp->data_end(after bpf)) -
> > > > > xdp->data (original)    
> > > > 
> > > > correct, the corner case that is not covered at the moment is when data_end is
> > > > moved forward by the bpf program. I will fix it in v3. Thx  
> > > 
> > > Maybe we can simplify do:
> > > 
> > >  void *data_start = NETSEC_RXBUF_HEADROOM + xdp->data_hard_start;
> > >  unsigned int len = xdp->data_end - data_start;
> > >   
> > 
> > Hi Jesper,
> > 
> > please correct me if I am wrong but this seems to me the same as v2.
> 
> No, this is v2, where you do:
>    len = xdp->data_end - xdp->data;

I mean, in the solution you proposed, you set (before running the bpf program):

len = xdp->data_end - data_start
where:
data_start = NETSEC_RXBUF_HEADROOM + xdp->data_hard_start

that is equivalent to what I did in v2 (before running the bpf program):
len = xdp->data_end - xdp->data

since:
xdp->data = xdp->data_hard_start + NETSEC_RXBUF_HEADROOM
(set in netsec_process_rx())

Am I missing something?

> 
> Maybe you mean v1? where you calc len like:
>    len = xdp->data_end - xdp->data_hard_start;
>    
> 
> > The leftover corner case is if xdp->data_end is moved 'forward' by
> > the bpf program (I guess it is possible, right?). In this case we
> > will not sync xdp->data_end(new) - xdp->data_end(old)
> 
> Currently xdp->data_end can only shrink (but I plan to extend it). Yes,
> this corner case is left, but I don't think we need to handle it.  When
> a BPF prog shrink xdp->data_end, then i believe it cannot change that
> part the shunk part any longer.
> 

ack, fine to me.

Regards,
Lorenzo

> 
> > 
> > > The cache-lines that need to be flushed/synced for_device is the area
> > > used by NIC DMA engine.  We know it will always start at a certain
> > > point (given driver configured hardware to this).
> 
> 
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
  2020-01-10 19:36           ` Lorenzo Bianconi
@ 2020-01-13 10:39             ` Ilias Apalodimas
  0 siblings, 0 replies; 10+ messages in thread
From: Ilias Apalodimas @ 2020-01-13 10:39 UTC (permalink / raw)
  To: Lorenzo Bianconi; +Cc: Jesper Dangaard Brouer, Lorenzo Bianconi, netdev, davem

On Fri, Jan 10, 2020 at 08:36:51PM +0100, Lorenzo Bianconi wrote:
> > On Fri, 10 Jan 2020 19:19:40 +0100
> > Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote:
> > 
> > > > On Fri, 10 Jan 2020 16:34:13 +0100
> > > > Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> > > >   
> > > > > > On Fri, Jan 10, 2020 at 02:57:44PM +0100, Lorenzo Bianconi wrote:    
> > > > > > > Socionext driver can run on dma coherent and non-coherent devices.
> > > > > > > Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> > > > > > > now the driver can let page_pool API to managed needed DMA sync
> > > > > > > 
> > > > > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > > > > ---
> > > > > > > Changes since v1:
> > > > > > > - rely on original frame size for dma sync
> > > > > > > ---
> > > > > > >  drivers/net/ethernet/socionext/netsec.c | 43 +++++++++++++++----------
> > > > > > >  1 file changed, 26 insertions(+), 17 deletions(-)
> > > > > > >     
> > > > > 
> > > > > [...]
> > > > >   
> > > > > > > @@ -883,6 +881,8 @@ static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
> > > > > > >  static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
> > > > > > >  			  struct xdp_buff *xdp)
> > > > > > >  {
> > > > > > > +	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
> > > > > > > +	unsigned int len = xdp->data_end - xdp->data;    
> > > > > > 
> > > > > > We need to account for XDP expanding the headers as well here. 
> > > > > > So something like max(xdp->data_end(before bpf), xdp->data_end(after bpf)) -
> > > > > > xdp->data (original)    
> > > > > 
> > > > > correct, the corner case that is not covered at the moment is when data_end is
> > > > > moved forward by the bpf program. I will fix it in v3. Thx  
> > > > 
> > > > Maybe we can simplify do:
> > > > 
> > > >  void *data_start = NETSEC_RXBUF_HEADROOM + xdp->data_hard_start;
> > > >  unsigned int len = xdp->data_end - data_start;
> > > >   
> > > 
> > > Hi Jesper,
> > > 
> > > please correct me if I am wrong but this seems to me the same as v2.
> > 
> > No, this is v2, where you do:
> >    len = xdp->data_end - xdp->data;
> 
> I mean in the solution you proposed you set (before running the bpf program):
> 
> len = xdp->data_end - data_start
> where:
> data_start = NETSEC_RXBUF_HEADROOM + xdp->data_hard_start
> 
> that is equivalent to what I did in v2 (before running the bpf program):
> len = xdp->data_end - xdp->data
> 
> since:
> xdp->data = xdp->data_hard_start + NETSEC_RXBUF_HEADROOM
> (set in netsec_process_rx())
> 
> Am I missing something?
> 
> > 
> > Maybe you mean v1? where you calc len like:
> >    len = xdp->data_end - xdp->data_hard_start;
> >    
> > 
> > > The leftover corner case is if xdp->data_end is moved 'forward' by
> > > the bpf program (I guess it is possible, right?). In this case we
> > > will not sync xdp->data_end(new) - xdp->data_end(old)
> > 
> > Currently xdp->data_end can only shrink (but I plan to extend it). Yes,
> > this corner case is left, but I don't think we need to handle it.  When
> > a BPF prog shrink xdp->data_end, then i believe it cannot change that
> > part the shunk part any longer.
> > 

Ok, I thought it could expand as well.
If that's the case the current patchset is ok.

> 
> ack, fine to me.
> 
> Regards,
> Lorenzo
> 
> > 
> > > 
> > > > The cache-lines that need to be flushed/synced for_device is the area
> > > > used by NIC DMA engine.  We know it will always start at a certain
> > > > point (given driver configured hardware to this).
> > 
> > 
> > -- 
> > Best regards,
> >   Jesper Dangaard Brouer
> >   MSc.CS, Principal Kernel Engineer at Red Hat
> >   LinkedIn: http://www.linkedin.com/in/brouer
> > 

Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data
  2020-01-10 13:57 [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data Lorenzo Bianconi
  2020-01-10 14:56 ` Ilias Apalodimas
@ 2020-01-14  2:11 ` Jakub Kicinski
  1 sibling, 0 replies; 10+ messages in thread
From: Jakub Kicinski @ 2020-01-14  2:11 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: ilias.apalodimas, netdev, brouer, davem, lorenzo.bianconi

On Fri, 10 Jan 2020 14:57:44 +0100, Lorenzo Bianconi wrote:
> Socionext driver can run on dma coherent and non-coherent devices.
> Get rid of huge dma_sync_single_for_device in netsec_alloc_rx_data since
> now the driver can let page_pool API to managed needed DMA sync
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>

Looks like this is good to be applied, after all. Could you fix
the misaligned continuation lines (checkpatch will guide you to them)
and repost (giving folks a last chance to object)?
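
(e.g. running ./scripts/checkpatch.pl --strict on the patch file should
point at the offending lines)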

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread

Thread overview: 10+ messages
2020-01-10 13:57 [PATCH v2 net-next] net: socionext: get rid of huge dma sync in netsec_alloc_rx_data Lorenzo Bianconi
2020-01-10 14:56 ` Ilias Apalodimas
2020-01-10 15:34   ` Lorenzo Bianconi
2020-01-10 17:33     ` Jesper Dangaard Brouer
2020-01-10 18:19       ` Lorenzo Bianconi
2020-01-10 19:01         ` Jesper Dangaard Brouer
2020-01-10 19:19           ` Ilias Apalodimas
2020-01-10 19:36           ` Lorenzo Bianconi
2020-01-13 10:39             ` Ilias Apalodimas
2020-01-14  2:11 ` Jakub Kicinski
