From: Ilias Apalodimas <ilias.apalodimas@linaro.org>
To: netdev@vger.kernel.org, jaswinder.singh@linaro.org
Cc: ard.biesheuvel@linaro.org, masami.hiramatsu@linaro.org, arnd@arndb.de,
    bjorn.topel@intel.com, magnus.karlsson@intel.com, brouer@redhat.com,
    daniel@iogearbox.net, ast@kernel.org, jesus.sanchez-palencia@intel.com,
    vinicius.gomes@intel.com, makita.toshiaki@lab.ntt.co.jp,
    Ilias Apalodimas <ilias.apalodimas@linaro.org>
Subject: [net-next, PATCH 2/2, v3] net: socionext: add XDP support
Date: Sat, 29 Sep 2018 14:28:02 +0300
Message-ID: <1538220482-16129-3-git-send-email-ilias.apalodimas@linaro.org>
In-Reply-To: <1538220482-16129-1-git-send-email-ilias.apalodimas@linaro.org>
References: <1538220482-16129-1-git-send-email-ilias.apalodimas@linaro.org>

Add basic XDP support. The interface only supports one Tx queue for
now, so locking is introduced on the Tx queue when XDP is enabled, to
make sure .ndo_start_xmit and .ndo_xdp_xmit won't corrupt the Tx ring.

Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
---
 drivers/net/ethernet/socionext/netsec.c | 345 +++++++++++++++++++++++++++++---
 1 file changed, 318 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 8f788a1..2b29363 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -9,6 +9,9 @@
 #include <linux/module.h>
 #include <linux/netlink.h>
 #include <linux/phy.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <net/xdp.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
@@ -238,6 +241,15 @@
 
 #define NETSEC_F_NETSEC_VER_MAJOR_NUM(x)	((x) & 0xffff0000)
 
+#define NETSEC_XDP_PASS		0
+#define NETSEC_XDP_CONSUMED	BIT(0)
+#define NETSEC_XDP_TX		BIT(1)
+#define NETSEC_XDP_REDIR	BIT(2)
+#define NETSEC_XDP_RX_OK (NETSEC_XDP_PASS | NETSEC_XDP_TX | NETSEC_XDP_REDIR)
+
+#define NETSEC_RXBUF_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
+			       NET_IP_ALIGN)
+
 enum ring_id {
 	NETSEC_RING_TX = 0,
 	NETSEC_RING_RX
@@ -256,11 +268,16 @@ struct netsec_desc_ring {
 	void *vaddr;
 	u16 pkt_cnt;
 	u16 head, tail;
+	u16 xdp_xmit; /* netsec_xdp_xmit packets */
+	bool is_xdp;
+	struct xdp_rxq_info xdp_rxq;
+	spinlock_t lock; /* XDP tx queue locking */
 };
 
 struct netsec_priv {
 	struct netsec_desc_ring desc_ring[NETSEC_RING_MAX];
 	struct ethtool_coalesce et_coalesce;
+	struct bpf_prog *xdp_prog;
 	spinlock_t reglock; /* protect reg access */
 	struct napi_struct napi;
 	phy_interface_t phy_interface;
@@ -297,6 +314,8 @@ struct netsec_rx_pkt_info {
 };
 
 static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num);
+static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
+			  struct xdp_buff *xdp);
 static void *netsec_alloc_rx_data(struct netsec_priv *priv,
				  dma_addr_t *dma_addr, u16 *len);
 
@@ -590,6 +609,8 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
 	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
 	unsigned int pkts, bytes;
 
+	if (dring->is_xdp)
+		spin_lock(&dring->lock);
 	dring->pkt_cnt += netsec_read(priv, NETSEC_REG_NRM_TX_DONE_PKTCNT);
 
 	if (dring->pkt_cnt < budget)
@@ -615,13 +636,23 @@ static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
 
 		dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
				 DMA_TO_DEVICE);
-		if (eop) {
-			pkts++;
+
+		if (!eop) {
+			*desc = (struct netsec_desc){};
+			continue;
+		}
+
+		if (!desc->skb) {
+			skb_free_frag(desc->addr);
+		} else {
 			bytes += desc->skb->len;
 			dev_kfree_skb(desc->skb);
 		}
+		pkts++;
 		*desc = (struct netsec_desc){};
 	}
+	if (dring->is_xdp)
+		spin_unlock(&dring->lock);
 	dring->pkt_cnt -= budget;
 
 	priv->ndev->stats.tx_packets += budget;
@@ -656,11 +687,30 @@ static void netsec_adv_desc(u16 *idx)
 		*idx = 0;
 }
 
+static void netsec_xdp_ring_tx_db(struct netsec_priv *priv, u16 pkts)
+{
+	if (likely(pkts))
+		netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, pkts);
+}
+
+static void netsec_finalize_xdp_rx(struct netsec_priv *priv, u32 xdp_res,
+				   u16 pkts)
+{
+	if (xdp_res & NETSEC_XDP_REDIR)
+		xdp_do_flush_map();
+
+	if (xdp_res & NETSEC_XDP_TX)
+		netsec_xdp_ring_tx_db(priv, pkts);
+}
+
 static int netsec_process_rx(struct netsec_priv *priv, int budget)
 {
 	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
+	struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
 	struct net_device *ndev = priv->ndev;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
+	u16 xdp_xmit = 0;
+	u32 xdp_act = 0;
 	int done = 0;
 
 	while (done < budget) {
@@ -668,8 +718,10 @@
 		struct netsec_de *de = dring->vaddr + (DESC_SZ * idx);
 		struct netsec_desc *desc = &dring->desc[idx];
 		struct netsec_rx_pkt_info rpi;
-		u16 pkt_len, desc_len;
+		u32 xdp_result = XDP_PASS;
 		dma_addr_t dma_handle;
+		u16 pkt_len, desc_len;
+		struct xdp_buff xdp;
 		void *buf_addr;
 
 		if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD))
@@ -706,7 +758,23 @@
					DMA_FROM_DEVICE);
 		prefetch(desc->addr);
 
-		skb = build_skb(desc->addr, desc->len);
+		xdp.data_hard_start = desc->addr;
+		xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM;
+		xdp_set_data_meta_invalid(&xdp);
+		xdp.data_end = xdp.data + pkt_len;
+		xdp.rxq = &dring->xdp_rxq;
+
+		if (xdp_prog) {
+			xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp);
+			if (xdp_result != NETSEC_XDP_PASS) {
+				xdp_act |= xdp_result;
+				if (xdp_result == NETSEC_XDP_TX)
+					xdp_xmit++;
+				goto next;
+			}
+		}
+
+		skb = build_skb(xdp.data_hard_start, desc->len);
 		if (unlikely(!skb)) {
 			dma_unmap_single(priv->dev, dma_handle, desc_len,
					 DMA_TO_DEVICE);
@@ -715,30 +783,35 @@
				   "rx failed to alloc skb\n");
 			break;
 		}
-		dma_unmap_single_attrs(priv->dev, desc->dma_addr, desc->len,
-				       DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
-
-		/* Update the descriptor with fresh buffers */
-		desc->len = desc_len;
-		desc->dma_addr = dma_handle;
-		desc->addr = buf_addr;
-
-		skb_put(skb, pkt_len);
+		skb_reserve(skb, xdp.data - xdp.data_hard_start);
+		skb_put(skb, xdp.data_end - xdp.data);
 		skb->protocol = eth_type_trans(skb, priv->ndev);
 
 		if (priv->rx_cksum_offload_flag &&
		    rpi.rx_cksum_result == NETSEC_RX_CKSUM_OK)
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-		if (napi_gro_receive(&priv->napi, skb) != GRO_DROP) {
+next:
+		if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) ||
+		    xdp_result & NETSEC_XDP_RX_OK) {
 			ndev->stats.rx_packets++;
-			ndev->stats.rx_bytes += pkt_len;
+			ndev->stats.rx_bytes += xdp.data_end - xdp.data;
 		}
 
+		dma_unmap_single_attrs(priv->dev, desc->dma_addr, desc->len,
+				       DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+
+		/* Update the descriptor with fresh buffers */
+		desc->len = desc_len;
+		desc->dma_addr = dma_handle;
+		desc->addr = buf_addr;
 		netsec_rx_fill(priv, idx, 1);
 		netsec_adv_desc(&dring->tail);
 	}
 
+	netsec_finalize_xdp_rx(priv, xdp_act, xdp_xmit);
+
 	return done;
 }
 
@@ -805,7 +878,9 @@ static void netsec_set_tx_de(struct netsec_priv *priv,
 	de->data_buf_addr_lw = lower_32_bits(desc->dma_addr);
 	de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len;
 	de->attr = attr;
-	dma_wmb();
+	/* under spin_lock if using XDP */
+	if (!dring->is_xdp)
+		dma_wmb();
 
 	dring->desc[idx] = *desc;
 	dring->desc[idx].skb = skb;
@@ -824,6 +899,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
 	u16 tso_seg_len = 0;
 	int filled;
 
+	if (dring->is_xdp)
+		spin_lock_bh(&dring->lock);
 	/* differentiate between full/emtpy ring */
 	if (dring->head >= dring->tail)
 		filled = dring->head - dring->tail;
@@ -831,6 +908,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
 		filled = dring->head + DESC_NUM - dring->tail;
 
 	if (DESC_NUM - filled < 2) { /* if less than 2 available */
+		if (dring->is_xdp)
+			spin_unlock_bh(&dring->lock);
 		netif_err(priv, drv, priv->ndev, "%s: TxQFull!\n", __func__);
 		netif_stop_queue(priv->ndev);
 		dma_wmb();
@@ -864,6 +943,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
 	tx_desc.dma_addr = dma_map_single(priv->dev, skb->data,
					  skb_headlen(skb), DMA_TO_DEVICE);
 	if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) {
+		if (dring->is_xdp)
+			spin_unlock_bh(&dring->lock);
 		netif_err(priv, drv, priv->ndev,
			  "%s: DMA mapping failed\n", __func__);
 		ndev->stats.tx_dropped++;
@@ -877,6 +958,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb,
 	netdev_sent_queue(priv->ndev, skb->len);
 
 	netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb);
+	if (dring->is_xdp)
+		spin_unlock_bh(&dring->lock);
 	netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */
 
 	return NETDEV_TX_OK;
@@ -891,6 +974,9 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id)
 	if (!dring->vaddr || !dring->desc)
 		return;
 
+	if (xdp_rxq_info_is_reg(&dring->xdp_rxq))
+		xdp_rxq_info_unreg(&dring->xdp_rxq);
+
 	for (idx = 0; idx < DESC_NUM; idx++) {
 		desc = &dring->desc[idx];
 		if (!desc->addr)
@@ -930,24 +1016,24 @@ static void netsec_free_dring(struct netsec_priv *priv, int id)
 
 static void *netsec_alloc_rx_data(struct netsec_priv *priv,
				  dma_addr_t *dma_handle, u16 *desc_len)
 {
-	size_t len = priv->ndev->mtu + ETH_HLEN + 2 * VLAN_HLEN + NET_SKB_PAD +
-		NET_IP_ALIGN;
+	size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	size_t payload_len = priv->ndev->mtu + ETH_HLEN + 2 * VLAN_HLEN;
 	dma_addr_t mapping;
 	void *buf;
 
-	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	len = SKB_DATA_ALIGN(len);
+	total_len += SKB_DATA_ALIGN(payload_len + NETSEC_RXBUF_HEADROOM);
 
-	buf = napi_alloc_frag(len);
+	buf = napi_alloc_frag(total_len);
 	if (!buf)
 		return NULL;
 
-	mapping = dma_map_single(priv->dev, buf, len, DMA_FROM_DEVICE);
+	mapping = dma_map_single(priv->dev, buf + NETSEC_RXBUF_HEADROOM,
+				 payload_len, DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(priv->dev, mapping)))
 		goto err_out;
 
 	*dma_handle = mapping;
-	*desc_len = len;
+	*desc_len = total_len;
 
 	return buf;
 
@@ -990,10 +1076,27 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id)
 	return -ENOMEM;
 }
 
+static void netsec_setup_tx_dring(struct netsec_priv *priv)
+{
+	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX];
+	struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
+
+	if (xdp_prog)
+		dring->is_xdp = true;
+	else
+		dring->is_xdp = false;
+}
+
 static int netsec_setup_rx_dring(struct netsec_priv *priv)
 {
 	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
-	int i;
+	struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog);
+	int i, err;
+
+	if (xdp_prog)
+		dring->is_xdp = true;
+	else
+		dring->is_xdp = false;
 
 	for (i = 0; i < DESC_NUM; i++) {
 		struct netsec_desc *desc = &dring->desc[i];
@@ -1002,20 +1105,29 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
 		u16 len;
 
 		buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
-		if (!buf) {
-			netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
+		if (!buf)
 			goto err_out;
-		}
 		desc->dma_addr = dma_handle;
 		desc->addr = buf;
 		desc->len = len;
 	}
 
 	netsec_rx_fill(priv, 0, DESC_NUM);
+	err = xdp_rxq_info_reg(&dring->xdp_rxq, priv->ndev, 0);
+	if (err)
+		goto err_out;
+
+	err = xdp_rxq_info_reg_mem_model(&dring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+					 NULL);
+	if (err) {
+		xdp_rxq_info_unreg(&dring->xdp_rxq);
+		goto err_out;
+	}
 
 	return 0;
 
 err_out:
+	netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
 	return -ENOMEM;
 }
 
@@ -1290,6 +1402,7 @@ static int netsec_netdev_open(struct net_device *ndev)
 
 	pm_runtime_get_sync(priv->dev);
 
+	netsec_setup_tx_dring(priv);
 	ret = netsec_setup_rx_dring(priv);
 	if (ret) {
 		netif_err(priv, probe, priv->ndev,
@@ -1387,6 +1500,9 @@ static int netsec_netdev_init(struct net_device *ndev)
 	if (ret)
 		goto err2;
 
+	spin_lock_init(&priv->desc_ring[NETSEC_RING_TX].lock);
+	spin_lock_init(&priv->desc_ring[NETSEC_RING_RX].lock);
+
 	return 0;
 err2:
 	netsec_free_dring(priv, NETSEC_RING_RX);
@@ -1419,6 +1535,179 @@ static int netsec_netdev_ioctl(struct net_device *ndev, struct ifreq *ifr,
 	return phy_mii_ioctl(ndev->phydev, ifr, cmd);
 }
 
+/* The current driver only supports 1 Txq, this should run under spin_lock() */
+static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
+				struct xdp_frame *xdpf)
+
+{
+	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+	struct netsec_tx_pkt_ctrl tx_ctrl = {};
+	struct netsec_desc tx_desc;
+	dma_addr_t dma_handle;
+	u16 filled;
+
+	if (tx_ring->head >= tx_ring->tail)
+		filled = tx_ring->head - tx_ring->tail;
+	else
+		filled = tx_ring->head + DESC_NUM - tx_ring->tail;
+
+	if (DESC_NUM - filled <= 1)
+		return NETSEC_XDP_CONSUMED;
+
+	dma_handle = dma_map_single(priv->dev, xdpf->data, xdpf->len,
+				    DMA_TO_DEVICE);
+	if (dma_mapping_error(priv->dev, dma_handle))
+		return NETSEC_XDP_CONSUMED;
+
+	tx_ctrl.cksum_offload_flag = false;
+	tx_ctrl.tcp_seg_offload_flag = false;
+	tx_ctrl.tcp_seg_len = 0;
+
+	tx_desc.dma_addr = dma_handle;
+	tx_desc.addr = xdpf->data;
+	tx_desc.len = xdpf->len;
+
+	netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, NULL);
+
+	return NETSEC_XDP_TX;
+}
+
+static int netsec_xdp_xmit(struct net_device *ndev, int n,
+			   struct xdp_frame **frames, u32 flags)
+{
+	struct netsec_priv *priv = netdev_priv(ndev);
+	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+	int drops = 0;
+	int i;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	spin_lock(&tx_ring->lock);
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
+
+		err = netsec_xdp_queue_one(priv, xdpf);
+		if (err != NETSEC_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		} else {
+			tx_ring->xdp_xmit++;
+		}
+	}
+	spin_unlock(&tx_ring->lock);
+
+	if (unlikely(flags & XDP_XMIT_FLUSH)) {
+		netsec_xdp_ring_tx_db(priv, tx_ring->xdp_xmit);
+		tx_ring->xdp_xmit = 0;
+	}
+
+	return n - drops;
+}
+
+static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
+{
+	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
+	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+	u32 ret;
+
+	if (unlikely(!xdpf))
+		return NETSEC_XDP_CONSUMED;
+
+	spin_lock(&tx_ring->lock);
+	ret = netsec_xdp_queue_one(priv, xdpf);
+	spin_unlock(&tx_ring->lock);
+
+	return ret;
+}
+
+static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
+			  struct xdp_buff *xdp)
+{
+	u32 ret = NETSEC_XDP_PASS;
+	int err;
+	u32 act;
+
+	rcu_read_lock();
+	act = bpf_prog_run_xdp(prog, xdp);
+
+	switch (act) {
+	case XDP_PASS:
+		ret = NETSEC_XDP_PASS;
+		break;
+	case XDP_TX:
+		ret = netsec_xdp_xmit_back(priv, xdp);
+		if (ret != NETSEC_XDP_TX)
+			xdp_return_buff(xdp);
+		break;
+	case XDP_REDIRECT:
+		err = xdp_do_redirect(priv->ndev, xdp, prog);
+		if (!err) {
+			ret = NETSEC_XDP_REDIR;
+		} else {
+			ret = NETSEC_XDP_CONSUMED;
+			xdp_return_buff(xdp);
+		}
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		/* fall through */
+	case XDP_ABORTED:
+		trace_xdp_exception(priv->ndev, prog, act);
+		/* fall through -- handle aborts by dropping packet */
+	case XDP_DROP:
+		ret = NETSEC_XDP_CONSUMED;
+		xdp_return_buff(xdp);
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static int netsec_xdp_setup(struct netsec_priv *priv, struct bpf_prog *prog,
+			    struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = priv->ndev;
+	struct bpf_prog *old_prog;
+
+	/* For now just support only the usual MTU sized frames */
+	if (prog && dev->mtu > 1500) {
+		NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP");
+		return -EOPNOTSUPP;
+	}
+
+	if (netif_running(dev))
+		netsec_netdev_stop(dev);
+
+	/* Detach old prog, if any */
+	old_prog = xchg(&priv->xdp_prog, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	if (netif_running(dev))
+		netsec_netdev_open(dev);
+
+	return 0;
+}
+
+static int netsec_xdp(struct net_device *ndev, struct netdev_bpf *xdp)
+{
+	struct netsec_priv *priv = netdev_priv(ndev);
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return netsec_xdp_setup(priv, xdp->prog, xdp->extack);
+	case XDP_QUERY_PROG:
+		xdp->prog_id = priv->xdp_prog ? priv->xdp_prog->aux->id : 0;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops netsec_netdev_ops = {
 	.ndo_init		= netsec_netdev_init,
 	.ndo_uninit		= netsec_netdev_uninit,
@@ -1429,6 +1718,8 @@ static const struct net_device_ops netsec_netdev_ops = {
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_do_ioctl		= netsec_netdev_ioctl,
+	.ndo_xdp_xmit		= netsec_xdp_xmit,
+	.ndo_bpf		= netsec_xdp,
 };
 
 static int netsec_of_probe(struct platform_device *pdev,
-- 
2.7.4
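
A minimal XDP program is enough to smoke-test the new XDP_TX path
(netsec_run_xdp() -> netsec_xdp_xmit_back(), serialized against
.ndo_start_xmit by the new Tx ring lock). The sketch below is
illustrative only and not part of the patch; it assumes a clang BPF
toolchain and libbpf's bpf_helpers.h for the SEC() macro, and "eth0"
is a placeholder interface name.

/* xdp_tx_mirror.c - illustrative sketch, not part of this patch.
 * Swaps the Ethernet source/destination MAC addresses and returns
 * XDP_TX, so every received frame is bounced back out of the same
 * netsec port via netsec_xdp_queue_one().
 *
 * Build:  clang -O2 -target bpf -c xdp_tx_mirror.c -o xdp_tx_mirror.o
 * Attach: ip link set dev eth0 xdp obj xdp_tx_mirror.o sec xdp
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_tx_mirror(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	unsigned char tmp[ETH_ALEN];

	/* The verifier requires an explicit bounds check before any access */
	if (data + sizeof(*eth) > data_end)
		return XDP_DROP;

	__builtin_memcpy(tmp, eth->h_source, ETH_ALEN);
	__builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
	__builtin_memcpy(eth->h_dest, tmp, ETH_ALEN);

	return XDP_TX;
}

char _license[] SEC("license") = "GPL";

Returning XDP_TX here hands the frame to netsec_xdp_xmit_back() on the
single Tx ring, which is exactly the path the dring->lock added by this
patch serializes against concurrent .ndo_start_xmit callers.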