From: William Tu <u9012063@gmail.com>
To: Xiaolong Ye <xiaolong.ye@intel.com>
Cc: Qi Zhang <qi.z.zhang@intel.com>,
	John McNamara <john.mcnamara@intel.com>,
	 Marko Kovacevic <marko.kovacevic@intel.com>,
	Karlsson Magnus <magnus.karlsson@intel.com>,
	 Topel Bjorn <bjorn.topel@intel.com>,
	dev@dpdk.org
Subject: Re: [dpdk-dev] [PATCH v2 1/3] net/af_xdp: enable zero copy by extbuf
Date: Tue, 11 Jun 2019 09:16:45 -0700	[thread overview]
Message-ID: <CALDO+SYThUjdjBsc+0JGKCVnCRGYj4ryvgJ_uM8hD3NVn8U-Gg@mail.gmail.com> (raw)
In-Reply-To: <20190530090707.36290-2-xiaolong.ye@intel.com>

On Thu, May 30, 2019 at 2:16 AM Xiaolong Ye <xiaolong.ye@intel.com> wrote:
>
> Implement zero copy in the af_xdp PMD through the mbuf's external memory
> mechanism to achieve higher performance.
>
> This patch also provides a new parameter, "pmd_zero_copy", so users can
> choose whether to enable zero copy in the af_xdp PMD.
>
> To be clear, "zero copy" here is different from AF_XDP's "zero copy mode":
> it refers to zero copy between the af_xdp umem and the mbufs used in the
> DPDK application.
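
For readers less familiar with the extbuf mechanism this builds on, here is a
minimal standalone sketch of the attach/free-callback pattern (simplified, not
the driver code; my_free_cb, attach_example and ext_buf are made-up names, and
the IOVA is left as 0 just as the patch itself does):

#include <rte_mbuf.h>

static void
my_free_cb(void *addr, void *opaque)
{
	/* Invoked when the last mbuf referencing "addr" is freed.
	 * The patch uses this hook to recycle the umem frame back to
	 * buf_ring (see umem_buf_release_to_fq() below). */
}

static int
attach_example(struct rte_mempool *mp, void *ext_buf, uint16_t buf_len)
{
	struct rte_mbuf_ext_shared_info *shinfo;
	struct rte_mbuf *m = rte_pktmbuf_alloc(mp);

	if (m == NULL)
		return -1;

	/* shinfo is carved out of the tail of ext_buf; the helper shrinks
	 * buf_len accordingly and returns NULL if it does not fit. */
	shinfo = rte_pktmbuf_ext_shinfo_init_helper(ext_buf, &buf_len,
						    my_free_cb, NULL);
	if (shinfo == NULL) {
		rte_pktmbuf_free(m);
		return -1;
	}

	/* The mbuf now points at ext_buf instead of its own data room. */
	rte_pktmbuf_attach_extbuf(m, ext_buf, 0, buf_len, shinfo);

	rte_pktmbuf_free(m);	/* refcnt drops to 0 -> my_free_cb() runs */
	return 0;
}
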
>
> Suggested-by: Varghese Vipin <vipin.varghese@intel.com>
> Suggested-by: Tummala Sivaprasad <sivaprasad.tummala@intel.com>
> Suggested-by: Olivier Matz <olivier.matz@6wind.com>
> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
> ---
>  doc/guides/nics/af_xdp.rst          |   1 +
>  drivers/net/af_xdp/rte_eth_af_xdp.c | 104 +++++++++++++++++++++-------
>  2 files changed, 79 insertions(+), 26 deletions(-)
>
> diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
> index 175038153..0bd4239fe 100644
> --- a/doc/guides/nics/af_xdp.rst
> +++ b/doc/guides/nics/af_xdp.rst
> @@ -28,6 +28,7 @@ The following options can be provided to set up an af_xdp port in DPDK.
>
>  *   ``iface`` - name of the Kernel interface to attach to (required);
>  *   ``queue`` - netdev queue id (optional, default 0);
> +*   ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
>
>  Prerequisites
>  -------------
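
For users, a concrete example of enabling the option (interface name and
queue number are placeholders; this follows the usual vdev devargs syntax
used elsewhere in this guide):

    --vdev net_af_xdp,iface=ens786f1,queue=0,pmd_zero_copy=1
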
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index 35c72272c..014cd5691 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -70,6 +70,7 @@ struct xsk_umem_info {
>         struct xsk_umem *umem;
>         struct rte_ring *buf_ring;
>         const struct rte_memzone *mz;
> +       int pmd_zc;
>  };
>
>  struct rx_stats {
> @@ -109,8 +110,8 @@ struct pmd_internals {
>         int if_index;
>         char if_name[IFNAMSIZ];
>         uint16_t queue_idx;
> +       int pmd_zc;
>         struct ether_addr eth_addr;
> -       struct xsk_umem_info *umem;
>         struct rte_mempool *mb_pool_share;
>
>         struct pkt_rx_queue rx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
> @@ -119,10 +120,12 @@ struct pmd_internals {
>
>  #define ETH_AF_XDP_IFACE_ARG                   "iface"
>  #define ETH_AF_XDP_QUEUE_IDX_ARG               "queue"
> +#define ETH_AF_XDP_PMD_ZC_ARG                  "pmd_zero_copy"
>
>  static const char * const valid_arguments[] = {
>         ETH_AF_XDP_IFACE_ARG,
>         ETH_AF_XDP_QUEUE_IDX_ARG,
> +       ETH_AF_XDP_PMD_ZC_ARG,
>         NULL
>  };
>
> @@ -166,6 +169,15 @@ reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
>         return 0;
>  }
>
> +static void
> +umem_buf_release_to_fq(void *addr, void *opaque)
> +{
> +       struct xsk_umem_info *umem = (struct xsk_umem_info *)opaque;
> +       uint64_t umem_addr = (uint64_t)addr - umem->mz->addr_64;
> +
> +       rte_ring_enqueue(umem->buf_ring, (void *)umem_addr);
> +}
> +
>  static uint16_t
>  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  {
> @@ -175,6 +187,7 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>         struct xsk_ring_prod *fq = &umem->fq;
>         uint32_t idx_rx = 0;
>         uint32_t free_thresh = fq->size >> 1;
> +       int pmd_zc = umem->pmd_zc;
>         struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
>         unsigned long dropped = 0;
>         unsigned long rx_bytes = 0;
> @@ -197,19 +210,29 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>                 uint64_t addr;
>                 uint32_t len;
>                 void *pkt;
> +               uint16_t buf_len = ETH_AF_XDP_FRAME_SIZE;
> +               struct rte_mbuf_ext_shared_info *shinfo;
>
>                 desc = xsk_ring_cons__rx_desc(rx, idx_rx++);
>                 addr = desc->addr;
>                 len = desc->len;
>                 pkt = xsk_umem__get_data(rxq->umem->mz->addr, addr);
>
> -               rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
> +               if (pmd_zc) {
> +                       shinfo = rte_pktmbuf_ext_shinfo_init_helper(pkt,
> +                                       &buf_len, umem_buf_release_to_fq, umem);
> +
> +                       rte_pktmbuf_attach_extbuf(mbufs[i], pkt, 0, buf_len,
> +                                                 shinfo);
> +               } else {
> +                       rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
> +                                                       pkt, len);
> +                       rte_ring_enqueue(umem->buf_ring, (void *)addr);
> +               }
>                 rte_pktmbuf_pkt_len(mbufs[i]) = len;
>                 rte_pktmbuf_data_len(mbufs[i]) = len;
>                 rx_bytes += len;
>                 bufs[i] = mbufs[i];
> -
> -               rte_ring_enqueue(umem->buf_ring, (void *)addr);
>         }
>
>         xsk_ring_cons__release(rx, rcvd);
> @@ -262,12 +285,21 @@ kick_tx(struct pkt_tx_queue *txq)
>         pull_umem_cq(umem, ETH_AF_XDP_TX_BATCH_SIZE);
>  }
>
> +static inline bool
> +in_umem_range(struct xsk_umem_info *umem, uint64_t addr)
> +{
> +       uint64_t mz_base_addr = umem->mz->addr_64;
> +
> +       return addr >= mz_base_addr && addr < mz_base_addr + umem->mz->len;
> +}
> +
>  static uint16_t
>  eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  {
>         struct pkt_tx_queue *txq = queue;
>         struct xsk_umem_info *umem = txq->pair->umem;
>         struct rte_mbuf *mbuf;
> +       int pmd_zc = umem->pmd_zc;
>         void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
>         unsigned long tx_bytes = 0;
>         int i;
> @@ -294,16 +326,26 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>
>                 desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
>                 mbuf = bufs[i];
> -
> -               desc->addr = (uint64_t)addrs[i];
>                 desc->len = mbuf->pkt_len;
> -               pkt = xsk_umem__get_data(umem->mz->addr,
> -                                        desc->addr);
> -               rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> -                          desc->len);
> -               tx_bytes += mbuf->pkt_len;
>
> -               rte_pktmbuf_free(mbuf);
> +               /*
> +                * We need to make sure the external mbuf address is within
> +                * current port's umem memzone range
> +                */
> +               if (pmd_zc && RTE_MBUF_HAS_EXTBUF(mbuf) &&
> +                               in_umem_range(umem, (uint64_t)mbuf->buf_addr)) {
> +                       desc->addr = (uint64_t)mbuf->buf_addr -
> +                               umem->mz->addr_64;
> +                       mbuf->buf_addr = xsk_umem__get_data(umem->mz->addr,
> +                                       (uint64_t)addrs[i]);
> +               } else {
> +                       desc->addr = (uint64_t)addrs[i];
> +                       pkt = xsk_umem__get_data(umem->mz->addr,
> +                                       desc->addr);
> +                       rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
> +                                       desc->len);
> +               }
> +               tx_bytes += mbuf->pkt_len;
>         }
>
>         xsk_ring_prod__submit(&txq->tx, nb_pkts);
> @@ -313,6 +355,9 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>         txq->stats.tx_pkts += nb_pkts;
>         txq->stats.tx_bytes += tx_bytes;
>
> +       for (i = 0; i < nb_pkts; i++)
> +               rte_pktmbuf_free(bufs[i]);
> +

Is it OK to free the mbuf here?
If the PMD is running with pmd_zc=true, the umem frame backing the mbuf is
still referenced by the TX ring and might not have been sent out yet.
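
To spell out the sequence I am worried about, here is the tail of the patched
TX path again with my comments added (same code as quoted above, comments are
mine; this is an illustration, not a proposed change):

	xsk_ring_prod__submit(&txq->tx, nb_pkts); /* descriptors handed to
						   * the kernel; TX may still
						   * be in flight */
	...
	for (i = 0; i < nb_pkts; i++)
		rte_pktmbuf_free(bufs[i]);	/* for a pmd_zc mbuf whose
						 * refcnt is 1 this invokes
						 * umem_buf_release_to_fq(),
						 * recycling the frame to
						 * buf_ring while a TX
						 * descriptor may still
						 * point at it */
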

Regards,
William


>         return nb_pkts;
>  }
>
> @@ -454,18 +499,16 @@ eth_dev_close(struct rte_eth_dev *dev)
>                 if (rxq->umem == NULL)
>                         break;
>                 xsk_socket__delete(rxq->xsk);
> +               (void)xsk_umem__delete(rxq->umem->umem);
> +               xdp_umem_destroy(rxq->umem);
>         }
>
> -       (void)xsk_umem__delete(internals->umem->umem);
> -
>         /*
>          * MAC is not allocated dynamically, setting it to NULL would prevent
>          * from releasing it in rte_eth_dev_release_port.
>          */
>         dev->data->mac_addrs = NULL;
>
> -       xdp_umem_destroy(internals->umem);
> -
>         remove_xdp_program(internals);
>  }
>
> @@ -639,7 +682,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>                 goto err;
>         }
>
> -       internals->umem = rxq->umem;
> +       rxq->umem->pmd_zc = internals->pmd_zc;
>
>         dev->data->rx_queues[rx_queue_id] = rxq;
>         return 0;
> @@ -775,9 +818,8 @@ parse_name_arg(const char *key __rte_unused,
>  }
>
>  static int
> -parse_parameters(struct rte_kvargs *kvlist,
> -                char *if_name,
> -                int *queue_idx)
> +parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *queue_idx,
> +                               int *pmd_zc)
>  {
>         int ret;
>
> @@ -791,6 +833,11 @@ parse_parameters(struct rte_kvargs *kvlist,
>         if (ret < 0)
>                 goto free_kvlist;
>
> +       ret = rte_kvargs_process(kvlist, ETH_AF_XDP_PMD_ZC_ARG,
> +                                &parse_integer_arg, pmd_zc);
> +       if (ret < 0)
> +               goto free_kvlist;
> +
>  free_kvlist:
>         rte_kvargs_free(kvlist);
>         return ret;
> @@ -827,9 +874,8 @@ get_iface_info(const char *if_name,
>  }
>
>  static struct rte_eth_dev *
> -init_internals(struct rte_vdev_device *dev,
> -              const char *if_name,
> -              int queue_idx)
> +init_internals(struct rte_vdev_device *dev, const char *if_name, int queue_idx,
> +                                       int pmd_zc)
>  {
>         const char *name = rte_vdev_device_name(dev);
>         const unsigned int numa_node = dev->device.numa_node;
> @@ -843,6 +889,7 @@ init_internals(struct rte_vdev_device *dev,
>                 return NULL;
>
>         internals->queue_idx = queue_idx;
> +       internals->pmd_zc = pmd_zc;
>         strlcpy(internals->if_name, if_name, IFNAMSIZ);
>
>         for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
> @@ -868,6 +915,9 @@ init_internals(struct rte_vdev_device *dev,
>         /* Let rte_eth_dev_close() release the port resources. */
>         eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
>
> +       if (internals->pmd_zc)
> +               AF_XDP_LOG(INFO, "Zero copy between umem and mbuf enabled.\n");
> +
>         return eth_dev;
>
>  err:
> @@ -883,6 +933,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>         int xsk_queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX;
>         struct rte_eth_dev *eth_dev = NULL;
>         const char *name;
> +       int pmd_zc = 0;
>
>         AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
>                 rte_vdev_device_name(dev));
> @@ -909,7 +960,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>         if (dev->device.numa_node == SOCKET_ID_ANY)
>                 dev->device.numa_node = rte_socket_id();
>
> -       if (parse_parameters(kvlist, if_name, &xsk_queue_idx) < 0) {
> +       if (parse_parameters(kvlist, if_name, &xsk_queue_idx, &pmd_zc) < 0) {
>                 AF_XDP_LOG(ERR, "Invalid kvargs value\n");
>                 return -EINVAL;
>         }
> @@ -919,7 +970,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
>                 return -EINVAL;
>         }
>
> -       eth_dev = init_internals(dev, if_name, xsk_queue_idx);
> +       eth_dev = init_internals(dev, if_name, xsk_queue_idx, pmd_zc);
>         if (eth_dev == NULL) {
>                 AF_XDP_LOG(ERR, "Failed to init internals\n");
>                 return -1;
> @@ -961,7 +1012,8 @@ static struct rte_vdev_driver pmd_af_xdp_drv = {
>  RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
>  RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
>                               "iface=<string> "
> -                             "queue=<int> ");
> +                             "queue=<int> "
> +                             "pmd_zero_copy=<0|1>");
>
>  RTE_INIT(af_xdp_init_log)
>  {
> --
> 2.17.1
>
