All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jakub Kicinski <jakub.kicinski@netronome.com>
To: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: davem@davemloft.net,
	Maciej Fijalkowski <maciej.fijalkowski@intel.com>,
	netdev@vger.kernel.org, nhorman@redhat.com, sassmann@redhat.com,
	Tony Nguyen <anthony.l.nguyen@intel.com>,
	Andrew Bowers <andrewx.bowers@intel.com>
Subject: Re: [net-next 3/9] ice: Add support for XDP
Date: Wed, 30 Oct 2019 11:27:54 -0700	[thread overview]
Message-ID: <20191030112754.05f04f6d@cakuba.netronome.com> (raw)
In-Reply-To: <20191030032910.24261-4-jeffrey.t.kirsher@intel.com>

On Tue, 29 Oct 2019 20:29:04 -0700, Jeff Kirsher wrote:
> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> 
> Add support for XDP. Implement ndo_bpf and ndo_xdp_xmit.  Upon load of
> an XDP program, allocate additional Tx rings for dedicated XDP use.
> The following actions are supported: XDP_TX, XDP_DROP, XDP_REDIRECT,
> XDP_PASS, and XDP_ABORTED.
> 
> Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
> Tested-by: Andrew Bowers <andrewx.bowers@intel.com>

> +/**
> + * ice_xdp_setup_prog - Add or remove XDP eBPF program
> + * @vsi: VSI to setup XDP for
> + * @prog: XDP program
> + * @extack: netlink extended ack
> + */
> +static int
> +ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
> +		   struct netlink_ext_ack *extack)
> +{
> +	int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
> +	bool if_running = netif_running(vsi->netdev);
> +	struct bpf_prog *old_prog;
> +	int i, ret = 0;
> +
> +	if (frame_size > vsi->rx_buf_len) {
> +		NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
> +		return -ENOTSUPP;

s/ENOTSUPP/EOPNOTSUPP/ everywhere

> +	}
> +
> +	if (!ice_is_xdp_ena_vsi(vsi) && !prog)
> +		return 0;

This is handled by the core these days.

> +	/* need to stop netdev while setting up the program for Rx rings */
> +	if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
> +		ret = ice_down(vsi);
> +		if (ret) {
> +			NL_SET_ERR_MSG_MOD(extack,
> +					   "Preparing device for XDP attach failed");
> +			goto skip_setting_prog;
> +		}
> +	}
> +
> +	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
> +		vsi->num_xdp_txq = vsi->alloc_txq;
> +		vsi->xdp_mapping_mode = ICE_VSI_MAP_CONTIG;
> +		if (ice_prepare_xdp_rings(vsi)) {
> +			NL_SET_ERR_MSG_MOD(extack,
> +					   "Setting up XDP Tx resources failed");
> +			ret = -ENOMEM;
> +			goto skip_setting_prog;
> +		}
> +	} else if (ice_is_xdp_ena_vsi(vsi) && !prog) {
> +		if (ice_destroy_xdp_rings(vsi)) {
> +			NL_SET_ERR_MSG_MOD(extack,
> +					   "Freeing XDP Tx resources failed");
> +			ret = -ENOMEM;
> +			goto skip_setting_prog;

The failures to destroy are kind of concerning, are you 100% sure
XDP is still operational if destroy fails?

> +		}
> +	}
> +
> +	old_prog = xchg(&vsi->xdp_prog, prog);
> +	if (old_prog)
> +		bpf_prog_put(old_prog);
> +
> +	ice_for_each_rxq(vsi, i)
> +		WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
> +
> +	if (if_running)
> +		ret = ice_up(vsi);
> +
> +skip_setting_prog:
> +	return ret;

If this is not expanded by later patches and you don't have any other
code to run, just return directly without the gotos please.

Actually if ice_down(vsi) was run before shouldn't the error case do
ice_up(vsi)?

> +}
> +
> +/**
> + * ice_xdp - implements XDP handler
> + * @dev: netdevice
> + * @xdp: XDP command
> + */
> +static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
> +{
> +	struct ice_netdev_priv *np = netdev_priv(dev);
> +	struct ice_vsi *vsi = np->vsi;
> +
> +	if (vsi->type != ICE_VSI_PF) {
> +		NL_SET_ERR_MSG_MOD(xdp->extack,
> +				   "XDP can be loaded only on PF VSI");
> +		return -EINVAL;
> +	}
> +
> +	switch (xdp->command) {
> +	case XDP_SETUP_PROG:
> +		return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
> +	case XDP_QUERY_PROG:
> +		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
> +		return 0;
> +	default:
> +		NL_SET_ERR_MSG_MOD(xdp->extack, "Unknown XDP command");

Please drop the extack for unknown command, I'm concerned it will leak
out somewhere where it's perfectly fine to ignore unknown commands.

> +		return -EINVAL;
> +	}
> +}

> +/**
> + * ice_run_xdp - Executes an XDP program on initialized xdp_buff
> + * @rx_ring: Rx ring
> + * @xdp: xdp_buff used as input to the XDP program
> + * @xdp_prog: XDP program to run
> + *
> + * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
> + */
> +static int
> +ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
> +	    struct bpf_prog *xdp_prog)
> +{
> +	int err, result = ICE_XDP_PASS;
> +	struct ice_ring *xdp_ring;
> +	u32 act;
> +
> +	act = bpf_prog_run_xdp(xdp_prog, xdp);
> +	switch (act) {
> +	case XDP_PASS:
> +		break;
> +	case XDP_TX:
> +		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];

Here you index the XDP ring with rx queue id but below ice_xdp_xmit()
uses smp_processor_id(). That worries me a little. If TX and REDIRECT
happens at the same time and rx->q_index != smp_processor_id() it could
cause trouble, no?

> +		result = ice_xmit_xdp_buff(xdp, xdp_ring);
> +		break;
> +	case XDP_REDIRECT:
> +		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
> +		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
> +		break;
> +	default:
> +		bpf_warn_invalid_xdp_action(act);
> +		/* fallthrough -- not supported action */
> +	case XDP_ABORTED:
> +		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
> +		/* fallthrough -- handle aborts by dropping frame */
> +	case XDP_DROP:
> +		result = ICE_XDP_CONSUMED;
> +		break;
> +	}
> +
> +	return result;
> +}
> +
> +/**
> + * ice_xdp_xmit - submit packets to XDP ring for transmission
> + * @dev: netdev
> + * @n: number of XDP frames to be transmitted
> + * @frames: XDP frames to be transmitted
> + * @flags: transmit flags
> + *
> + * Returns number of frames successfully sent. Frames that fail are
> + * free'ed via XDP return API.
> + * For error cases, a negative errno code is returned and no-frames
> + * are transmitted (caller must handle freeing frames).
> + */
> +int
> +ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
> +	     u32 flags)
> +{
> +	struct ice_netdev_priv *np = netdev_priv(dev);
> +	unsigned int queue_index = smp_processor_id();
> +	struct ice_vsi *vsi = np->vsi;
> +	struct ice_ring *xdp_ring;
> +	int drops = 0, i;
> +
> +	if (test_bit(__ICE_DOWN, vsi->state))
> +		return -ENETDOWN;
> +
> +	if (!ice_is_xdp_ena_vsi(vsi) || queue_index >= vsi->num_xdp_txq)
> +		return -ENXIO;
> +
> +	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> +		return -EINVAL;
> +
> +	xdp_ring = vsi->xdp_rings[queue_index];
> +	for (i = 0; i < n; i++) {
> +		struct xdp_frame *xdpf = frames[i];
> +		int err;
> +
> +		err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
> +		if (err != ICE_XDP_TX) {
> +			xdp_return_frame_rx_napi(xdpf);
> +			drops++;
> +		}
> +	}
> +
> +	if (unlikely(flags & XDP_XMIT_FLUSH))
> +		ice_xdp_ring_update_tail(xdp_ring);
> +
> +	return n - drops;
> +}

> diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
> index a914e603b2ed..f74ec83faa55 100644
> --- a/drivers/net/ethernet/intel/ice/ice_txrx.h
> +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
> @@ -22,6 +22,16 @@
>  #define ICE_RX_BUF_WRITE	16	/* Must be power of 2 */
>  #define ICE_MAX_TXQ_PER_TXQG	128
>  
> +static inline __le64
> +ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
> +{
> +	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
> +			   (td_cmd    << ICE_TXD_QW1_CMD_S) |
> +			   (td_offset << ICE_TXD_QW1_OFFSET_S) |
> +			   ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
> +			   (td_tag    << ICE_TXD_QW1_L2TAG1_S));
> +}

Please move the build_ctob() rename to a separate patch. All changes
which are not directly related to the objective of the patch make it
harder to review.

  reply	other threads:[~2019-10-30 18:28 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-30  3:29 [net-next 0/9][pull request] 100GbE Intel Wired LAN Driver Updates 2019-10-29 Jeff Kirsher
2019-10-30  3:29 ` [net-next 1/9] ice: Introduce ice_base.c Jeff Kirsher
2019-10-30  3:29 ` [net-next 2/9] ice: get rid of per-tc flow in Tx queue configuration routines Jeff Kirsher
2019-10-30  3:29 ` [net-next 3/9] ice: Add support for XDP Jeff Kirsher
2019-10-30 18:27   ` Jakub Kicinski [this message]
2019-10-31  0:26     ` Maciej Fijalkowski
2019-10-30  3:29 ` [net-next 4/9] ice: Move common functions to ice_txrx_lib.c Jeff Kirsher
2019-10-30  3:29 ` [net-next 5/9] ice: Add support for AF_XDP Jeff Kirsher
2019-10-30 18:50   ` Jakub Kicinski
2019-10-30 22:04     ` Maciej Fijalkowski
2019-10-30 22:06       ` Jakub Kicinski
2019-10-30  3:29 ` [net-next 6/9] ice: introduce legacy Rx flag Jeff Kirsher
2019-10-30  3:29 ` [net-next 7/9] ice: introduce frame padding computation logic Jeff Kirsher
2019-10-30  3:29 ` [net-next 8/9] ice: add build_skb() support Jeff Kirsher
2019-10-30  3:29 ` [net-next 9/9] ice: allow 3k MTU for XDP Jeff Kirsher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191030112754.05f04f6d@cakuba.netronome.com \
    --to=jakub.kicinski@netronome.com \
    --cc=andrewx.bowers@intel.com \
    --cc=anthony.l.nguyen@intel.com \
    --cc=davem@davemloft.net \
    --cc=jeffrey.t.kirsher@intel.com \
    --cc=maciej.fijalkowski@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=nhorman@redhat.com \
    --cc=sassmann@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.