From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alan Brady Date: Thu, 27 Jan 2022 16:10:01 -0800 Subject: [Intel-wired-lan] [PATCH net-next 11/19] iecm: add start_xmit and set_rx_mode In-Reply-To: <20220128001009.721392-1-alan.brady@intel.com> References: <20220128001009.721392-1-alan.brady@intel.com> Message-ID: <20220128001009.721392-12-alan.brady@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: intel-wired-lan@osuosl.org List-ID: With open and stop done, this continues down the netdev_ops struct to add start_xmit and set_rx_mode callbacks. The rest of the data path will be added after netdev_ops are done. Signed-off-by: Phani Burra Signed-off-by: Joshua Hay Signed-off-by: Madhu Chittim Signed-off-by: Pavan Kumar Linga Signed-off-by: Alan Brady --- drivers/net/ethernet/intel/iecm/iecm_lib.c | 247 ++++++- drivers/net/ethernet/intel/iecm/iecm_txrx.c | 667 ++++++++++++++++++ drivers/net/ethernet/intel/include/iecm.h | 1 + .../net/ethernet/intel/include/iecm_txrx.h | 60 ++ 4 files changed, 970 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/iecm/iecm_lib.c b/drivers/net/ethernet/intel/iecm/iecm_lib.c index 037a0e06bb7b..003057f48f0c 100644 --- a/drivers/net/ethernet/intel/iecm/iecm_lib.c +++ b/drivers/net/ethernet/intel/iecm/iecm_lib.c @@ -141,6 +141,17 @@ struct iecm_vport *iecm_netdev_to_vport(struct net_device *netdev) return np->vport; } +/** + * iecm_netdev_to_adapter - get an adapter handle from a netdev + * @netdev: network interface device structure + */ +struct iecm_adapter *iecm_netdev_to_adapter(struct net_device *netdev) +{ + struct iecm_netdev_priv *np = netdev_priv(netdev); + + return np->vport->adapter; +} + /** * iecm_mb_intr_rel_irq - Free the IRQ association with the OS * @adapter: adapter structure @@ -417,6 +428,61 @@ iecm_mac_filter *iecm_find_mac_filter(struct iecm_vport *vport, return NULL; } +/** + * __iecm_del_mac_filter - Delete MAC filter helper + * @vport: main vport struct + * @macaddr: address to delete + * + * Takes mac_filter_list_lock spinlock to set remove field for filter in list. + */ +static struct +iecm_mac_filter *__iecm_del_mac_filter(struct iecm_vport *vport, + const u8 *macaddr) +{ + struct iecm_mac_filter *f; + + spin_lock_bh(&vport->adapter->mac_filter_list_lock); + f = iecm_find_mac_filter(vport, macaddr); + if (f) { + /* If filter was never synced to HW we can just delete it here, + * otherwise mark for removal. + */ + if (f->add) { + list_del(&f->list); + kfree(f); + f = NULL; + } else { + f->remove = true; + } + } + spin_unlock_bh(&vport->adapter->mac_filter_list_lock); + + return f; +} + +/** + * iecm_del_mac_filter - Delete a MAC filter from the filter list + * @vport: main vport structure + * @macaddr: the MAC address + * + * Removes filter from list and if interface is up, tells hardware about the + * removed filter. 
+ **/
+static void iecm_del_mac_filter(struct iecm_vport *vport, const u8 *macaddr)
+{
+	struct iecm_mac_filter *f;
+
+	if (!macaddr)
+		return;
+
+	f = __iecm_del_mac_filter(vport, macaddr);
+	if (!f)
+		return;
+
+	if (vport->adapter->state == __IECM_UP)
+		iecm_add_del_ether_addrs(vport, false, false);
+}
+
 /**
  * __iecm_add_mac_filter - Add mac filter helper function
  * @vport: main vport struct
@@ -1711,6 +1777,134 @@ void iecm_remove(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL(iecm_remove);
 
+/**
+ * iecm_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock
+ * meaning we cannot sleep in this context. Due to this, we have to add the
+ * filter and send the virtchnl message asynchronously without waiting for the
+ * response from the other side. We won't know whether or not the operation
+ * actually succeeded until we get the message back. Returns 0 on success,
+ * negative on failure.
+ */
+static int iecm_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+	struct iecm_vport *vport = iecm_netdev_to_vport(netdev);
+
+	if (__iecm_add_mac_filter(vport, addr)) {
+		if (vport->adapter->state == __IECM_UP) {
+			set_bit(__IECM_ADD_ETH_REQ, vport->adapter->flags);
+			iecm_add_del_ether_addrs(vport, true, true);
+		}
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
+/**
+ * iecm_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to remove
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode. Kernel takes addr_list_lock spinlock
+ * meaning we cannot sleep in this context. Due to this we have to delete the
+ * filter and send the virtchnl message asynchronously without waiting for the
+ * response from the other side. We won't know whether or not the operation
+ * actually succeeded until we get the message back. Returns 0 on success,
+ * negative on failure.
+ */
+static int iecm_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+	struct iecm_vport *vport = iecm_netdev_to_vport(netdev);
+
+	/* Under some circumstances, we might receive a request to delete
+	 * our own device address from our uc list. Because we store the
+	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
+	 * such requests and not delete our device address from this list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
+	if (__iecm_del_mac_filter(vport, addr)) {
+		if (vport->adapter->state == __IECM_UP) {
+			set_bit(__IECM_DEL_ETH_REQ, vport->adapter->flags);
+			iecm_add_del_ether_addrs(vport, false, true);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * iecm_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ *
+ * Stack takes addr_list_lock spinlock before calling our .set_rx_mode. We
+ * cannot sleep in this context.
+ */
+static void iecm_set_rx_mode(struct net_device *netdev)
+{
+	struct iecm_adapter *adapter = iecm_netdev_to_adapter(netdev);
+
+	if (iecm_is_cap_ena(adapter, IECM_OTHER_CAPS, VIRTCHNL2_CAP_MACFILTER)) {
+		__dev_uc_sync(netdev, iecm_addr_sync, iecm_addr_unsync);
+		__dev_mc_sync(netdev, iecm_addr_sync, iecm_addr_unsync);
+	}
+
+	if (iecm_is_cap_ena(adapter, IECM_OTHER_CAPS, VIRTCHNL2_CAP_PROMISC)) {
+		bool changed = false;
+
+		/* IFF_PROMISC enables both unicast and multicast promiscuous,
+		 * while IFF_ALLMULTI only enables multicast such that:
+		 *
+		 * promisc + allmulti = unicast | multicast
+		 * promisc + !allmulti = unicast | multicast
+		 * !promisc + allmulti = multicast
+		 */
+		if ((netdev->flags & IFF_PROMISC) &&
+		    !test_and_set_bit(__IECM_PROMISC_UC,
+				      adapter->config_data.user_flags)) {
+			changed = true;
+			dev_info(&adapter->pdev->dev, "Entering promiscuous mode\n");
+			if (!test_and_set_bit(__IECM_PROMISC_MC,
+					      adapter->config_data.user_flags))
+				dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n");
+		}
+		if (!(netdev->flags & IFF_PROMISC) &&
+		    test_and_clear_bit(__IECM_PROMISC_UC,
+				       adapter->config_data.user_flags)) {
+			changed = true;
+			dev_info(&adapter->pdev->dev, "Leaving promiscuous mode\n");
+		}
+		if (netdev->flags & IFF_ALLMULTI &&
+		    !test_and_set_bit(__IECM_PROMISC_MC,
+				      adapter->config_data.user_flags)) {
+			changed = true;
+			dev_info(&adapter->pdev->dev, "Entering multicast promiscuous mode\n");
+		}
+		if (!(netdev->flags & (IFF_ALLMULTI | IFF_PROMISC)) &&
+		    test_and_clear_bit(__IECM_PROMISC_MC,
+				       adapter->config_data.user_flags)) {
+			changed = true;
+			dev_info(&adapter->pdev->dev, "Leaving multicast promiscuous mode\n");
+		}
+
+		if (changed) {
+			int err = iecm_set_promiscuous(adapter);
+
+			if (err) {
+				dev_info(&adapter->pdev->dev, "Failed to set promiscuous mode: %d\n",
+					 err);
+			}
+		}
+	}
+}
+
 /**
  * iecm_open - Called when a network interface becomes active
  * @netdev: network interface device structure
@@ -1730,6 +1924,49 @@ static int iecm_open(struct net_device *netdev)
 	return iecm_vport_open(np->vport, true);
 }
 
+/**
+ * iecm_set_mac - NDO callback to set port mac address
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int iecm_set_mac(struct net_device *netdev, void *p)
+{
+	struct iecm_vport *vport = iecm_netdev_to_vport(netdev);
+	struct iecm_mac_filter *f;
+	struct sockaddr *addr = p;
+
+	if (!iecm_is_cap_ena(vport->adapter, IECM_OTHER_CAPS,
+			     VIRTCHNL2_CAP_MACFILTER)) {
+		dev_info(&vport->adapter->pdev->dev, "Setting MAC address is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!is_valid_ether_addr(addr->sa_data)) {
+		dev_info(&vport->adapter->pdev->dev, "Invalid MAC address: %pM\n",
+			 addr->sa_data);
+		return -EADDRNOTAVAIL;
+	}
+
+	if (ether_addr_equal(netdev->dev_addr, addr->sa_data))
+		return 0;
+
+	/* Delete the current filter */
+	if (is_valid_ether_addr(vport->default_mac_addr))
+		iecm_del_mac_filter(vport, vport->default_mac_addr);
+
+	/* Add new filter */
+	f = iecm_add_mac_filter(vport, addr->sa_data);
+
+	if (f) {
+		ether_addr_copy(vport->default_mac_addr, addr->sa_data);
+		dev_addr_mod(netdev, 0, addr->sa_data, ETH_ALEN);
+	}
+
+	return f ? 0 : -ENOMEM;
+}
+
 void *iecm_alloc_dma_mem(struct iecm_hw *hw, struct iecm_dma_mem *mem, u64 size)
 {
 	struct iecm_adapter *adapter = (struct iecm_adapter *)hw->back;
@@ -1756,10 +1993,10 @@ void iecm_free_dma_mem(struct iecm_hw *hw, struct iecm_dma_mem *mem)
 static const struct net_device_ops iecm_netdev_ops_splitq = {
 	.ndo_open = iecm_open,
 	.ndo_stop = iecm_stop,
-	.ndo_start_xmit = NULL,
-	.ndo_set_rx_mode = NULL,
+	.ndo_start_xmit = iecm_tx_splitq_start,
+	.ndo_set_rx_mode = iecm_set_rx_mode,
 	.ndo_validate_addr = eth_validate_addr,
-	.ndo_set_mac_address = NULL,
+	.ndo_set_mac_address = iecm_set_mac,
 	.ndo_change_mtu = NULL,
 	.ndo_get_stats64 = NULL,
 	.ndo_fix_features = NULL,
@@ -1773,9 +2010,9 @@ static const struct net_device_ops iecm_netdev_ops_singleq = {
 	.ndo_open = iecm_open,
 	.ndo_stop = iecm_stop,
 	.ndo_start_xmit = NULL,
-	.ndo_set_rx_mode = NULL,
+	.ndo_set_rx_mode = iecm_set_rx_mode,
 	.ndo_validate_addr = eth_validate_addr,
-	.ndo_set_mac_address = NULL,
+	.ndo_set_mac_address = iecm_set_mac,
 	.ndo_change_mtu = NULL,
 	.ndo_get_stats64 = NULL,
 	.ndo_fix_features = NULL,
diff --git a/drivers/net/ethernet/intel/iecm/iecm_txrx.c b/drivers/net/ethernet/intel/iecm/iecm_txrx.c
index fb6a61277b00..ef5fe659389b 100644
--- a/drivers/net/ethernet/intel/iecm/iecm_txrx.c
+++ b/drivers/net/ethernet/intel/iecm/iecm_txrx.c
@@ -1655,6 +1655,673 @@ int iecm_vport_queues_alloc(struct iecm_vport *vport)
 	return err;
 }
 
+/**
+ * iecm_tx_splitq_build_ctb - populate command tag and size for queue
+ * based scheduling descriptors
+ * @desc: descriptor to populate
+ * @parms: pointer to tx params struct
+ * @td_cmd: command to be filled in desc
+ * @size: size of buffer
+ */
+void
+iecm_tx_splitq_build_ctb(union iecm_tx_flex_desc *desc,
+			 struct iecm_tx_splitq_params *parms,
+			 u16 td_cmd, u16 size)
+{
+	desc->q.qw1.cmd_dtype =
+		cpu_to_le16(parms->dtype & IECM_FLEX_TXD_QW1_DTYPE_M);
+	desc->q.qw1.cmd_dtype |=
+		cpu_to_le16((td_cmd << IECM_FLEX_TXD_QW1_CMD_S) &
+			    IECM_FLEX_TXD_QW1_CMD_M);
+	desc->q.qw1.buf_size = cpu_to_le16((u16)size);
+	desc->q.qw1.flex.l2tags.l2tag1 = cpu_to_le16(parms->td_tag);
+}
+
+/**
+ * iecm_tx_splitq_build_flow_desc - populate command tag and size for flow
+ * scheduling descriptors
+ * @desc: descriptor to populate
+ * @parms: pointer to tx params struct
+ * @td_cmd: command to be filled in desc
+ * @size: size of buffer
+ */
+void
+iecm_tx_splitq_build_flow_desc(union iecm_tx_flex_desc *desc,
+			       struct iecm_tx_splitq_params *parms,
+			       u16 td_cmd, u16 size)
+{
+	desc->flow.qw1.cmd_dtype = (u16)parms->dtype | td_cmd;
+	desc->flow.qw1.rxr_bufsize = cpu_to_le16((u16)size);
+	desc->flow.qw1.compl_tag = cpu_to_le16(parms->compl_tag);
+}
+
+/**
+ * iecm_tx_buf_avail - Stop Tx if not enough bookkeeping buffers are available
+ * @tx_q: the queue to be checked
+ *
+ * Return -EBUSY if Tx queue stop is needed, else 0
+ */
+static int iecm_tx_buf_avail(struct iecm_queue *tx_q)
+{
+	/* If we have less than a quarter of the total desc_count left
+	 * stop the queue to wait for more completions
+	 */
+	if (unlikely(IECM_TX_BUF_UNUSED(tx_q) < tx_q->desc_count >> 2)) {
+		netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx);
+		return -EBUSY;
+	}
+	return 0;
+}
+
+/**
+ * __iecm_tx_maybe_stop - 2nd level check for Tx stop conditions
+ * @tx_q: the queue to be checked
+ * @size: number of descriptors we want to assure are available
+ *
+ * Returns -EBUSY if a stop is needed, else 0
+ */
+static int
+__iecm_tx_maybe_stop(struct iecm_queue *tx_q, unsigned int size)
+{
+	netif_stop_subqueue(tx_q->vport->netdev, tx_q->idx);
+
+	/* Memory barrier before checking head and tail */
+	smp_mb();
+
+	/* Check again in case another CPU has just made room available. */
+	if (likely(IECM_DESC_UNUSED(tx_q) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_subqueue because it doesn't call schedule */
+	netif_start_subqueue(tx_q->vport->netdev, tx_q->idx);
+
+	return 0;
+}
+
+/**
+ * iecm_tx_maybe_stop - 1st level check for Tx stop conditions
+ * @tx_q: the queue to be checked
+ * @size: number of descriptors we want to assure are available
+ *
+ * Returns 0 if stop is not needed
+ */
+int iecm_tx_maybe_stop(struct iecm_queue *tx_q, unsigned int size)
+{
+	if (likely(IECM_DESC_UNUSED(tx_q) >= size))
+		return 0;
+
+	return __iecm_tx_maybe_stop(tx_q, size);
+}
+
+/**
+ * iecm_tx_buf_hw_update - Store the new tail value
+ * @tx_q: queue to bump
+ * @val: new tail index
+ * @xmit_more: more skbs pending
+ *
+ * The naming here is special in that 'hw' signals that this function is about
+ * to do a register write to update our queue status. We know this can only
+ * mean tail here as HW should be owning head for TX.
+ */
+void iecm_tx_buf_hw_update(struct iecm_queue *tx_q, u32 val,
+			   bool xmit_more)
+{
+	struct netdev_queue *nq;
+
+	nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx);
+	tx_q->next_to_use = val;
+
+	iecm_tx_maybe_stop(tx_q, IECM_TX_DESC_NEEDED);
+
+	/* Force memory writes to complete before letting h/w
+	 * know there are new descriptors to fetch. (Only
+	 * applicable for weak-ordered memory model archs,
+	 * such as IA-64).
+	 */
+	wmb();
+
+	/* notify HW of packet */
+	if (netif_xmit_stopped(nq) || !xmit_more)
+		writel(val, tx_q->tail);
+}
+
+/**
+ * iecm_size_to_txd_count - Get the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to assume
+ * that we can have no more than 12K of data per descriptor, even though each
+ * descriptor can take up to 16K - 1 bytes of aligned memory.
+ */
+unsigned int iecm_size_to_txd_count(unsigned int size)
+{
+	return (size / 12288) + IECM_TX_DESCS_FOR_SKB_DATA_PTR;
+}
+
+/**
+ * iecm_tx_desc_count_required - calculate number of Tx descriptors needed
+ * @skb: send buffer
+ *
+ * Returns number of data descriptors needed for this skb.
+ */
+unsigned int iecm_tx_desc_count_required(struct sk_buff *skb)
+{
+	const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	unsigned int count = 0, size = skb_headlen(skb);
+
+	for (;;) {
+		count += iecm_size_to_txd_count(size);
+
+		if (!nr_frags--)
+			break;
+
+		size = skb_frag_size(frag++);
+	}
+
+	return count;
+}
+
+/**
+ * iecm_tx_splitq_map - Build the Tx flex descriptor
+ * @tx_q: queue to send buffer on
+ * @parms: pointer to splitq params struct
+ * @first: first buffer info buffer to use
+ *
+ * This function loops over the skb data pointed to by *first
+ * and gets a physical address for each memory location and programs
+ * it and the length into the transmit flex descriptor.
+ */ +static void +iecm_tx_splitq_map(struct iecm_queue *tx_q, + struct iecm_tx_splitq_params *parms, + struct iecm_tx_buf *first) +{ + union iecm_tx_flex_desc *tx_desc; + unsigned int data_len, size; + struct iecm_tx_buf *tx_buf; + u16 i = tx_q->next_to_use; + struct netdev_queue *nq; + struct sk_buff *skb; + skb_frag_t *frag; + u16 td_cmd = 0; + dma_addr_t dma; + + skb = first->skb; + + td_cmd = parms->offload.td_cmd; + parms->compl_tag = tx_q->tx_buf_key; + + data_len = skb->data_len; + size = skb_headlen(skb); + + tx_desc = IECM_FLEX_TX_DESC(tx_q, i); + + dma = dma_map_single(tx_q->dev, skb->data, size, DMA_TO_DEVICE); + + tx_buf = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + unsigned int max_data = IECM_TX_MAX_DESC_DATA_ALIGNED; + + if (dma_mapping_error(tx_q->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buf, len, size); + dma_unmap_addr_set(tx_buf, dma, dma); + + /* align size to end of page */ + max_data += -dma & (IECM_TX_MAX_READ_REQ_SIZE - 1); + + /* buf_addr is in same location for both desc types */ + tx_desc->q.buf_addr = cpu_to_le64(dma); + + /* account for data chunks larger than the hardware + * can handle + */ + while (unlikely(size > IECM_TX_MAX_DESC_DATA)) { + parms->splitq_build_ctb(tx_desc, parms, td_cmd, + max_data); + + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IECM_FLEX_TX_DESC(tx_q, 0); + i = 0; + } + + dma += max_data; + size -= max_data; + + max_data = IECM_TX_MAX_DESC_DATA_ALIGNED; + /* buf_addr is in same location for both desc types */ + tx_desc->q.buf_addr = cpu_to_le64(dma); + } + + if (likely(!data_len)) + break; + parms->splitq_build_ctb(tx_desc, parms, td_cmd, size); + tx_desc++; + i++; + + if (i == tx_q->desc_count) { + tx_desc = IECM_FLEX_TX_DESC(tx_q, 0); + i = 0; + } + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_q->dev, frag, 0, size, + DMA_TO_DEVICE); + + tx_buf->compl_tag = parms->compl_tag; + tx_buf = &tx_q->tx_buf[i]; + } + + /* record bytecount for BQL */ + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); + netdev_tx_sent_queue(nq, first->bytecount); + + /* record SW timestamp if HW timestamp is not available */ + skb_tx_timestamp(first->skb); + + /* write last descriptor with RS and EOP bits */ + td_cmd |= parms->eop_cmd; + parms->splitq_build_ctb(tx_desc, parms, td_cmd, size); + i++; + if (i == tx_q->desc_count) + i = 0; + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + tx_buf->compl_tag = parms->compl_tag++; + + iecm_tx_buf_hw_update(tx_q, i, netdev_xmit_more()); + + /* Update TXQ Completion Tag key for next buffer */ + tx_q->tx_buf_key = parms->compl_tag; + + return; + +dma_error: + /* clear dma mappings for failed tx_buf map */ + for (;;) { + tx_buf = &tx_q->tx_buf[i]; + iecm_tx_buf_rel(tx_q, tx_buf); + if (tx_buf == first) + break; + if (i == 0) + i = tx_q->desc_count; + i--; + } + + tx_q->next_to_use = i; +} + +/** + * iecm_tx_prepare_vlan_flags - prepare generic vlan tagging for HW + * @tx_q: txq to find the tag location + * @first: pointer to struct iecm_tx_buf + * @skb: skb being xmitted + */ +void iecm_tx_prepare_vlan_flags(struct iecm_queue *tx_q, + struct iecm_tx_buf *first, + struct sk_buff *skb) +{ + struct iecm_vport *vport = tx_q->vport; + u32 tx_flags = 0; + + /* Stack sets protocol to 8021q when offload is disabled so SW can take + * any necessary steps to handle it. We don't need to do anything, + * just set protocol to encapsulated type. 
+ */ + if (skb->protocol == htons(ETH_P_8021Q) && + !iecm_is_feature_ena(vport, NETIF_F_HW_VLAN_CTAG_RX)) { + skb->protocol = vlan_get_protocol(skb); + return; + } + + if (!skb_vlan_tag_present(skb)) + return; + + tx_flags |= skb_vlan_tag_get(skb) << IECM_TX_FLAGS_VLAN_SHIFT; + tx_flags |= IECM_TX_FLAGS_VLAN_TAG; + if (test_bit(__IECM_Q_VLAN_TAG_LOC_L2TAG2, tx_q->flags)) + tx_flags |= IECM_TX_FLAGS_HW_OUTER_SINGLE_VLAN; + else if (test_bit(__IECM_Q_VLAN_TAG_LOC_L2TAG1, tx_q->flags)) + tx_flags |= IECM_TX_FLAGS_HW_VLAN; + else + dev_dbg(&vport->adapter->pdev->dev, "Unsupported Tx VLAN tag location requested\n"); + + first->tx_flags |= tx_flags; +} + +/** + * iecm_tso - computes mss and TSO length to prepare for TSO + * @first: pointer to struct iecm_tx_buf + * @off: pointer to struct that holds offload parameters + * + * Returns error (negative) if TSO doesn't apply to the given skb, + * 0 otherwise. + * + * Note: this function can be used in the splitq and singleq paths + */ +int iecm_tso(struct iecm_tx_buf *first, struct iecm_tx_offload_params *off) +{ + struct sk_buff *skb = first->skb; + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + union { + struct tcphdr *tcp; + struct udphdr *udp; + unsigned char *hdr; + } l4; + u32 paylen, l4_start; + int err; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + ip.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + + /* initialize outer IP header fields */ + if (ip.v4->version == 4) { + ip.v4->tot_len = 0; + ip.v4->check = 0; + } else if (ip.v4->version == 6) { + ip.v6->payload_len = 0; + } else { + return -EINVAL; + } + + l4_start = skb_transport_offset(skb); + + /* remove payload length from checksum */ + paylen = skb->len - l4_start; + + switch (skb_shinfo(skb)->gso_type) { + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + csum_replace_by_diff(&l4.tcp->check, + (__force __wsum)htonl(paylen)); + + /* compute length of segmentation header */ + off->tso_hdr_len = tcp_hdrlen(skb) + l4_start; + break; + case SKB_GSO_UDP_L4: + csum_replace_by_diff(&l4.udp->check, + (__force __wsum)htonl(paylen)); + /* compute length of segmentation header */ + off->tso_hdr_len = sizeof(struct udphdr) + l4_start; + l4.udp->len = + htons(skb_shinfo(skb)->gso_size + + sizeof(struct udphdr)); + break; + default: + return -EINVAL; + } + + off->tso_len = skb->len - off->tso_hdr_len; + off->mss = skb_shinfo(skb)->gso_size; + + /* update gso_segs and bytecount */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount = qdisc_skb_cb(skb)->pkt_len; + + first->tx_flags |= IECM_TX_FLAGS_TSO; + + return 0; +} + +/** + * __iecm_chk_linearize - Check skb is not using too many buffers + * @skb: send buffer + * @max_bufs: maximum number of buffers + * + * For TSO we need to count the TSO header and segment payload separately. As + * such we need to check cases where we have max_bufs-1 fragments or more as we + * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1 + * for the segment payload in the first descriptor, and another max_buf-1 for + * the fragments. 
+ */
+static bool __iecm_chk_linearize(struct sk_buff *skb, unsigned int max_bufs)
+{
+	const skb_frag_t *frag, *stale;
+	int nr_frags, sum;
+
+	/* no need to check if number of frags is less than max_bufs - 1 */
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	if (nr_frags < (max_bufs - 1))
+		return false;
+
+	/* We need to walk through the list and validate that each group
+	 * of max_bufs-2 fragments totals at least gso_size.
+	 */
+	nr_frags -= max_bufs - 2;
+	frag = &skb_shinfo(skb)->frags[0];
+
+	/* Initialize size to the negative value of gso_size minus 1. We use
+	 * this as the worst case scenario in which the frag ahead of us only
+	 * provides one byte which is why we are limited to max_bufs-2
+	 * descriptors for a single transmit as the header and previous
+	 * fragment are already consuming 2 descriptors.
+	 */
+	sum = 1 - skb_shinfo(skb)->gso_size;
+
+	/* Add size of frags 0 through 4 to create our initial sum */
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+	sum += skb_frag_size(frag++);
+
+	/* Walk through fragments adding latest fragment, testing it, and
+	 * then removing stale fragments from the sum.
+	 */
+	for (stale = &skb_shinfo(skb)->frags[0];; stale++) {
+		int stale_size = skb_frag_size(stale);
+
+		sum += skb_frag_size(frag++);
+
+		/* The stale fragment may present us with a smaller
+		 * descriptor than the actual fragment size. To account
+		 * for that we need to remove all the data on the front and
+		 * figure out what the remainder would be in the last
+		 * descriptor associated with the fragment.
+		 */
+		if (stale_size > IECM_TX_MAX_DESC_DATA) {
+			int align_pad = -(skb_frag_off(stale)) &
+					(IECM_TX_MAX_READ_REQ_SIZE - 1);
+
+			sum -= align_pad;
+			stale_size -= align_pad;
+
+			do {
+				sum -= IECM_TX_MAX_DESC_DATA_ALIGNED;
+				stale_size -= IECM_TX_MAX_DESC_DATA_ALIGNED;
+			} while (stale_size > IECM_TX_MAX_DESC_DATA);
+		}
+
+		/* if sum is negative we failed to make sufficient progress */
+		if (sum < 0)
+			return true;
+
+		if (!nr_frags--)
+			break;
+
+		sum -= stale_size;
+	}
+
+	return false;
+}
+
+/**
+ * iecm_chk_linearize - Check if skb exceeds max descriptors per packet
+ * @skb: send buffer
+ * @max_bufs: maximum scatter gather buffers for single packet
+ * @count: number of buffers this packet needs
+ *
+ * Make sure we don't exceed maximum scatter gather buffers for a single
+ * packet. We have to do some special checking around the boundary (max_bufs-1)
+ * if TSO is on since we need to count the TSO header and payload separately.
+ * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO
+ * header, 1 for segment payload, and then 7 for the fragments.
+ */ +bool iecm_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + unsigned int count) +{ + if (likely(count < max_bufs)) + return false; + if (skb_is_gso(skb)) + return __iecm_chk_linearize(skb, max_bufs); + + return count != max_bufs; +} + +/** + * iecm_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors + * @skb: send buffer + * @tx_q: queue to send buffer on + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +static netdev_tx_t +iecm_tx_splitq_frame(struct sk_buff *skb, struct iecm_queue *tx_q) +{ + struct iecm_tx_splitq_params tx_parms = { + NULL, (enum iecm_tx_desc_dtype_value)0, 0, {0}, {0} + }; + struct iecm_tx_buf *first; + unsigned int count; + + count = iecm_tx_desc_count_required(skb); + if (iecm_chk_linearize(skb, tx_q->tx_max_bufs, count)) { + if (__skb_linearize(skb)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + count = iecm_size_to_txd_count(skb->len); + tx_q->vport->port_stats.tx_linearize++; + } + + if (iecm_tx_maybe_stop(tx_q, count + IECM_TX_DESCS_PER_CACHE_LINE + + IECM_TX_DESCS_FOR_CTX)) { + return NETDEV_TX_BUSY; + } + + /* Also check for available book keeping buffers */ + if (iecm_tx_buf_avail(tx_q)) + return NETDEV_TX_BUSY; + + /* record the location of the first descriptor for this packet */ + first = &tx_q->tx_buf[tx_q->next_to_use]; + first->skb = skb; + first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN); + first->gso_segs = 1; + first->tx_flags = 0; + + iecm_tx_prepare_vlan_flags(tx_q, first, skb); + + if (iecm_tso(first, &tx_parms.offload) < 0) { + /* If tso returns an error, drop the packet */ + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + if (first->tx_flags & IECM_TX_FLAGS_TSO) { + /* If tso is needed, set up context desc */ + union iecm_flex_tx_ctx_desc *ctx_desc; + int i = tx_q->next_to_use; + + /* grab the next descriptor */ + ctx_desc = IECM_FLEX_TX_CTX_DESC(tx_q, i); + i++; + tx_q->next_to_use = (i < tx_q->desc_count) ? 
i : 0; + + ctx_desc->tso.qw1.cmd_dtype = + cpu_to_le16(IECM_TX_DESC_DTYPE_FLEX_TSO_CTX | + IECM_TX_FLEX_CTX_DESC_CMD_TSO); + ctx_desc->tso.qw0.flex_tlen = + cpu_to_le32(tx_parms.offload.tso_len & + IECM_TXD_FLEX_CTX_TLEN_M); + ctx_desc->tso.qw0.mss_rt = + cpu_to_le16(tx_parms.offload.mss & + IECM_TXD_FLEX_CTX_MSS_RT_M); + ctx_desc->tso.qw0.hdr_len = tx_parms.offload.tso_hdr_len; + + u64_stats_update_begin(&tx_q->stats_sync); + tx_q->q_stats.tx.lso_pkts++; + u64_stats_update_end(&tx_q->stats_sync); + } + + if (test_bit(__IECM_Q_FLOW_SCH_EN, tx_q->flags)) { + tx_parms.dtype = IECM_TX_DESC_DTYPE_FLEX_FLOW_SCHE; + tx_parms.splitq_build_ctb = iecm_tx_splitq_build_flow_desc; + tx_parms.eop_cmd = + IECM_TXD_FLEX_FLOW_CMD_EOP | IECM_TXD_FLEX_FLOW_CMD_RE; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + tx_parms.offload.td_cmd |= IECM_TXD_FLEX_FLOW_CMD_CS_EN; + + } else { + tx_parms.dtype = IECM_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2; + tx_parms.splitq_build_ctb = iecm_tx_splitq_build_ctb; + tx_parms.eop_cmd = IECM_TXD_LAST_DESC_CMD; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + tx_parms.offload.td_cmd |= IECM_TX_FLEX_DESC_CMD_CS_EN; + + /* VLAN Offload can only be used with queue based scheduling */ + if (first->tx_flags & IECM_TX_FLAGS_VLAN_TAG) { + tx_parms.offload.td_cmd |= (u64)IECM_TX_FLEX_DESC_CMD_IL2TAG1; + tx_parms.td_tag = (first->tx_flags & IECM_TX_FLAGS_VLAN_MASK) >> + IECM_TX_FLAGS_VLAN_SHIFT; + } + } + + iecm_tx_splitq_map(tx_q, &tx_parms, first); + + return NETDEV_TX_OK; +} + +/** + * iecm_tx_splitq_start - Selects the right Tx queue to send buffer + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +netdev_tx_t iecm_tx_splitq_start(struct sk_buff *skb, + struct net_device *netdev) +{ + struct iecm_vport *vport = iecm_netdev_to_vport(netdev); + struct iecm_queue *tx_q; + + if (skb->queue_mapping >= vport->num_txq) + return -EINVAL; + + tx_q = vport->txqs[skb->queue_mapping]; + + /* hardware can't handle really short frames, hardware padding works + * beyond this point + */ + if (skb_put_padto(skb, IECM_TX_MIN_LEN)) + return NETDEV_TX_OK; + + return iecm_tx_splitq_frame(skb, tx_q); +} + /** * iecm_vport_intr_clean_queues - MSIX mode Interrupt Handler * @irq: interrupt number diff --git a/drivers/net/ethernet/intel/include/iecm.h b/drivers/net/ethernet/intel/include/iecm.h index 4304256f7010..f6f9884c10c2 100644 --- a/drivers/net/ethernet/intel/include/iecm.h +++ b/drivers/net/ethernet/intel/include/iecm.h @@ -717,6 +717,7 @@ int iecm_send_alloc_vectors_msg(struct iecm_adapter *adapter, u16 num_vectors); int iecm_vport_params_buf_alloc(struct iecm_adapter *adapter); void iecm_vport_params_buf_rel(struct iecm_adapter *adapter); struct iecm_vport *iecm_netdev_to_vport(struct net_device *netdev); +struct iecm_adapter *iecm_netdev_to_adapter(struct net_device *netdev); int iecm_send_get_stats_msg(struct iecm_vport *vport); int iecm_get_vec_ids(struct iecm_adapter *adapter, u16 *vecids, int num_vecids, diff --git a/drivers/net/ethernet/intel/include/iecm_txrx.h b/drivers/net/ethernet/intel/include/iecm_txrx.h index 5e29148938fb..26e480343876 100644 --- a/drivers/net/ethernet/intel/include/iecm_txrx.h +++ b/drivers/net/ethernet/intel/include/iecm_txrx.h @@ -115,6 +115,11 @@ #define MAKEMASK(m, s) ((m) << (s)) +union iecm_tx_flex_desc { + struct iecm_flex_tx_desc q; /* queue based scheduling */ + struct iecm_flex_tx_sched_desc flow; /* flow based scheduling */ +}; + struct iecm_tx_buf { struct hlist_node hlist; void 
*next_to_watch; @@ -145,6 +150,37 @@ struct iecm_buf_lifo { struct iecm_tx_buf **bufs; }; +struct iecm_tx_offload_params { + u16 td_cmd; /* command field to be inserted into descriptor */ + u32 tso_len; /* total length of payload to segment */ + u16 mss; + u8 tso_hdr_len; /* length of headers to be duplicated */ + + /* Flow scheduling offload timestamp, formatting as hw expects it */ + /* timestamp = bits[0:22], overflow = bit[23] */ + u8 desc_ts[3]; + + /* For legacy offloads */ + u32 hdr_offsets; +}; + +struct iecm_tx_splitq_params { + /* Descriptor build function pointer */ + void (*splitq_build_ctb)(union iecm_tx_flex_desc *desc, + struct iecm_tx_splitq_params *params, + u16 td_cmd, u16 size); + + /* General descriptor info */ + enum iecm_tx_desc_dtype_value dtype; + u16 eop_cmd; + union { + u16 compl_tag; /* only relevant for flow scheduling */ + u16 td_tag; /* only relevant for queue scheduling */ + }; + + struct iecm_tx_offload_params offload; +}; + /* Checksum offload bits decoded from the receive descriptor. */ struct iecm_rx_csum_decoded { u8 l3l4p : 1; @@ -588,6 +624,12 @@ struct iecm_txq_group { struct iecm_adapter; +void iecm_tx_splitq_build_ctb(union iecm_tx_flex_desc *desc, + struct iecm_tx_splitq_params *parms, + u16 td_cmd, u16 size); +void iecm_tx_splitq_build_flow_desc(union iecm_tx_flex_desc *desc, + struct iecm_tx_splitq_params *parms, + u16 td_cmd, u16 size); int iecm_vport_singleq_napi_poll(struct napi_struct *napi, int budget); void iecm_vport_init_num_qs(struct iecm_vport *vport, struct virtchnl2_create_vport *vport_msg); @@ -614,7 +656,25 @@ int iecm_init_rss(struct iecm_vport *vport); void iecm_deinit_rss(struct iecm_vport *vport); bool iecm_init_rx_buf_hw_alloc(struct iecm_queue *rxq, struct iecm_rx_buf *buf); void iecm_rx_buf_hw_update(struct iecm_queue *rxq, u32 val); +void iecm_tx_buf_hw_update(struct iecm_queue *tx_q, u32 val, + bool xmit_more); void iecm_tx_buf_rel(struct iecm_queue *tx_q, struct iecm_tx_buf *tx_buf); +unsigned int iecm_size_to_txd_count(unsigned int size); +unsigned int iecm_tx_desc_count_required(struct sk_buff *skb); +bool iecm_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, + unsigned int count); +int iecm_tx_maybe_stop(struct iecm_queue *tx_q, unsigned int size); +void iecm_tx_timeout(struct net_device *netdev, + unsigned int __always_unused txqueue); +netdev_tx_t iecm_tx_splitq_start(struct sk_buff *skb, + struct net_device *netdev); +netdev_tx_t iecm_tx_singleq_start(struct sk_buff *skb, + struct net_device *netdev); bool iecm_rx_singleq_buf_hw_alloc_all(struct iecm_queue *rxq, u16 cleaned_count); +int iecm_tso(struct iecm_tx_buf *first, struct iecm_tx_offload_params *off); +void iecm_tx_prepare_vlan_flags(struct iecm_queue *tx_q, + struct iecm_tx_buf *first, + struct sk_buff *skb); + #endif /* !_IECM_TXRX_H_ */ -- 2.33.0