From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1753051AbcFFSZZ (ORCPT ); Mon, 6 Jun 2016 14:25:25 -0400 Received: from mail-pf0-f194.google.com ([209.85.192.194]:35904 "EHLO mail-pf0-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752769AbcFFSZT (ORCPT ); Mon, 6 Jun 2016 14:25:19 -0400 Subject: Re: [PATCH 1/5] ethernet: add sun8i-emac driver To: LABBE Corentin , robh+dt@kernel.org, pawel.moll@arm.com, mark.rutland@arm.com, ijc+devicetree@hellion.org.uk, galak@codeaurora.org, maxime.ripard@free-electrons.com, wens@csie.org, linux@armlinux.org.uk, davem@davemloft.net References: <1464947790-22991-1-git-send-email-clabbe.montjoie@gmail.com> <1464947790-22991-2-git-send-email-clabbe.montjoie@gmail.com> Cc: netdev@vger.kernel.org, devicetree@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, linux-sunxi@googlegroups.com From: Florian Fainelli Message-ID: <5755C00B.2040203@gmail.com> Date: Mon, 6 Jun 2016 11:25:15 -0700 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.8.0 MIME-Version: 1.0 In-Reply-To: <1464947790-22991-2-git-send-email-clabbe.montjoie@gmail.com> Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 06/03/2016 02:56 AM, LABBE Corentin wrote: > This patch add support for sun8i-emac ethernet MAC hardware. > It could be found in Allwinner H3/A83T/A64 SoCs. > > It supports 10/100/1000 Mbit/s speed with half/full duplex. > It can use an internal PHY (MII 10/100) or an external PHY > via RGMII/RMII. > > Signed-off-by: LABBE Corentin > --- [snip] > + > +struct ethtool_str { > + char name[ETH_GSTRING_LEN]; You might as well drop the encapsulating structure and just use an array of strings? 
> +}; > + [snip] > + > +/* The datasheet said that each descriptor can transfers up to 4096bytes > + * But latter, a register documentation reduce that value to 2048 > + * Anyway using 2048 cause strange behaviours and even BSP driver use 2047 > + */ > +#define DESC_BUF_MAX 2044 > +#if (DESC_BUF_MAX < (ETH_FRAME_LEN + 4)) > +#error "DESC_BUF_MAX must be set at minimum to ETH_FRAME_LEN + 4" > +#endif You can probably drop that, it would not make much sense to enable fragments and a buffer size smaller than ETH_FRAME_LEN + ETH_FCS_LEN anyway. > + > +/* MAGIC value for knowing if a descriptor is available or not */ > +#define DCLEAN (BIT(16) | BIT(14) | BIT(12) | BIT(10) | BIT(9)) > + > +/* Structure of DMA descriptor used by the hardware */ > +struct dma_desc { > + u32 status; /* status of the descriptor */ > + u32 st; /* Information on the frame */ > + u32 buf_addr; /* physical address of the frame data */ > + u32 next; /* physical address of next dma_desc */ > +} __packed __aligned(4); This has been noticed in other emails, no need for the __packed here, they are all naturally aligned. > + > +/* Benched on OPIPC with 100M, setting more than 256 does not give any > + * perf boost > + */ > +static int nbdesc_tx = 256; > +module_param(nbdesc_tx, int, S_IRUGO | S_IWUSR); > +MODULE_PARM_DESC(nbdesc_tx, "Number of descriptors in the TX list"); > +static int nbdesc_rx = 128; > +module_param(nbdesc_rx, int, S_IRUGO | S_IWUSR); > +MODULE_PARM_DESC(nbdesc_rx, "Number of descriptors in the RX list"); This needs to be statically defined to begin driver operation with, and then implement the ethtool operations to re-size the rings would you want that. 
[snip] > +/* Return the number of contiguous free descriptors > + * starting from tx_slot > + */ > +static int rb_tx_numfreedesc(struct net_device *ndev) > +{ > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + > + if (priv->tx_slot < priv->tx_dirty) > + return priv->tx_dirty - priv->tx_slot; Does this work with if tx_dirty wraps around? > + > + return (nbdesc_tx - priv->tx_slot) + priv->tx_dirty; > +} > + > +/* Allocate a skb in a DMA descriptor > + * > + * @i index of slot to fill > +*/ > +static int sun8i_emac_rx_sk(struct net_device *ndev, int i) > +{ > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + struct dma_desc *ddesc; > + struct sk_buff *sk; The networking stack typically refers to "sk" as socket and skb as socket buffers. > + > + ddesc = priv->dd_rx + i; > + > + ddesc->st = 0; > + > + sk = netdev_alloc_skb_ip_align(ndev, DESC_BUF_MAX); > + if (!sk) > + return -ENOMEM; > + > + /* should not happen */ > + if (unlikely(priv->rx_sk[i])) > + dev_warn(priv->dev, "BUG: Leaking a skbuff\n"); > + > + priv->rx_sk[i] = sk; > + > + ddesc->buf_addr = dma_map_single(priv->dev, sk->data, > + DESC_BUF_MAX, DMA_FROM_DEVICE); > + if (dma_mapping_error(priv->dev, ddesc->buf_addr)) { > + dev_err(priv->dev, "ERROR: Cannot dma_map RX buffer\n"); > + dev_kfree_skb(sk); > + return -EFAULT; > + } > + ddesc->st |= DESC_BUF_MAX; > + ddesc->status = BIT(31); You are missing a lightweight barrier here to ensure there is no re-ordering done by the compiler in how you write to the descriptors in DRAM, even though they are allocated from dma_alloc_coherent(). 
[snip] > +static void sun8i_emac_set_link_mode(struct sun8i_emac_priv *priv) > +{ > + u32 v; > + > + v = readl(priv->base + SUN8I_EMAC_BASIC_CTL0); > + > + if (priv->duplex) > + v |= BIT(0); > + else > + v &= ~BIT(0); > + > + v &= ~0x0C; > + switch (priv->speed) { > + case 1000: > + break; > + case 100: > + v |= BIT(2); > + v |= BIT(3); > + break; > + case 10: > + v |= BIT(3); > + break; > + } Proper defines for all of these bits and masks? > + > + writel(v, priv->base + SUN8I_EMAC_BASIC_CTL0); > +} > + > +static void sun8i_emac_flow_ctrl(struct sun8i_emac_priv *priv, int duplex, > + int fc, int pause) > +{ > + u32 flow = 0; pause is unused (outside of printing it) here > + > + netif_dbg(priv, link, priv->ndev, "%s %d %d %d\n", __func__, > + duplex, fc, pause); > + > + flow = readl(priv->base + SUN8I_EMAC_RX_CTL0); > + if (fc & FLOW_RX) > + flow |= BIT(16); > + else > + flow &= ~BIT(16); > + writel(flow, priv->base + SUN8I_EMAC_RX_CTL0); > + > + flow = readl(priv->base + SUN8I_EMAC_TX_FLOW_CTL); > + if (fc & FLOW_TX) > + flow |= BIT(0); > + else > + flow &= ~BIT(0); > + writel(flow, priv->base + SUN8I_EMAC_TX_FLOW_CTL); > +} > + > +/* Grab a frame into a skb from descriptor number i */ > +static int sun8i_emac_rx_from_ddesc(struct net_device *ndev, int i) > +{ > + struct sk_buff *skb; > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + struct dma_desc *ddesc = priv->dd_rx + i; > + int frame_len; > + int crc_checked = 0; > + > + if (ndev->features & NETIF_F_RXCSUM) > + crc_checked = 1; Assuming CRC here refers to the Ethernet frame's FCS, then this is absolutely not how NETIF_F_RXCSUM works. NETIF_F_RXCSUM is about your Ethernet adapter supporting L3/L4 checksum offloads, while the Ethernet FCS is pretty much mandatory for the frame to be properly received in the first place. Can you clarify which way it is? 
> + > + /* bit0/bit7 work only on IPv4/IPv6 TCP traffic, > + * (not on ARP for example) so we dont raise rx_errors/discard frame > + */ > + /* the checksum or length of received frame's payload is wrong*/ > + if (ddesc->status & BIT(0)) { > + priv->estats.rx_payload_error++; > + crc_checked = 0; > + } > + if (ddesc->status & BIT(1)) { > + priv->ndev->stats.rx_errors++; > + priv->ndev->stats.rx_crc_errors++; > + priv->estats.rx_crc_error++; > + goto discard_frame; > + } > + if ((ddesc->status & BIT(3))) { > + priv->ndev->stats.rx_errors++; > + priv->estats.rx_phy_error++; > + goto discard_frame; > + } > + if ((ddesc->status & BIT(4))) { > + priv->ndev->stats.rx_errors++; > + priv->ndev->stats.rx_length_errors++; > + priv->estats.rx_length_error++; > + goto discard_frame; > + } > + if ((ddesc->status & BIT(6))) { > + priv->ndev->stats.rx_errors++; > + priv->estats.rx_col_error++; > + goto discard_frame; > + } > + if ((ddesc->status & BIT(7))) { > + priv->estats.rx_header_error++; > + crc_checked = 0; > + } > + if ((ddesc->status & BIT(11))) { > + priv->ndev->stats.rx_over_errors++; > + priv->estats.rx_overflow_error++; > + goto discard_frame; > + } > + if ((ddesc->status & BIT(14))) { > + priv->ndev->stats.rx_errors++; > + priv->estats.rx_buf_error++; > + goto discard_frame; > + } Please define what these bits are. > + > + if ((ddesc->status & BIT(9)) == 0) { > + /* begin of a Jumbo frame */ > + dev_warn(priv->dev, "This should not happen\n"); > + goto discard_frame; > + } This should be ratelimited at the very least, if there is a misconfiguration, chances are that you could see this happen fairly often. 
> + frame_len = (ddesc->status >> 16) & 0x3FFF; > + if (!(ndev->features & NETIF_F_RXFCS)) > + frame_len -= ETH_FCS_LEN; > + > + skb = priv->rx_sk[i]; > + > + netif_dbg(priv, rx_status, priv->ndev, > + "%s from %02d %pad len=%d status=%x st=%x\n", > + __func__, i, &ddesc, frame_len, ddesc->status, ddesc->st); > + > + skb_put(skb, frame_len); > + > + dma_unmap_single(priv->dev, ddesc->buf_addr, DESC_BUF_MAX, > + DMA_FROM_DEVICE); > + skb->protocol = eth_type_trans(skb, priv->ndev); > + if (crc_checked) { > + skb->ip_summed = CHECKSUM_UNNECESSARY; > + priv->estats.rx_hw_csum++; > + } else { > + skb->ip_summed = CHECKSUM_PARTIAL; > + } > + skb->dev = priv->ndev; eth_type_trans() already does that. > + > + priv->ndev->stats.rx_packets++; > + priv->ndev->stats.rx_bytes += frame_len; > + priv->rx_sk[i] = NULL; > + > + /* this frame is not the last */ > + if ((ddesc->status & BIT(8)) == 0) { > + dev_warn(priv->dev, "Multi frame not implemented currlen=%d\n", > + frame_len); > + } > + > + sun8i_emac_rx_sk(ndev, i); > + > + netif_rx(skb); netif_receive_skb() at the very least, or if you implement NAPI, like you should napi_gro_receive(). > + > + return 0; > + /* If the frame need to be dropped, we simply reuse the buffer */ > +discard_frame: > + ddesc->st = DESC_BUF_MAX; > + ddesc->status = BIT(31); Same here, there does not seem to be any barrier to ensure proper ordering. 
> + return 0; > +} > + > +/* Cycle over RX DMA descriptors for finding frame to receive > + */ > +static int sun8i_emac_receive_all(struct net_device *ndev) > +{ > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + struct dma_desc *ddesc; > + > + ddesc = priv->dd_rx + priv->rx_dirty; > + while (!(ddesc->status & BIT(31))) { > + sun8i_emac_rx_from_ddesc(ndev, priv->rx_dirty); > + rb_inc(&priv->rx_dirty, nbdesc_rx); > + ddesc = priv->dd_rx + priv->rx_dirty; > + }; So, what if we ping flood your device here, is not there a remote chance that we keep the RX interrupt so busy we can't break out of this loop, and we are executing from hard IRQ context, that's bad. > + > + return 0; > +} > + > +/* iterate over dma_desc for finding completed xmit. > + * Called from interrupt context, so no need to spinlock tx Humm, well it depends if what you are doing here may race with ndo_start_xmit(), and usually it does. Also, you should consider completing TX packets in NAPI context (soft IRQ) instead of hard IRQs like here. [snip] > +static int sun8i_emac_mdio_register(struct net_device *ndev) > +{ > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + struct mii_bus *bus; > + int ret; > + > + bus = devm_mdiobus_alloc(priv->dev); > + if (!bus) { > + netdev_err(ndev, "Failed to allocate new mdio bus\n"); > + return -ENOMEM; > + } > + > + bus->name = dev_name(priv->dev); > + bus->read = &sun8i_mdio_read; > + bus->write = &sun8i_mdio_write; > + snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%x", bus->name, 0); If there are two of these adapters or more you will start seeing conflicts, give this a better unique name that uses pdev->id or something like that. 
> + > + bus->parent = priv->dev; > + bus->priv = ndev; > + > + ret = of_mdiobus_register(bus, priv->dev->of_node); > + if (ret) { > + netdev_err(ndev, "Could not register as MDIO bus: %d\n", ret); > + return ret; > + } > + > + priv->mdio = bus; > + > + return 0; > +} > + > +static void sun8i_emac_adjust_link(struct net_device *ndev) > +{ > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + struct phy_device *phydev = ndev->phydev; > + unsigned long flags; > + int new_state = 0; > + > + netif_dbg(priv, link, priv->ndev, > + "%s link=%x duplex=%x speed=%x\n", __func__, > + phydev->link, phydev->duplex, phydev->speed); > + if (!phydev) > + return; > + > + spin_lock_irqsave(&priv->lock, flags); You are executing under the phydev->lock mutex already, what is this achieving? [snip] > + priv->tx_slot = 0; > + priv->tx_dirty = 0; > + priv->rx_dirty = 0; > + > + priv->rx_sk = kcalloc(nbdesc_rx, sizeof(struct sk_buff *), GFP_KERNEL); > + if (!priv->rx_sk) { > + err = -ENOMEM; > + goto rx_sk_error; > + } > + priv->tx_sk = kcalloc(nbdesc_tx, sizeof(struct sk_buff *), GFP_KERNEL); > + if (!priv->tx_sk) { > + err = -ENOMEM; > + goto tx_sk_error; > + } > + priv->tx_map = kcalloc(nbdesc_tx, sizeof(int), GFP_KERNEL); > + if (!priv->tx_map) { > + err = -ENOMEM; > + goto tx_map_error; > + } You are allocating two arrays of the same size, why not combine them into a single one which holds both the tx_sk and the tx_map? 
> + > + priv->dd_rx = dma_alloc_coherent(priv->dev, > + nbdesc_rx * sizeof(struct dma_desc), > + &priv->dd_rx_phy, > + GFP_KERNEL); > + if (!priv->dd_rx) { > + dev_err(priv->dev, "ERROR: cannot DMA RX"); > + err = -ENOMEM; > + goto dma_rx_error; > + } > + memset(priv->dd_rx, 0, nbdesc_rx * sizeof(struct dma_desc)); > + ddesc = priv->dd_rx; > + for (i = 0; i < nbdesc_rx; i++) { > + sun8i_emac_rx_sk(ndev, i); > + ddesc->next = (u32)priv->dd_rx_phy + (i + 1) > + * sizeof(struct dma_desc); > + ddesc++; > + } > + /* last descriptor point back to first one */ > + ddesc--; > + ddesc->next = (u32)priv->dd_rx_phy; So is there a limitation of this hardware that can only do DMA within the first 4GB of the system? > + > + priv->dd_tx = dma_alloc_coherent(priv->dev, > + nbdesc_tx * sizeof(struct dma_desc), > + &priv->dd_tx_phy, > + GFP_KERNEL); > + if (!priv->dd_tx) { > + dev_err(priv->dev, "ERROR: cannot DMA TX"); > + err = -ENOMEM; > + goto dma_tx_error; > + } > + memset(priv->dd_tx, 0, nbdesc_tx * sizeof(struct dma_desc)); dma_zalloc_coherent()? > + ddesc = priv->dd_tx; > + for (i = 0; i < nbdesc_tx; i++) { > + ddesc->status = DCLEAN; > + ddesc->st = 0; > + ddesc->next = (u32)(priv->dd_tx_phy + (i + 1) > + * sizeof(struct dma_desc)); > + ddesc++; > + } > + /* last descriptor point back to first one */ > + ddesc--; > + ddesc->next = (u32)priv->dd_tx_phy; > + i--; > + > + if (ndev->phydev) > + phy_start(ndev->phydev); You can't get here without a valid phydev pointer. [snip] > +static int sun8i_emac_stop(struct net_device *ndev) > +{ > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + int i; > + struct dma_desc *ddesc; > + > + /* Stop receiver */ > + writel(0, priv->base + SUN8I_EMAC_RX_CTL0); > + writel(0, priv->base + SUN8I_EMAC_RX_CTL1); > + /* Stop transmitter */ > + writel(0, priv->base + SUN8I_EMAC_TX_CTL0); > + writel(0, priv->base + SUN8I_EMAC_TX_CTL1); > + > + netif_stop_queue(ndev); > + netif_carrier_off(ndev); phy_stop() does that. 
> + > + phy_stop(ndev->phydev); > + phy_disconnect(ndev->phydev); > + > + /* clean RX ring */ > + for (i = 0; i < nbdesc_rx; i++) > + if (priv->rx_sk[i]) { > + ddesc = priv->dd_rx + i; > + dma_unmap_single(priv->dev, ddesc->buf_addr, > + DESC_BUF_MAX, DMA_FROM_DEVICE); > + dev_kfree_skb_any(priv->rx_sk[i]); > + priv->rx_sk[i] = NULL; > + } > + sun8i_emac_tx_clean(ndev); > + > + kfree(priv->rx_sk); > + kfree(priv->tx_sk); > + kfree(priv->tx_map); > + > + dma_free_coherent(priv->dev, nbdesc_rx * sizeof(struct dma_desc), > + priv->dd_rx, priv->dd_rx_phy); > + dma_free_coherent(priv->dev, nbdesc_tx * sizeof(struct dma_desc), > + priv->dd_tx, priv->dd_tx_phy); > + > + return 0; > +} > + > +static netdev_tx_t sun8i_emac_xmit(struct sk_buff *skb, struct net_device *ndev) > +{ > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + struct dma_desc *ddesc; > + struct dma_desc *first; > + int i = 0, rbd_first; > + unsigned int len, fraglen; > + u32 v; > + int n; > + int nf; > + const skb_frag_t *frag; > + int do_csum = 0; > + > + len = skb_headlen(skb); > + if (len < ETH_ZLEN) { > + if (skb_padto(skb, ETH_ZLEN)) > + return NETDEV_TX_OK; > + len = ETH_ZLEN; > + } skb_put_padto() would be able to do a bit of that. 
> + n = skb_shinfo(skb)->nr_frags; > + > + if (skb->ip_summed == CHECKSUM_PARTIAL) { > + do_csum = 1; > + priv->estats.tx_hw_csum++; > + } > + netif_dbg(priv, tx_queued, ndev, "%s len=%u skblen=%u %x\n", __func__, > + len, skb->len, > + (skb->ip_summed == CHECKSUM_PARTIAL)); > + > + spin_lock(&priv->tx_lock); > + > + /* check for contigous space > + * We need at least 1(skb->data) + n(numfrags) + 1(one clean slot) > + */ > + if (rb_tx_numfreedesc(ndev) < n + 2) { > + dev_err_ratelimited(priv->dev, "BUG!: TX is full %d %d\n", > + priv->tx_dirty, priv->tx_slot); > + netif_stop_queue(ndev); > + spin_unlock(&priv->tx_lock); > + return NETDEV_TX_BUSY; > + } > + i = priv->tx_slot; > + > + ddesc = priv->dd_tx + i; > + first = priv->dd_tx + i; > + rbd_first = i; > + > + priv->tx_slot = (i + 1 + n) % nbdesc_tx; > + > + ddesc->buf_addr = dma_map_single(priv->dev, skb->data, len, > + DMA_TO_DEVICE); > + if (dma_mapping_error(priv->dev, ddesc->buf_addr)) { > + dev_err(priv->dev, "ERROR: Cannot dmamap buf\n"); > + goto xmit_error; > + } > + priv->tx_map[i] = MAP_SINGLE; > + priv->tx_sk[i] = skb; > + priv->ndev->stats.tx_packets++; > + priv->ndev->stats.tx_bytes += len; You should not increment stats until you had a chance to complete the actual transmission of the packets, there could be many reasons why these are not transmitted. > + > + ddesc->st = len; > + /* undocumented bit that make it works */ > + ddesc->st |= BIT(24); > + if (do_csum) > + ddesc->st |= SUN8I_EMAC_TX_DO_CRC; Missing barriers. 
> + > + /* handle fragmented skb, one descriptor per fragment */ > + for (nf = 0; nf < n; nf++) { > + frag = &skb_shinfo(skb)->frags[nf]; > + rb_inc(&i, nbdesc_tx); > + priv->tx_sk[i] = skb; > + ddesc = priv->dd_tx + i; > + fraglen = skb_frag_size(frag); > + ddesc->st = fraglen; > + priv->ndev->stats.tx_bytes += fraglen; > + ddesc->st |= BIT(24); > + if (do_csum) > + ddesc->st |= SUN8I_EMAC_TX_DO_CRC; > + > + ddesc->buf_addr = skb_frag_dma_map(priv->dev, frag, 0, > + fraglen, DMA_TO_DEVICE); > + if (dma_mapping_error(priv->dev, ddesc->buf_addr)) { > + dev_err(priv->dev, "DMA MAP ERROR\n"); > + goto xmit_error; > + } > + priv->tx_map[i] = MAP_PAGE; > + ddesc->status = BIT(31); > + } > + > + /* frame end */ > + ddesc->st |= BIT(30); > + /* We want an interrupt after transmission */ > + ddesc->st |= BIT(31); > + > + rb_inc(&i, nbdesc_tx); > + > + /* frame begin */ > + first->st |= BIT(29); > + first->status = BIT(31); > + priv->tx_slot = i; > + > + /* Trying to optimize this (recording DMA start/stop) seems > + * to lead to errors. So we always start DMA. 
> + */ > + v = readl(priv->base + SUN8I_EMAC_TX_CTL1); > + /* TX DMA START */ > + v |= BIT(31); > + /* Start an run DMA */ > + v |= BIT(30); > + writel(v, priv->base + SUN8I_EMAC_TX_CTL1); > + > + if (rb_tx_numfreedesc(ndev) < MAX_SKB_FRAGS + 1) { > + netif_stop_queue(ndev); > + priv->estats.tx_stop_queue++; > + } > + priv->estats.tx_used_desc = rb_tx_numfreedesc(ndev); > + > + spin_unlock(&priv->tx_lock); > + > + return NETDEV_TX_OK; > + > +xmit_error: > + /* destroy skb and return TX OK Documentation/DMA-API-HOWTO.txt */ > + /* clean descritors from rbd_first to i */ > + ddesc->st = 0; > + ddesc->status = DCLEAN; > + do { > + ddesc = priv->dd_tx + rbd_first; > + ddesc->st = 0; > + ddesc->status = DCLEAN; > + rb_inc(&rbd_first, nbdesc_tx); > + } while (rbd_first != i); > + spin_unlock(&priv->tx_lock); > + dev_kfree_skb_any(skb); > + return NETDEV_TX_OK; > +} > + [snip] > + > +static int sun8i_emac_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) > +{ > + struct phy_device *phydev = ndev->phydev; > + > + if (!netif_running(ndev)) > + return -EINVAL; > + > + if (!phydev) > + return -ENODEV; You don't allow your MDIO probe function not to have a phydev so this cannot really happen by the time you are here? 
> + > + return phy_mii_ioctl(phydev, rq, cmd); > +} > + > +static int sun8i_emac_check_if_running(struct net_device *ndev) > +{ > + if (!netif_running(ndev)) > + return -EBUSY; This does not seem like the intended usage of an ethtool .begin callback. > + return 0; > +} > + > +static int sun8i_emac_get_sset_count(struct net_device *ndev, int sset) > +{ > + switch (sset) { > + case ETH_SS_STATS: > + return ARRAY_SIZE(estats_str); > + } > + return -EOPNOTSUPP; > +} > + > +static int sun8i_emac_ethtool_get_settings(struct net_device *ndev, > + struct ethtool_cmd *cmd) > +{ > + struct phy_device *phy = ndev->phydev; > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + > + if (!phy) { > + netdev_err(ndev, "%s: %s: PHY is not registered\n", > + __func__, ndev->name); > + return -ENODEV; > + } > + > + if (!netif_running(ndev)) { > + dev_err(priv->dev, "interface disabled: we cannot track link speed / duplex setting\n"); > + return -EBUSY; > + } > + > + cmd->transceiver = XCVR_INTERNAL; If you write a PHY driver for the internal PHY, and set PHY_IS_INTERNAL in the flags of that driver (see bcm63xx.c and bcm7xxx.c in drivers/net/phy/*), this is set automatically by the core PHYLIB code. 
> + return phy_ethtool_gset(phy, cmd); > +} > + [snip] > + > +static void sun8i_emac_get_pauseparam(struct net_device *ndev, > + struct ethtool_pauseparam *pause) > +{ > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + > + pause->rx_pause = 0; > + pause->tx_pause = 0; > + pause->autoneg = ndev->phydev->autoneg; > + > + if (priv->flow_ctrl & FLOW_RX) > + pause->rx_pause = 1; > + if (priv->flow_ctrl & FLOW_TX) > + pause->tx_pause = 1; > +} > + > +static int sun8i_emac_set_pauseparam(struct net_device *ndev, > + struct ethtool_pauseparam *pause) > +{ > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + struct phy_device *phy = ndev->phydev; > + int new_pause = 0; > + int ret = 0; > + > + if (pause->rx_pause) > + new_pause |= FLOW_RX; > + if (pause->tx_pause) > + new_pause |= FLOW_TX; > + > + priv->flow_ctrl = new_pause; > + phy->autoneg = pause->autoneg; > + > + if (phy->autoneg) { > + if (netif_running(ndev)) > + ret = phy_start_aneg(phy); > + } else { > + sun8i_emac_flow_ctrl(priv, phy->duplex, priv->flow_ctrl, > + priv->pause); > + } > + return ret; > +} > + > +static const struct ethtool_ops sun8i_emac_ethtool_ops = { > + .begin = sun8i_emac_check_if_running, > + .get_settings = sun8i_emac_ethtool_get_settings, > + .set_settings = sun8i_emac_ethtool_set_settings, > + .get_link = ethtool_op_get_link, > + .get_pauseparam = sun8i_emac_get_pauseparam, > + .set_pauseparam = sun8i_emac_set_pauseparam, > + .get_ethtool_stats = sun8i_emac_ethtool_stats, > + .get_strings = sun8i_emac_ethtool_strings, > + .get_wol = NULL, > + .set_wol = NULL, Not necessary > + .get_sset_count = sun8i_emac_get_sset_count, > + .get_drvinfo = sun8i_emac_ethtool_getdrvinfo, > + .get_msglevel = sun8i_emac_ethtool_getmsglevel, > + .set_msglevel = sun8i_emac_ethtool_setmsglevel, > +}; > + > +static const struct net_device_ops sun8i_emac_netdev_ops = { > + .ndo_init = sun8i_emac_init, > + .ndo_uninit = sun8i_emac_uninit, > + .ndo_open = sun8i_emac_open, > + .ndo_start_xmit = 
sun8i_emac_xmit, > + .ndo_stop = sun8i_emac_stop, > + .ndo_change_mtu = sun8i_emac_change_mtu, > + .ndo_fix_features = sun8i_emac_fix_features, > + .ndo_set_rx_mode = sun8i_emac_set_rx_mode, > + .ndo_tx_timeout = sun8i_emac_tx_timeout, > + .ndo_do_ioctl = sun8i_emac_ioctl, > + .ndo_set_mac_address = eth_mac_addr, > + .ndo_set_features = sun8i_emac_set_features, > +}; > + > +static irqreturn_t sun8i_emac_dma_interrupt(int irq, void *dev_id) > +{ > + struct net_device *ndev = (struct net_device *)dev_id; No need for a cast here. > + struct sun8i_emac_priv *priv = netdev_priv(ndev); > + u32 v, u; > + > + v = readl(priv->base + SUN8I_EMAC_INT_STA); > + > + netif_info(priv, intr, ndev, "%s %x\n", __func__, v); This is a fastpath, you would want something that does not have to test against netif_msg_##type at all here. > + > + /* When this bit is asserted, a frame transmission is completed. */ > + if (v & BIT(0)) > + sun8i_emac_complete_xmit(ndev); > + > + /* When this bit is asserted, the TX DMA FSM is stopped. */ > + if (v & BIT(1)) > + priv->estats.tx_dma_stop++; > + > + /* When this asserted, the TX DMA can not acquire next TX descriptor > + * and TX DMA FSM is suspended. > + */ > + if (v & BIT(2)) > + priv->estats.tx_dma_ua++; > + > + if (v & BIT(3)) > + netif_dbg(priv, intr, ndev, "Unhandled interrupt TX TIMEOUT\n"); > + > + if (v & BIT(4)) { > + netif_dbg(priv, intr, ndev, "Unhandled interrupt TX underflow\n"); > + priv->estats.tx_underflow_int++; > + } > + > + /* When this bit asserted , the frame is transmitted to FIFO totally. */ > + if (v & BIT(5)) { > + netif_dbg(priv, intr, ndev, "Unhandled interrupt TX_EARLY_INT\n"); > + priv->estats.tx_early_int++; > + } > + > + /* When this bit is asserted, a frame reception is completed */ > + if (v & BIT(8)) > + sun8i_emac_receive_all(ndev); Can we get proper definitions for each and every one of these bits? 
[snip] > + > + ndev = alloc_etherdev(sizeof(*priv)); > + if (!ndev) > + return -ENOMEM; > + > + SET_NETDEV_DEV(ndev, &pdev->dev); > + priv = netdev_priv(ndev); > + platform_set_drvdata(pdev, ndev); > + > + priv->variant = (enum emac_variant)of_device_get_match_data(&pdev->dev); Should not we still test if the of_device_id's data member has been set, if you later add a new member to the compatible list with an absent .data member won't this crash? > + > + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + priv->base = devm_ioremap_resource(&pdev->dev, res); > + if (IS_ERR(priv->base)) { > + ret = PTR_ERR(priv->base); > + dev_err(&pdev->dev, "Cannot request MMIO: %d\n", ret); > + goto probe_err; > + } > + > + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "syscon"); > + priv->syscon = devm_ioremap_resource(&pdev->dev, res); > + if (IS_ERR(priv->syscon)) { > + ret = PTR_ERR(priv->syscon); > + dev_err(&pdev->dev, > + "Cannot map system control registers: %d\n", ret); > + goto probe_err; > + } > + > + priv->irq = platform_get_irq(pdev, 0); > + if (priv->irq < 0) { > + ret = priv->irq; > + dev_err(&pdev->dev, "Cannot claim IRQ: %d\n", ret); > + goto probe_err; > + } > + > + ret = devm_request_irq(&pdev->dev, priv->irq, sun8i_emac_dma_interrupt, > + 0, dev_name(&pdev->dev), ndev); > + if (ret) { > + dev_err(&pdev->dev, "Cannot request IRQ: %d\n", ret); > + goto probe_err; > + } Network drivers typically do not request any resources (memory, interrupts) until ndo_open() is called, which helps with making sure that interrupts are properly disabled if the hardware is never used. You are also missing masking of interrupts of the MAC/DMA hardware here. [snip] > + ndev->netdev_ops = &sun8i_emac_netdev_ops; > + ndev->ethtool_ops = &sun8i_emac_ethtool_ops; > + > + priv->ndev = ndev; > + priv->dev = &pdev->dev; > + > + ndev->base_addr = (unsigned long)priv->base; > + ndev->irq = priv->irq; These are now deprecated and there is no requirement to set these two. 
> + > + ndev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA; > + ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | > + NETIF_F_RXCSUM; > + ndev->features |= ndev->hw_features; > + ndev->hw_features |= NETIF_F_RXFCS; > + ndev->hw_features |= NETIF_F_RXALL; > + ndev->hw_features |= NETIF_F_LOOPBACK; > + ndev->priv_flags |= IFF_UNICAST_FLT; > + > + ndev->watchdog_timeo = msecs_to_jiffies(5000); Missing netif_carrier_off() here. > + > + ret = register_netdev(ndev); > + if (ret) { > + dev_err(&pdev->dev, "ERROR: Register %s failed\n", ndev->name); > + goto probe_err; > + } > + > + return 0; > + > +probe_err: > + free_netdev(ndev); > + return ret; > +} > + > +static int sun8i_emac_remove(struct platform_device *pdev) > +{ > + struct net_device *ndev = platform_get_drvdata(pdev); > + > + unregister_netdev(ndev); > + platform_set_drvdata(pdev, NULL); Missing unregister_netdevice() here. > + free_netdev(ndev); > + > + return 0; > +} -- Florian