All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Roger Pau Monné" <roger.pau@citrix.com>
To: G.R. <firemeteor@users.sourceforge.net>
Cc: <xen-devel@lists.xenproject.org>
Subject: Re: Possible bug? DOM-U network stopped working after fatal error reported in DOM0
Date: Wed, 29 Dec 2021 20:07:23 +0100	[thread overview]
Message-ID: <Ycyx65BDakqWmEe+@Air-de-Roger> (raw)
In-Reply-To: <Ycw4Jna5J2iQJyeM@Air-de-Roger>

On Wed, Dec 29, 2021 at 11:27:50AM +0100, Roger Pau Monné wrote:
> On Wed, Dec 29, 2021 at 05:13:00PM +0800, G.R. wrote:
> > >
> > > I think this is hitting a KASSERT, could you paste the text printed as
> > > part of the panic (not just he backtrace)?
> > >
> > > Sorry this is taking a bit of time to solve.
> > >
> > > Thanks!
> > >
> > Sorry that I didn't make it clear in the first place.
> > It is the same cross boundary assertion.
> 
> I see. After looking at the code it seems like sglist will coalesce
> contiguous physical ranges without taking page boundaries into
> account, which is not suitable for our purpose here. I guess I will
> either have to modify sglist, or switch to using bus_dma. The main
> problem with using bus_dma is that it will require bigger changes to
> netfront I think.

I have a crappy patch to use bus_dma. It's not yet ready for upstream
but you might want to give it a try to see if it solves the cross page
boundary issues.

Thanks, Roger.
---
diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c
index 8dba5a8dc6d5..693ef25c8783 100644
--- a/sys/dev/xen/netfront/netfront.c
+++ b/sys/dev/xen/netfront/netfront.c
@@ -71,6 +71,8 @@ __FBSDID("$FreeBSD$");
 #include <xen/interface/io/netif.h>
 #include <xen/xenbus/xenbusvar.h>
 
+#include <machine/bus.h>
+
 #include "xenbus_if.h"
 
 /* Features supported by all backends.  TSO and LRO can be negotiated */
@@ -199,6 +201,12 @@ struct netfront_txq {
 	struct taskqueue 	*tq;
 	struct task       	defrtask;
 
+	bus_dmamap_t		dma_map;
+	struct mbuf_refcount {
+		struct m_tag 	tag;
+		u_int 		count;
+	}			refcount_tag[NET_TX_RING_SIZE + 1];
+
 	bool			full;
 };
 
@@ -221,6 +229,8 @@ struct netfront_info {
 
 	struct ifmedia		sc_media;
 
+	bus_dma_tag_t		dma_tag;
+
 	bool			xn_reset;
 };
 
@@ -301,6 +311,39 @@ xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri)
 	return (ref);
 }
 
+#define MTAG_COOKIE 1218492000
+#define MTAG_REFCOUNT 0
+
+static void mbuf_grab(struct mbuf *m)
+{
+	struct mbuf_refcount *ref;
+
+	ref = (struct mbuf_refcount *)m_tag_locate(m, MTAG_COOKIE,
+	    MTAG_REFCOUNT, NULL);
+	KASSERT(ref != NULL, ("Cannot find refcount"));
+	ref->count++;
+}
+
+static void mbuf_release(struct mbuf *m)
+{
+	struct mbuf_refcount *ref;
+
+	ref = (struct mbuf_refcount *)m_tag_locate(m, MTAG_COOKIE,
+	    MTAG_REFCOUNT, NULL);
+	KASSERT(ref != NULL, ("Cannot find refcount"));
+	KASSERT(ref->count > 0, ("Invalid reference count"));
+
+	if (--ref->count == 0)
+		m_freem(m);
+}
+
+static void tag_free(struct m_tag *t)
+{
+	struct mbuf_refcount *ref = (struct mbuf_refcount *)t;
+
+	KASSERT(ref->count == 0, ("Free mbuf tag with pending refcnt"));
+}
+
 #define IPRINTK(fmt, args...) \
     printf("[XEN] " fmt, ##args)
 #ifdef INVARIANTS
@@ -783,6 +826,7 @@ destroy_txq(struct netfront_txq *txq)
 	buf_ring_free(txq->br, M_DEVBUF);
 	taskqueue_drain_all(txq->tq);
 	taskqueue_free(txq->tq);
+	bus_dmamap_destroy(txq->info->dma_tag, txq->dma_map);
 }
 
 static void
@@ -826,6 +870,11 @@ setup_txqs(device_t dev, struct netfront_info *info,
 		for (i = 0; i <= NET_TX_RING_SIZE; i++) {
 			txq->mbufs[i] = (void *) ((u_long) i+1);
 			txq->grant_ref[i] = GRANT_REF_INVALID;
+			m_tag_setup(&txq->refcount_tag[i].tag,
+			    MTAG_COOKIE, MTAG_REFCOUNT,
+			    sizeof(txq->refcount_tag[i]) -
+			    sizeof(txq->refcount_tag[i].tag));
+			txq->refcount_tag[i].tag.m_tag_free = &tag_free;
 		}
 		txq->mbufs[NET_TX_RING_SIZE] = (void *)0;
 
@@ -874,10 +923,18 @@ setup_txqs(device_t dev, struct netfront_info *info,
 			device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n");
 			goto fail_bind_port;
 		}
+
+		error = bus_dmamap_create(info->dma_tag, 0, &txq->dma_map);
+		if (error != 0) {
+			device_printf(dev, "failed to create dma map\n");
+			goto fail_dma_map;
+		}
 	}
 
 	return (0);
 
+fail_dma_map:
+	xen_intr_unbind(&txq->xen_intr_handle);
 fail_bind_port:
 	taskqueue_drain_all(txq->tq);
 fail_start_thread:
@@ -1041,7 +1098,7 @@ xn_release_tx_bufs(struct netfront_txq *txq)
 		if (txq->mbufs_cnt < 0) {
 			panic("%s: tx_chain_cnt must be >= 0", __func__);
 		}
-		m_free(m);
+		mbuf_release(m);
 	}
 }
 
@@ -1311,7 +1368,7 @@ xn_txeof(struct netfront_txq *txq)
 			txq->mbufs[id] = NULL;
 			add_id_to_freelist(txq->mbufs, id);
 			txq->mbufs_cnt--;
-			m_free(m);
+			mbuf_release(m);
 			/* Only mark the txq active if we've freed up at least one slot to try */
 			ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 		}
@@ -1530,27 +1587,18 @@ xn_count_frags(struct mbuf *m)
 static int
 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 {
-	struct mbuf *m;
 	struct netfront_info *np = txq->info;
 	struct ifnet *ifp = np->xn_ifp;
-	u_int nfrags;
-	int otherend_id;
+	int otherend_id, error, nfrags;
+	unsigned int i;
+	bus_dma_segment_t segs[MAX_TX_REQ_FRAGS];
 
-	/**
-	 * Defragment the mbuf if necessary.
-	 */
-	nfrags = xn_count_frags(m_head);
+	error = bus_dmamap_load_mbuf_sg(np->dma_tag, txq->dma_map, m_head,
+	    segs, &nfrags, 0);
+	if (error == EFBIG || nfrags > np->maxfrags) {
+		struct mbuf *m;
 
-	/*
-	 * Check to see whether this request is longer than netback
-	 * can handle, and try to defrag it.
-	 */
-	/**
-	 * It is a bit lame, but the netback driver in Linux can't
-	 * deal with nfrags > MAX_TX_REQ_FRAGS, which is a quirk of
-	 * the Linux network stack.
-	 */
-	if (nfrags > np->maxfrags) {
+		bus_dmamap_unload(np->dma_tag, txq->dma_map);
 		m = m_defrag(m_head, M_NOWAIT);
 		if (!m) {
 			/*
@@ -1561,15 +1609,18 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 			return (EMSGSIZE);
 		}
 		m_head = m;
+		error = bus_dmamap_load_mbuf_sg(np->dma_tag, txq->dma_map,
+		    m_head, segs, &nfrags, 0);
+		if (error != 0 || nfrags > np->maxfrags) {
+			bus_dmamap_unload(np->dma_tag, txq->dma_map);
+			m_freem(m_head);
+			return (error ?: EFBIG);
+		}
+	} else if (error != 0) {
+		m_freem(m_head);
+		return (error);
 	}
 
-	/* Determine how many fragments now exist */
-	nfrags = xn_count_frags(m_head);
-
-	/*
-	 * Check to see whether the defragmented packet has too many
-	 * segments for the Linux netback driver.
-	 */
 	/**
 	 * The FreeBSD TCP stack, with TSO enabled, can produce a chain
 	 * of mbufs longer than Linux can handle.  Make sure we don't
@@ -1604,9 +1655,8 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 	 * the fragment pointers. Stop when we run out
 	 * of fragments or hit the end of the mbuf chain.
 	 */
-	m = m_head;
 	otherend_id = xenbus_get_otherend_id(np->xbdev);
-	for (m = m_head; m; m = m->m_next) {
+	for (i = 0; i < nfrags; i++) {
 		netif_tx_request_t *tx;
 		uintptr_t id;
 		grant_ref_t ref;
@@ -1621,17 +1671,22 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 		if (txq->mbufs_cnt > NET_TX_RING_SIZE)
 			panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n",
 			    __func__);
-		txq->mbufs[id] = m;
+		if (i == 0)
+			m_tag_prepend(m_head, &txq->refcount_tag[id].tag);
+		mbuf_grab(m_head);
+		txq->mbufs[id] = m_head;
 		tx->id = id;
 		ref = gnttab_claim_grant_reference(&txq->gref_head);
 		KASSERT((short)ref >= 0, ("Negative ref"));
-		mfn = virt_to_mfn(mtod(m, vm_offset_t));
+		mfn = atop(segs[i].ds_addr);
 		gnttab_grant_foreign_access_ref(ref, otherend_id,
 		    mfn, GNTMAP_readonly);
 		tx->gref = txq->grant_ref[id] = ref;
-		tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
+		tx->offset = segs[i].ds_addr & PAGE_MASK;
+		KASSERT(tx->offset + segs[i].ds_len <= PAGE_SIZE,
+		    ("mbuf segment crosses a page boundary"));
 		tx->flags = 0;
-		if (m == m_head) {
+		if (i == 0) {
 			/*
 			 * The first fragment has the entire packet
 			 * size, subsequent fragments have just the
@@ -1640,7 +1695,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 			 * subtracting the sizes of the other
 			 * fragments.
 			 */
-			tx->size = m->m_pkthdr.len;
+			tx->size = m_head->m_pkthdr.len;
 
 			/*
 			 * The first fragment contains the checksum flags
@@ -1654,12 +1709,12 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 			 * so we have to test for CSUM_TSO
 			 * explicitly.
 			 */
-			if (m->m_pkthdr.csum_flags
+			if (m_head->m_pkthdr.csum_flags
 			    & (CSUM_DELAY_DATA | CSUM_TSO)) {
 				tx->flags |= (NETTXF_csum_blank
 				    | NETTXF_data_validated);
 			}
-			if (m->m_pkthdr.csum_flags & CSUM_TSO) {
+			if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 				struct netif_extra_info *gso =
 					(struct netif_extra_info *)
 					RING_GET_REQUEST(&txq->ring,
@@ -1667,7 +1722,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 
 				tx->flags |= NETTXF_extra_info;
 
-				gso->u.gso.size = m->m_pkthdr.tso_segsz;
+				gso->u.gso.size = m_head->m_pkthdr.tso_segsz;
 				gso->u.gso.type =
 					XEN_NETIF_GSO_TYPE_TCPV4;
 				gso->u.gso.pad = 0;
@@ -1677,13 +1732,14 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 				gso->flags = 0;
 			}
 		} else {
-			tx->size = m->m_len;
+			tx->size = segs[i].ds_len;
 		}
-		if (m->m_next)
+		if (i != nfrags - 1)
 			tx->flags |= NETTXF_more_data;
 
 		txq->ring.req_prod_pvt++;
 	}
+	bus_dmamap_unload(np->dma_tag, txq->dma_map);
 	BPF_MTAP(ifp, m_head);
 
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
@@ -2244,7 +2300,20 @@ create_netdev(device_t dev)
     	ether_ifattach(ifp, np->mac);
 	netfront_carrier_off(np);
 
-	return (0);
+	err = bus_dma_tag_create(
+	    bus_get_dma_tag(dev),		/* parent */
+	    1, PAGE_SIZE,			/* algnmnt, boundary */
+	    BUS_SPACE_MAXADDR,			/* lowaddr */
+	    BUS_SPACE_MAXADDR,			/* highaddr */
+	    NULL, NULL,				/* filter, filterarg */
+	    PAGE_SIZE * MAX_TX_REQ_FRAGS,	/* max request size */
+	    MAX_TX_REQ_FRAGS,			/* max segments */
+	    PAGE_SIZE,				/* maxsegsize */
+	    BUS_DMA_ALLOCNOW,			/* flags */
+	    NULL, NULL,				/* lockfunc, lockarg */
+	    &np->dma_tag);
+
+	return (err);
 
 error:
 	KASSERT(err != 0, ("Error path with no error code specified"));
@@ -2277,6 +2346,7 @@ netif_free(struct netfront_info *np)
 	if_free(np->xn_ifp);
 	np->xn_ifp = NULL;
 	ifmedia_removeall(&np->sc_media);
+	bus_dma_tag_destroy(np->dma_tag);
 }
 
 static void



  reply	other threads:[~2021-12-29 19:08 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-18 18:35 Possible bug? DOM-U network stopped working after fatal error reported in DOM0 G.R.
2021-12-19  6:10 ` Juergen Gross
2021-12-19 17:31 ` G.R.
2021-12-20 17:13   ` G.R.
2021-12-21 13:50     ` Roger Pau Monné
2021-12-21 18:19       ` G.R.
2021-12-21 19:12         ` Roger Pau Monné
2021-12-23 15:49           ` G.R.
2021-12-24 11:24             ` Roger Pau Monné
2021-12-25 16:39               ` G.R.
2021-12-25 18:06                 ` G.R.
2021-12-27 19:04                   ` Roger Pau Monné
     [not found]                     ` <CAKhsbWY5=vENgwgq3NV44KSZQgpOPY=33CMSZo=jweAcRDjBwg@mail.gmail.com>
2021-12-29  8:32                       ` Roger Pau Monné
2021-12-29  9:13                         ` G.R.
2021-12-29 10:27                           ` Roger Pau Monné
2021-12-29 19:07                             ` Roger Pau Monné [this message]
2021-12-30 15:12                               ` G.R.
2021-12-30 18:51                                 ` Roger Pau Monné
2021-12-31 14:47                                   ` G.R.
2022-01-04 10:25                                     ` Roger Pau Monné
2022-01-04 16:05                                       ` G.R.
2022-01-05 14:33                                         ` Roger Pau Monné
2022-01-07 17:14                                           ` G.R.
2022-01-10 14:53                                             ` Roger Pau Monné
2022-01-11 14:24                                               ` G.R.
2022-10-30 16:36                                               ` G.R.
2022-11-03  6:58                                                 ` Paul Leiber
2022-11-03 12:22                                                   ` Roger Pau Monné
2022-12-14  6:16                                                     ` G.R.
2024-01-09 11:13                                                       ` Niklas Hallqvist
2024-01-09 13:53                                                         ` Roger Pau Monné
2024-01-19 15:51                                                           ` G.R.
2021-12-20 13:51 ` Roger Pau Monné

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Ycyx65BDakqWmEe+@Air-de-Roger \
    --to=roger.pau@citrix.com \
    --cc=firemeteor@users.sourceforge.net \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.