All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Roger Pau Monné" <roger.pau@citrix.com>
To: G.R. <firemeteor@users.sourceforge.net>
Cc: xen-devel <xen-devel@lists.xen.org>
Subject: Re: Possible bug? DOM-U network stopped working after fatal error reported in DOM0
Date: Mon, 27 Dec 2021 20:04:51 +0100	[thread overview]
Message-ID: <YcoOUw/u3SqTxWKm@Air-de-Roger> (raw)
In-Reply-To: <CAKhsbWarxwi_n3NAr81op_apyN69itUrv7f1k1ZJ6=gXuFXtGA@mail.gmail.com>

On Sun, Dec 26, 2021 at 02:06:55AM +0800, G.R. wrote:
> > > Thanks. I've raised this on freensd-net for advice [0]. IMO netfront
> > > shouldn't receive an mbuf that crosses a page boundary, but if that's
> > > indeed a legit mbuf I will figure out the best way to handle it.
> > >
> > > I have a clumsy patch (below) that might solve this, if you want to
> > > give it a try.
> >
> > Applied the patch and it worked like a charm!
> > Thank you so much for your quick help!
> > Wish you a wonderful holiday!
> 
> I may have said too quickly...
> With the patch I can attach the iscsi disk and neither the dom0 nor
> the NAS domU complains this time.
> But when I attempt to mount the attached disk it reports I/O errors randomly.
> By randomly I mean different disks behave differently...
> I don't see any error logs from kernels this time.
> (most of the iscsi disks are NTFS FS and mounted through the user mode
> fuse library)
> But since I have a local backup copy of the image, I can confirm that
> mounting that backup image does not result in any I/O error.
> Looks like something is still broken here...

Indeed. That patch was likely too simple, and didn't properly handle
the split of mbuf data buffers.

I have another version based on using sglist, which I think it's also
a worthwhile change for netfront. Can you please give it a try? I've
done a very simple test and seems fine, but you certainly have more
interesting cases.

You will have to apply it on top of a clean tree, without any of the
other patches applied.

Thanks, Roger.
---
diff --git a/sys/dev/xen/netfront/netfront.c b/sys/dev/xen/netfront/netfront.c
index 8dba5a8dc6d5..37ea7b1fa059 100644
--- a/sys/dev/xen/netfront/netfront.c
+++ b/sys/dev/xen/netfront/netfront.c
@@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$");
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
+#include <sys/types.h>
+
 #include <sys/param.h>
 #include <sys/sockio.h>
 #include <sys/limits.h>
@@ -40,6 +42,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/kernel.h>
+#include <sys/sglist.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
@@ -199,6 +202,12 @@ struct netfront_txq {
 	struct taskqueue 	*tq;
 	struct task       	defrtask;
 
+	struct sglist 		*segments;
+	struct mbuf_refcount {
+		struct m_tag 	tag;
+		u_int 		count;
+	}			refcount_tag[NET_TX_RING_SIZE + 1];
+
 	bool			full;
 };
 
@@ -301,6 +310,38 @@ xn_get_rx_ref(struct netfront_rxq *rxq, RING_IDX ri)
 	return (ref);
 }
 
+#define MTAG_REFCOUNT 0
+
+static void mbuf_grab(uint32_t cookie, struct mbuf *m)
+{
+	struct mbuf_refcount *ref;
+
+	ref = (struct mbuf_refcount *)m_tag_locate(m, cookie, MTAG_REFCOUNT,
+	    NULL);
+	KASSERT(ref != NULL, ("Cannot find refcount"));
+	ref->count++;
+}
+
+static void mbuf_release(uint32_t cookie, struct mbuf *m)
+{
+	struct mbuf_refcount *ref;
+
+	ref = (struct mbuf_refcount *)m_tag_locate(m, cookie, MTAG_REFCOUNT,
+	    NULL);
+	KASSERT(ref != NULL, ("Cannot find refcount"));
+	KASSERT(ref->count > 0, ("Invalid reference count"));
+
+	if(--ref->count == 0)
+		m_freem(m);
+}
+
+static void tag_free(struct m_tag *t)
+{
+	struct mbuf_refcount *ref = (struct mbuf_refcount *)t;
+
+	KASSERT(ref->count == 0, ("Free mbuf tag with pending refcnt"));
+}
+
 #define IPRINTK(fmt, args...) \
     printf("[XEN] " fmt, ##args)
 #ifdef INVARIANTS
@@ -778,7 +819,7 @@ disconnect_txq(struct netfront_txq *txq)
 static void
 destroy_txq(struct netfront_txq *txq)
 {
-
+	sglist_free(txq->segments);
 	free(txq->ring.sring, M_DEVBUF);
 	buf_ring_free(txq->br, M_DEVBUF);
 	taskqueue_drain_all(txq->tq);
@@ -826,6 +867,11 @@ setup_txqs(device_t dev, struct netfront_info *info,
 		for (i = 0; i <= NET_TX_RING_SIZE; i++) {
 			txq->mbufs[i] = (void *) ((u_long) i+1);
 			txq->grant_ref[i] = GRANT_REF_INVALID;
+			m_tag_setup(&txq->refcount_tag[i].tag,
+			    (unsigned long)txq, MTAG_REFCOUNT,
+			    sizeof(txq->refcount_tag[i]) -
+			    sizeof(txq->refcount_tag[i].tag));
+			txq->refcount_tag[i].tag.m_tag_free = &tag_free;
 		}
 		txq->mbufs[NET_TX_RING_SIZE] = (void *)0;
 
@@ -874,10 +920,18 @@ setup_txqs(device_t dev, struct netfront_info *info,
 			device_printf(dev, "xen_intr_alloc_and_bind_local_port failed\n");
 			goto fail_bind_port;
 		}
+
+		txq->segments = sglist_alloc(MAX_TX_REQ_FRAGS, M_WAITOK);
+		if (txq->segments == NULL) {
+			device_printf(dev, "failed to allocate sglist\n");
+			goto fail_sglist;
+		}
 	}
 
 	return (0);
 
+fail_sglist:
+	xen_intr_unbind(&txq->xen_intr_handle);
 fail_bind_port:
 	taskqueue_drain_all(txq->tq);
 fail_start_thread:
@@ -1041,7 +1095,7 @@ xn_release_tx_bufs(struct netfront_txq *txq)
 		if (txq->mbufs_cnt < 0) {
 			panic("%s: tx_chain_cnt must be >= 0", __func__);
 		}
-		m_free(m);
+		mbuf_release((unsigned long)txq, m);
 	}
 }
 
@@ -1311,7 +1365,7 @@ xn_txeof(struct netfront_txq *txq)
 			txq->mbufs[id] = NULL;
 			add_id_to_freelist(txq->mbufs, id);
 			txq->mbufs_cnt--;
-			m_free(m);
+			mbuf_release((unsigned long)txq, m);
 			/* Only mark the txq active if we've freed up at least one slot to try */
 			ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 		}
@@ -1507,22 +1561,6 @@ xn_get_responses(struct netfront_rxq *rxq,
 	return (err);
 }
 
-/**
- * \brief Count the number of fragments in an mbuf chain.
- *
- * Surprisingly, there isn't an M* macro for this.
- */
-static inline int
-xn_count_frags(struct mbuf *m)
-{
-	int nfrags;
-
-	for (nfrags = 0; m != NULL; m = m->m_next)
-		nfrags++;
-
-	return (nfrags);
-}
-
 /**
  * Given an mbuf chain, make sure we have enough room and then push
  * it onto the transmit ring.
@@ -1530,16 +1568,22 @@ xn_count_frags(struct mbuf *m)
 static int
 xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 {
-	struct mbuf *m;
 	struct netfront_info *np = txq->info;
 	struct ifnet *ifp = np->xn_ifp;
-	u_int nfrags;
-	int otherend_id;
+	u_int nfrags, i;
+	int otherend_id, rc;
+
+	sglist_reset(txq->segments);
+	rc = sglist_append_mbuf(txq->segments, m_head);
+	if (rc != 0) {
+		m_freem(m_head);
+		return (rc);
+	}
 
 	/**
 	 * Defragment the mbuf if necessary.
 	 */
-	nfrags = xn_count_frags(m_head);
+	nfrags = txq->segments->sg_nseg;
 
 	/*
 	 * Check to see whether this request is longer than netback
@@ -1551,6 +1595,8 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 	 * the Linux network stack.
 	 */
 	if (nfrags > np->maxfrags) {
+		struct mbuf *m;
+
 		m = m_defrag(m_head, M_NOWAIT);
 		if (!m) {
 			/*
@@ -1561,11 +1607,15 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 			return (EMSGSIZE);
 		}
 		m_head = m;
+		sglist_reset(txq->segments);
+		rc = sglist_append_mbuf(txq->segments, m_head);
+		if (rc != 0) {
+			m_freem(m_head);
+			return (rc);
+		}
+		nfrags = txq->segments->sg_nseg;
 	}
 
-	/* Determine how many fragments now exist */
-	nfrags = xn_count_frags(m_head);
-
 	/*
 	 * Check to see whether the defragmented packet has too many
 	 * segments for the Linux netback driver.
@@ -1604,14 +1654,15 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 	 * the fragment pointers. Stop when we run out
 	 * of fragments or hit the end of the mbuf chain.
 	 */
-	m = m_head;
 	otherend_id = xenbus_get_otherend_id(np->xbdev);
-	for (m = m_head; m; m = m->m_next) {
+	for (i = 0; i < nfrags; i++) {
 		netif_tx_request_t *tx;
 		uintptr_t id;
 		grant_ref_t ref;
 		u_long mfn; /* XXX Wrong type? */
+		struct sglist_seg *seg;
 
+		seg = &txq->segments->sg_segs[i];
 		tx = RING_GET_REQUEST(&txq->ring, txq->ring.req_prod_pvt);
 		id = get_id_from_freelist(txq->mbufs);
 		if (id == 0)
@@ -1621,17 +1672,22 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 		if (txq->mbufs_cnt > NET_TX_RING_SIZE)
 			panic("%s: tx_chain_cnt must be <= NET_TX_RING_SIZE\n",
 			    __func__);
-		txq->mbufs[id] = m;
+		if (i == 0)
+			m_tag_prepend(m_head, &txq->refcount_tag[id].tag);
+		mbuf_grab((unsigned long)txq, m_head);
+		txq->mbufs[id] = m_head;
 		tx->id = id;
 		ref = gnttab_claim_grant_reference(&txq->gref_head);
 		KASSERT((short)ref >= 0, ("Negative ref"));
-		mfn = virt_to_mfn(mtod(m, vm_offset_t));
+		mfn = atop(seg->ss_paddr);
 		gnttab_grant_foreign_access_ref(ref, otherend_id,
 		    mfn, GNTMAP_readonly);
 		tx->gref = txq->grant_ref[id] = ref;
-		tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
+		tx->offset = seg->ss_paddr & PAGE_MASK;
+		KASSERT(tx->offset + seg->ss_len <= PAGE_SIZE,
+		    ("mbuf segment crosses a page boundary"));
 		tx->flags = 0;
-		if (m == m_head) {
+		if (i == 0) {
 			/*
 			 * The first fragment has the entire packet
 			 * size, subsequent fragments have just the
@@ -1640,7 +1696,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 			 * subtracting the sizes of the other
 			 * fragments.
 			 */
-			tx->size = m->m_pkthdr.len;
+			tx->size = m_head->m_pkthdr.len;
 
 			/*
 			 * The first fragment contains the checksum flags
@@ -1654,12 +1710,12 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 			 * so we have to test for CSUM_TSO
 			 * explicitly.
 			 */
-			if (m->m_pkthdr.csum_flags
+			if (m_head->m_pkthdr.csum_flags
 			    & (CSUM_DELAY_DATA | CSUM_TSO)) {
 				tx->flags |= (NETTXF_csum_blank
 				    | NETTXF_data_validated);
 			}
-			if (m->m_pkthdr.csum_flags & CSUM_TSO) {
+			if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
 				struct netif_extra_info *gso =
 					(struct netif_extra_info *)
 					RING_GET_REQUEST(&txq->ring,
@@ -1667,7 +1723,7 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 
 				tx->flags |= NETTXF_extra_info;
 
-				gso->u.gso.size = m->m_pkthdr.tso_segsz;
+				gso->u.gso.size = m_head->m_pkthdr.tso_segsz;
 				gso->u.gso.type =
 					XEN_NETIF_GSO_TYPE_TCPV4;
 				gso->u.gso.pad = 0;
@@ -1677,9 +1733,9 @@ xn_assemble_tx_request(struct netfront_txq *txq, struct mbuf *m_head)
 				gso->flags = 0;
 			}
 		} else {
-			tx->size = m->m_len;
+			tx->size = seg->ss_len;
 		}
-		if (m->m_next)
+		if (i != nfrags - 1)
 			tx->flags |= NETTXF_more_data;
 
 		txq->ring.req_prod_pvt++;



  reply	other threads:[~2021-12-27 19:05 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-18 18:35 Possible bug? DOM-U network stopped working after fatal error reported in DOM0 G.R.
2021-12-19  6:10 ` Juergen Gross
2021-12-19 17:31 ` G.R.
2021-12-20 17:13   ` G.R.
2021-12-21 13:50     ` Roger Pau Monné
2021-12-21 18:19       ` G.R.
2021-12-21 19:12         ` Roger Pau Monné
2021-12-23 15:49           ` G.R.
2021-12-24 11:24             ` Roger Pau Monné
2021-12-25 16:39               ` G.R.
2021-12-25 18:06                 ` G.R.
2021-12-27 19:04                   ` Roger Pau Monné [this message]
     [not found]                     ` <CAKhsbWY5=vENgwgq3NV44KSZQgpOPY=33CMSZo=jweAcRDjBwg@mail.gmail.com>
2021-12-29  8:32                       ` Roger Pau Monné
2021-12-29  9:13                         ` G.R.
2021-12-29 10:27                           ` Roger Pau Monné
2021-12-29 19:07                             ` Roger Pau Monné
2021-12-30 15:12                               ` G.R.
2021-12-30 18:51                                 ` Roger Pau Monné
2021-12-31 14:47                                   ` G.R.
2022-01-04 10:25                                     ` Roger Pau Monné
2022-01-04 16:05                                       ` G.R.
2022-01-05 14:33                                         ` Roger Pau Monné
2022-01-07 17:14                                           ` G.R.
2022-01-10 14:53                                             ` Roger Pau Monné
2022-01-11 14:24                                               ` G.R.
2022-10-30 16:36                                               ` G.R.
2022-11-03  6:58                                                 ` Paul Leiber
2022-11-03 12:22                                                   ` Roger Pau Monné
2022-12-14  6:16                                                     ` G.R.
2024-01-09 11:13                                                       ` Niklas Hallqvist
2024-01-09 13:53                                                         ` Roger Pau Monné
2024-01-19 15:51                                                           ` G.R.
2021-12-20 13:51 ` Roger Pau Monné

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YcoOUw/u3SqTxWKm@Air-de-Roger \
    --to=roger.pau@citrix.com \
    --cc=firemeteor@users.sourceforge.net \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.