linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: NFS weirdness in 2.6.0-test1
       [not found]   ` <20030726015007.GA18944@linux-sh.org>
@ 2003-07-28 19:48     ` Trond Myklebust
  2003-07-29 17:53     ` Trond Myklebust
  1 sibling, 0 replies; 3+ messages in thread
From: Trond Myklebust @ 2003-07-28 19:48 UTC (permalink / raw)
  To: Paul Mundt; +Cc: Linux Kernel

>>>>> " " == Paul Mundt <lethal@linux-sh.org> writes:

     > Any other suggestions?

I think I've found the problem. It is due to us overwriting
req->rq_rcv_buf in call_encode() while the RPC request is still on the
list waiting for a reply from the server.

Actually, this *is* likely to be an issue on 2.4.x too, but as it is
also going to be very timing related, you are probably being lucky in
one case, and not the other.

I'll draw up a patch some time in the next 24 hours...

Cheers,
  Trond

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: NFS weirdness in 2.6.0-test1
       [not found]   ` <20030726015007.GA18944@linux-sh.org>
  2003-07-28 19:48     ` NFS weirdness in 2.6.0-test1 Trond Myklebust
@ 2003-07-29 17:53     ` Trond Myklebust
  2003-07-30  1:57       ` Paul Mundt
  1 sibling, 1 reply; 3+ messages in thread
From: Trond Myklebust @ 2003-07-29 17:53 UTC (permalink / raw)
  To: Paul Mundt; +Cc: Linux Kernel

>>>>> " " == Paul Mundt <lethal@linux-sh.org> writes:

     > I'm still left with corrupted data every time I get:

     > NFS: server cheating in read reply: count 1526 > recvd 1000
     > NFS: server cheating in read reply: count 4096 > recvd 1000
     > NFS: server cheating in read reply: count 1583 > recvd 1000

     > Any other suggestions?

Does the following patch fix it?

Note: I'm not entirely sure about whether or not we need those 2
smp_rmb(), but since the value of req->rq_received may be changed from
inside a softirq while we're working, I'm assuming that it is.

Cheers,
  Trond

diff -u --recursive --new-file linux-2.6.0-test2/include/linux/sunrpc/xprt.h linux-2.6.0-01-fix_rcv/include/linux/sunrpc/xprt.h
--- linux-2.6.0-test2/include/linux/sunrpc/xprt.h	2003-05-07 13:03:10.000000000 +0200
+++ linux-2.6.0-01-fix_rcv/include/linux/sunrpc/xprt.h	2003-07-29 15:36:57.000000000 +0200
@@ -98,6 +98,10 @@
 
 	struct list_head	rq_list;
 
+	struct xdr_buf		rq_private_buf;		/* The receive buffer
+							 * used in the softirq.
+							 */
+
 	/*
 	 * For authentication (e.g. auth_des)
 	 */
diff -u --recursive --new-file linux-2.6.0-test2/net/sunrpc/clnt.c linux-2.6.0-01-fix_rcv/net/sunrpc/clnt.c
--- linux-2.6.0-test2/net/sunrpc/clnt.c	2003-07-16 02:02:31.000000000 +0200
+++ linux-2.6.0-01-fix_rcv/net/sunrpc/clnt.c	2003-07-29 19:47:55.380468376 +0200
@@ -659,7 +659,7 @@
 	if (task->tk_status < 0)
 		return;
 	task->tk_status = xprt_prepare_transmit(task);
-	if (task->tk_status < 0)
+	if (task->tk_status != 0)
 		return;
 	/* Encode here so that rpcsec_gss can use correct sequence number. */
 	if (!task->tk_rqstp->rq_bytes_sent)
@@ -685,7 +685,8 @@
 	struct rpc_rqst	*req = task->tk_rqstp;
 	int		status;
 
-	if (req->rq_received != 0)
+	smp_rmb();
+	if (req->rq_received > 0 && !req->rq_bytes_sent)
 		task->tk_status = req->rq_received;
 
 	dprintk("RPC: %4d call_status (status %d)\n", 
@@ -789,17 +790,21 @@
 		if (!clnt->cl_softrtry) {
 			task->tk_action = call_transmit;
 			clnt->cl_stats->rpcretrans++;
-		} else {
-			printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
-				clnt->cl_protname, task->tk_status);
-			rpc_exit(task, -EIO);
+			goto out_retry;
 		}
+		printk(KERN_WARNING "%s: too small RPC reply size (%d bytes)\n",
+			clnt->cl_protname, task->tk_status);
+		rpc_exit(task, -EIO);
 		return;
 	}
 
+	/* Check that the softirq receive buffer is valid */
+	WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
+				sizeof(req->rq_rcv_buf)) != 0);
+
 	/* Verify the RPC header */
 	if (!(p = call_verify(task)))
-		return;
+		goto out_retry;
 
 	/*
 	 * The following is an NFS-specific hack to cater for setuid
@@ -812,7 +817,7 @@
 			task->tk_flags ^= RPC_CALL_REALUID;
 			task->tk_action = call_bind;
 			task->tk_suid_retry--;
-			return;
+			goto out_retry;
 		}
 	}
 
@@ -822,6 +827,10 @@
 		task->tk_status = decode(req, p, task->tk_msg.rpc_resp);
 	dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
 					task->tk_status);
+	return;
+out_retry:
+	req->rq_received = 0;
+	task->tk_status = 0;
 }
 
 /*
diff -u --recursive --new-file linux-2.6.0-test2/net/sunrpc/xprt.c linux-2.6.0-01-fix_rcv/net/sunrpc/xprt.c
--- linux-2.6.0-test2/net/sunrpc/xprt.c	2003-07-16 02:02:37.000000000 +0200
+++ linux-2.6.0-01-fix_rcv/net/sunrpc/xprt.c	2003-07-29 19:48:15.107469416 +0200
@@ -140,15 +140,20 @@
 static int
 __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
 {
+	struct rpc_rqst *req = task->tk_rqstp;
+
 	if (!xprt->snd_task) {
-		if (xprt->nocong || __xprt_get_cong(xprt, task))
+		if (xprt->nocong || __xprt_get_cong(xprt, task)) {
 			xprt->snd_task = task;
+			if (req)
+				req->rq_bytes_sent = 0;
+		}
 	}
 	if (xprt->snd_task != task) {
 		dprintk("RPC: %4d TCP write queue full\n", task->tk_pid);
 		task->tk_timeout = 0;
 		task->tk_status = -EAGAIN;
-		if (task->tk_rqstp && task->tk_rqstp->rq_nresend)
+		if (req && req->rq_nresend)
 			rpc_sleep_on(&xprt->resend, task, NULL, NULL);
 		else
 			rpc_sleep_on(&xprt->sending, task, NULL, NULL);
@@ -183,8 +188,12 @@
 		if (!task)
 			return;
 	}
-	if (xprt->nocong || __xprt_get_cong(xprt, task))
+	if (xprt->nocong || __xprt_get_cong(xprt, task)) {
+		struct rpc_rqst *req = task->tk_rqstp;
 		xprt->snd_task = task;
+		if (req)
+			req->rq_bytes_sent = 0;
+	}
 }
 
 /*
@@ -424,6 +433,9 @@
 	if (xprt_connected(xprt))
 		goto out_write;
 
+	if (task->tk_rqstp)
+		task->tk_rqstp->rq_bytes_sent = 0;
+
 	/*
 	 * We're here because the xprt was marked disconnected.
 	 * Start by resetting any existing state.
@@ -716,11 +728,11 @@
 
 	dprintk("RPC: %4d received reply\n", task->tk_pid);
 
-	if ((copied = rovr->rq_rlen) > repsize)
+	if ((copied = rovr->rq_private_buf.len) > repsize)
 		copied = repsize;
 
 	/* Suck it into the iovec, verify checksum if not done by hw. */
-	if (csum_partial_copy_to_xdr(&rovr->rq_rcv_buf, skb))
+	if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
 		goto out_unlock;
 
 	/* Something worked... */
@@ -843,7 +855,7 @@
 		return;
 	}
 
-	rcvbuf = &req->rq_rcv_buf;
+	rcvbuf = &req->rq_private_buf;
 	len = desc->count;
 	if (len > xprt->tcp_reclen - xprt->tcp_offset) {
 		skb_reader_t my_desc;
@@ -861,7 +873,7 @@
 	xprt->tcp_copied += len;
 	xprt->tcp_offset += len;
 
-	if (xprt->tcp_copied == req->rq_rlen)
+	if (xprt->tcp_copied == req->rq_private_buf.len)
 		xprt->tcp_flags &= ~XPRT_COPY_DATA;
 	else if (xprt->tcp_offset == xprt->tcp_reclen) {
 		if (xprt->tcp_flags & XPRT_LAST_FRAG)
@@ -1106,10 +1118,11 @@
 	if (xprt->shutdown)
 		return -EIO;
 
-	if (task->tk_rpcwait)
-		rpc_remove_wait_queue(task);
-
 	spin_lock_bh(&xprt->sock_lock);
+	if (req->rq_received && !req->rq_bytes_sent) {
+		err = req->rq_received;
+		goto out_unlock;
+	}
 	if (!__xprt_lock_write(xprt, task)) {
 		err = -EAGAIN;
 		goto out_unlock;
@@ -1119,11 +1132,6 @@
 		err = -ENOTCONN;
 		goto out_unlock;
 	}
-
-	if (list_empty(&req->rq_list)) {
-		list_add_tail(&req->rq_list, &xprt->recv);
-		req->rq_received = 0;
-	}
 out_unlock:
 	spin_unlock_bh(&xprt->sock_lock);
 	return err;
@@ -1148,6 +1156,20 @@
 		*marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker)));
 	}
 
+	smp_rmb();
+	if (!req->rq_received) {
+		if (list_empty(&req->rq_list)) {
+			spin_lock_bh(&xprt->sock_lock);
+			/* Update the softirq receive buffer */
+			memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+					sizeof(req->rq_private_buf));
+			/* Add request to the receive list */
+			list_add_tail(&req->rq_list, &xprt->recv);
+			spin_unlock_bh(&xprt->sock_lock);
+		}
+	} else if (!req->rq_bytes_sent)
+		return;
+
 	/* Continue transmitting the packet/record. We must be careful
 	 * to cope with writespace callbacks arriving _after_ we have
 	 * called xprt_sendmsg().
@@ -1162,8 +1184,12 @@
 		if (xprt->stream) {
 			req->rq_bytes_sent += status;
 
-			if (req->rq_bytes_sent >= req->rq_slen)
+			/* If we've sent the entire packet, immediately
+			 * reset the count of bytes sent. */
+			if (req->rq_bytes_sent >= req->rq_slen) {
+				req->rq_bytes_sent = 0;
 				goto out_receive;
+			}
 		} else {
 			if (status >= req->rq_slen)
 				goto out_receive;
@@ -1184,9 +1210,6 @@
 	 *	 hence there is no danger of the waking up task being put on
 	 *	 schedq, and being picked up by a parallel run of rpciod().
 	 */
-	if (req->rq_received)
-		goto out_release;
-
 	task->tk_status = status;
 
 	switch (status) {
@@ -1216,13 +1239,12 @@
 		if (xprt->stream)
 			xprt_disconnect(xprt);
 	}
- out_release:
 	xprt_release_write(xprt, task);
-	req->rq_bytes_sent = 0;
 	return;
  out_receive:
 	dprintk("RPC: %4d xmit complete\n", task->tk_pid);
 	/* Set the task's receive timeout value */
+	spin_lock_bh(&xprt->sock_lock);
 	if (!xprt->nocong) {
 		task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt,
 				task->tk_msg.rpc_proc->p_timer);
@@ -1231,7 +1253,6 @@
 			task->tk_timeout = req->rq_timeout.to_maxval;
 	} else
 		task->tk_timeout = req->rq_timeout.to_current;
-	spin_lock_bh(&xprt->sock_lock);
 	/* Don't race with disconnect */
 	if (!xprt_connected(xprt))
 		task->tk_status = -ENOTCONN;
@@ -1239,7 +1260,6 @@
 		rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
 	__xprt_release_write(xprt, task);
 	spin_unlock_bh(&xprt->sock_lock);
-	req->rq_bytes_sent = 0;
 }
 
 /*

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: NFS weirdness in 2.6.0-test1
  2003-07-29 17:53     ` Trond Myklebust
@ 2003-07-30  1:57       ` Paul Mundt
  0 siblings, 0 replies; 3+ messages in thread
From: Paul Mundt @ 2003-07-30  1:57 UTC (permalink / raw)
  To: Trond Myklebust; +Cc: Linux Kernel

[-- Attachment #1: Type: text/plain, Size: 447 bytes --]

On Tue, Jul 29, 2003 at 07:53:52PM +0200, Trond Myklebust wrote:
> > NFS: server cheating in read reply: count 1526 > recvd 1000
> > NFS: server cheating in read reply: count 4096 > recvd 1000
> > NFS: server cheating in read reply: count 1583 > recvd 1000
> 
> Does the following patch fix it?
> 
Yes, this seems to work fine. I've been running this for awhile now
under relative load, and haven't seen any data corruption.

Thanks!


[-- Attachment #2: Type: application/pgp-signature, Size: 232 bytes --]

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2003-07-30  1:58 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20030725151127.GA2947@linux-sh.org>
     [not found] ` <16161.25923.623651.618044@charged.uio.no>
     [not found]   ` <20030726015007.GA18944@linux-sh.org>
2003-07-28 19:48     ` NFS weirdness in 2.6.0-test1 Trond Myklebust
2003-07-29 17:53     ` Trond Myklebust
2003-07-30  1:57       ` Paul Mundt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).