All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] rpc: resolve softlockup in presence of iptables drop rule
@ 2014-09-24 18:07 Jason Baron
  2014-09-24 18:08 ` [PATCH v2 1/2] rpc: return sent and err from xs_sendpages() Jason Baron
  2014-09-24 18:08 ` [PATCH v2 2/2] rpc: Add -EPERM processing for xs_udp_send_request() Jason Baron
  0 siblings, 2 replies; 3+ messages in thread
From: Jason Baron @ 2014-09-24 18:07 UTC (permalink / raw)
  To: trond.myklebust; +Cc: bfields, Anna.Schumaker, linux-nfs

v2:
-return -EIO after major timeout on a soft mount, continue to retry if its a
hard mount
-fixup changelogs

Hi,

We've found a softlockup on the nfsv3 client (udp), while testing an iptables
rule, which simply drops traffic destined for the nfs server. The softlockup
occurs because xs_sendpages() returns a positive result (resulting in an
-EAGAIN), masking out the -EPERM that the lower level ip code returns.

The first patch restructures xs_sendpages() such that it can return a 'sent'
value in addition to an error value. It no longer over-loads the return value
as both a 'sent' and 'error' value. In this way the 'higher' level code can
make more informed decisions.

The second patch makes use of the new return value to propagate an -EIO
up instead of the -EAGAIN and avoids the cpu spinning.

Thanks,

-Jason


Jason Baron (2):
  rpc: return sent and err from xs_sendpages()
  rpc: Add -EPERM processing for xs_udp_send_request()

 net/sunrpc/clnt.c     |  2 ++
 net/sunrpc/xprtsock.c | 87 ++++++++++++++++++++++++++++-----------------------
 2 files changed, 50 insertions(+), 39 deletions(-)

-- 
1.8.2.rc2


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH v2 1/2] rpc: return sent and err from xs_sendpages()
  2014-09-24 18:07 [PATCH v2 0/2] rpc: resolve softlockup in presence of iptables drop rule Jason Baron
@ 2014-09-24 18:08 ` Jason Baron
  2014-09-24 18:08 ` [PATCH v2 2/2] rpc: Add -EPERM processing for xs_udp_send_request() Jason Baron
  1 sibling, 0 replies; 3+ messages in thread
From: Jason Baron @ 2014-09-24 18:08 UTC (permalink / raw)
  To: trond.myklebust; +Cc: bfields, Anna.Schumaker, linux-nfs

If an error is returned after the first bits of a packet have already been
successfully queued, xs_sendpages() will return a positive 'int' value
indicating success. Callers seem to treat this as -EAGAIN.

However, there are cases where its not a question of waiting for the write
queue to drain. For example, when there is an iptables rule dropping packets
to the destination, the lower level code can return -EPERM only after parts
of the packet have been successfully queued. In this case, we can end up
continuously retrying resulting in a kernel softlockup.

This patch is intended to make no changes in behavior but is in preparation for
subsequent patches that can make decisions based on both on the number of bytes
sent by xs_sendpages() and any errors that may have be returned.

Signed-off-by: Jason Baron <jbaron@akamai.com>
---
 net/sunrpc/xprtsock.c | 81 ++++++++++++++++++++++++++-------------------------
 1 file changed, 42 insertions(+), 39 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 43cd89e..348c786 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -399,13 +399,13 @@ static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen,
 	return kernel_sendmsg(sock, &msg, NULL, 0, 0);
 }
 
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy)
+static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
 {
 	ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
 			int offset, size_t size, int flags);
 	struct page **ppage;
 	unsigned int remainder;
-	int err, sent = 0;
+	int err;
 
 	remainder = xdr->page_len - base;
 	base += xdr->page_base;
@@ -424,15 +424,15 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
 		err = do_sendpage(sock, *ppage, base, len, flags);
 		if (remainder == 0 || err != len)
 			break;
-		sent += err;
+		*sent_p += err;
 		ppage++;
 		base = 0;
 	}
-	if (sent == 0)
-		return err;
-	if (err > 0)
-		sent += err;
-	return sent;
+	if (err > 0) {
+		*sent_p += err;
+		err = 0;
+	}
+	return err;
 }
 
 /**
@@ -443,12 +443,14 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
  * @xdr: buffer containing this request
  * @base: starting position in the buffer
  * @zerocopy: true if it is safe to use sendpage()
+ * @sent_p: return the total number of bytes successfully queued for sending
  *
  */
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
 {
 	unsigned int remainder = xdr->len - base;
-	int err, sent = 0;
+	int err = 0;
+	int sent = 0;
 
 	if (unlikely(!sock))
 		return -ENOTSOCK;
@@ -465,7 +467,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
 		err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
 		if (remainder == 0 || err != len)
 			goto out;
-		sent += err;
+		*sent_p += err;
 		base = 0;
 	} else
 		base -= xdr->head[0].iov_len;
@@ -473,23 +475,23 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
 	if (base < xdr->page_len) {
 		unsigned int len = xdr->page_len - base;
 		remainder -= len;
-		err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy);
-		if (remainder == 0 || err != len)
+		err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
+		*sent_p += sent;
+		if (remainder == 0 || sent != len)
 			goto out;
-		sent += err;
 		base = 0;
 	} else
 		base -= xdr->page_len;
 
 	if (base >= xdr->tail[0].iov_len)
-		return sent;
+		return 0;
 	err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
 out:
-	if (sent == 0)
-		return err;
-	if (err > 0)
-		sent += err;
-	return sent;
+	if (err > 0) {
+		*sent_p += err;
+		err = 0;
+	}
+	return err;
 }
 
 static void xs_nospace_callback(struct rpc_task *task)
@@ -573,19 +575,20 @@ static int xs_local_send_request(struct rpc_task *task)
 				container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
 	int status;
+	int sent = 0;
 
 	xs_encode_stream_record_marker(&req->rq_snd_buf);
 
 	xs_pktdump("packet data:",
 			req->rq_svec->iov_base, req->rq_svec->iov_len);
 
-	status = xs_sendpages(transport->sock, NULL, 0,
-						xdr, req->rq_bytes_sent, true);
+	status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
+			      true, &sent);
 	dprintk("RPC:       %s(%u) = %d\n",
 			__func__, xdr->len - req->rq_bytes_sent, status);
-	if (likely(status >= 0)) {
-		req->rq_bytes_sent += status;
-		req->rq_xmit_bytes_sent += status;
+	if (likely(sent > 0) || status == 0) {
+		req->rq_bytes_sent += sent;
+		req->rq_xmit_bytes_sent += sent;
 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
 			req->rq_bytes_sent = 0;
 			return 0;
@@ -626,6 +629,7 @@ static int xs_udp_send_request(struct rpc_task *task)
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
+	int sent = 0;
 	int status;
 
 	xs_pktdump("packet data:",
@@ -634,17 +638,15 @@ static int xs_udp_send_request(struct rpc_task *task)
 
 	if (!xprt_bound(xprt))
 		return -ENOTCONN;
-	status = xs_sendpages(transport->sock,
-			      xs_addr(xprt),
-			      xprt->addrlen, xdr,
-			      req->rq_bytes_sent, true);
+	status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
+			      xdr, req->rq_bytes_sent, true, &sent);
 
 	dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
 			xdr->len - req->rq_bytes_sent, status);
 
-	if (status >= 0) {
-		req->rq_xmit_bytes_sent += status;
-		if (status >= req->rq_slen)
+	if (sent > 0 || status == 0) {
+		req->rq_xmit_bytes_sent += sent;
+		if (sent >= req->rq_slen)
 			return 0;
 		/* Still some bytes left; set up for a retry later. */
 		status = -EAGAIN;
@@ -713,6 +715,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	struct xdr_buf *xdr = &req->rq_snd_buf;
 	bool zerocopy = true;
 	int status;
+	int sent;
 
 	xs_encode_stream_record_marker(&req->rq_snd_buf);
 
@@ -730,26 +733,26 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	 * to cope with writespace callbacks arriving _after_ we have
 	 * called sendmsg(). */
 	while (1) {
-		status = xs_sendpages(transport->sock,
-					NULL, 0, xdr, req->rq_bytes_sent,
-					zerocopy);
+		sent = 0;
+		status = xs_sendpages(transport->sock, NULL, 0, xdr,
+				      req->rq_bytes_sent, zerocopy, &sent);
 
 		dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
 				xdr->len - req->rq_bytes_sent, status);
 
-		if (unlikely(status < 0))
+		if (unlikely(sent == 0 && status < 0))
 			break;
 
 		/* If we've sent the entire packet, immediately
 		 * reset the count of bytes sent. */
-		req->rq_bytes_sent += status;
-		req->rq_xmit_bytes_sent += status;
+		req->rq_bytes_sent += sent;
+		req->rq_xmit_bytes_sent += sent;
 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
 			req->rq_bytes_sent = 0;
 			return 0;
 		}
 
-		if (status != 0)
+		if (sent != 0)
 			continue;
 		status = -EAGAIN;
 		break;
-- 
1.8.2.rc2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2 2/2] rpc: Add -EPERM processing for xs_udp_send_request()
  2014-09-24 18:07 [PATCH v2 0/2] rpc: resolve softlockup in presence of iptables drop rule Jason Baron
  2014-09-24 18:08 ` [PATCH v2 1/2] rpc: return sent and err from xs_sendpages() Jason Baron
@ 2014-09-24 18:08 ` Jason Baron
  1 sibling, 0 replies; 3+ messages in thread
From: Jason Baron @ 2014-09-24 18:08 UTC (permalink / raw)
  To: trond.myklebust; +Cc: bfields, Anna.Schumaker, linux-nfs

If an iptables drop rule is added for an nfs server, the client can end up in
a softlockup. Because of the way that xs_sendpages() is structured, the -EPERM
is ignored since the prior bits of the packet may have been successfully queued
and thus xs_sendpages() returns a non-zero value. Then, xs_udp_send_request()
thinks that because some bits were queued it should return -EAGAIN. We then try
the request again and again, resulting in cpu spinning. Reproducer:

1) open a file on the nfs server '/nfs/foo' (mounted using udp)
2) iptables -A OUTPUT -d <nfs server ip> -j DROP
3) write to /nfs/foo
4) close /nfs/foo
5) iptables -D OUTPUT -d <nfs server ip> -j DROP

The softlockup occurs in step 4 above.

The previous patch, allows xs_sendpages() to return both a sent count and
any error values that may have occurred. Thus, if we get an -EPERM, return
that to the higher level code.

With this patch in place we can successfully abort the above sequence and
avoid the softlockup.

I also tried the above test case on an nfs mount on tcp and although the system
does not softlockup, I still ended up with the 'hung_task' firing after 120
seconds, due to the i/o being stuck. The tcp case appears a bit harder to fix,
since -EPERM appears to get ignored much lower down in the stack and does not
propogate up to xs_sendpages(). This case is not quite as insidious as the
softlockup and it is not addressed here.

Reported-by: Yigong Lou <ylou@akamai.com>
Signed-off-by: Jason Baron <jbaron@akamai.com>
---
 net/sunrpc/clnt.c     | 2 ++
 net/sunrpc/xprtsock.c | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 488ddee..c4c6069 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1913,6 +1913,7 @@ call_transmit_status(struct rpc_task *task)
 	case -EHOSTDOWN:
 	case -EHOSTUNREACH:
 	case -ENETUNREACH:
+	case -EPERM:
 		if (RPC_IS_SOFTCONN(task)) {
 			xprt_end_transmit(task);
 			rpc_exit(task, task->tk_status);
@@ -2018,6 +2019,7 @@ call_status(struct rpc_task *task)
 	case -EHOSTDOWN:
 	case -EHOSTUNREACH:
 	case -ENETUNREACH:
+	case -EPERM:
 		if (RPC_IS_SOFTCONN(task)) {
 			rpc_exit(task, status);
 			break;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 348c786..a19e9e7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -644,6 +644,10 @@ static int xs_udp_send_request(struct rpc_task *task)
 	dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
 			xdr->len - req->rq_bytes_sent, status);
 
+	/* firewall is blocking us, don't return -EAGAIN or we end up looping */
+	if (status == -EPERM)
+		goto process_status;
+
 	if (sent > 0 || status == 0) {
 		req->rq_xmit_bytes_sent += sent;
 		if (sent >= req->rq_slen)
@@ -652,6 +656,7 @@ static int xs_udp_send_request(struct rpc_task *task)
 		status = -EAGAIN;
 	}
 
+process_status:
 	switch (status) {
 	case -ENOTSOCK:
 		status = -ENOTCONN;
@@ -667,6 +672,7 @@ static int xs_udp_send_request(struct rpc_task *task)
 	case -ENOBUFS:
 	case -EPIPE:
 	case -ECONNREFUSED:
+	case -EPERM:
 		/* When the server has died, an ICMP port unreachable message
 		 * prompts ECONNREFUSED. */
 		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-- 
1.8.2.rc2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-09-24 18:08 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-24 18:07 [PATCH v2 0/2] rpc: resolve softlockup in presence of iptables drop rule Jason Baron
2014-09-24 18:08 ` [PATCH v2 1/2] rpc: return sent and err from xs_sendpages() Jason Baron
2014-09-24 18:08 ` [PATCH v2 2/2] rpc: Add -EPERM processing for xs_udp_send_request() Jason Baron

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.