All of lore.kernel.org
 help / color / mirror / Atom feed
* linux 2.4.18-5 nfs/rpc problem
@ 2003-10-08 22:42 Guzovsky, Eduard
  2003-10-10 17:12 ` David Jeffery
  0 siblings, 1 reply; 7+ messages in thread
From: Guzovsky, Eduard @ 2003-10-08 22:42 UTC (permalink / raw)
  To: 'nfs@lists.sourceforge.net'; +Cc: Pariseau, Luc, A.K. Srikanth

[-- Attachment #1: Type: text/plain, Size: 9567 bytes --]

We ran into a deadlock problem running Red Hat linux 2.4.18-5
on a diskless machine. Root file system is mounted over 
udp nfs v3. The problem is created by an application reading,
writing, renaming, deleting files. This machine has 256MB
of memory.

Bellow is some relevant info from the deadlocked machine.

It looks like "rpciod" (see the stack of thread ID 8 in the 
gdb listing) hangs in __wait_on_page(page=0xc1241d7c) trying 
to do nfs_commit_done() on a stale file handle. At the same 
time "kupdated" (see the stack of thread ID 6) is stuck in 
nfs_wait_on_request() while trying to do nfs_writepage()
on the same page rpciod hangs. The problem goes away if the 
file system is mounted with "sync" option. 

Is this a known problem ? Is a fix available ?

Thanks,

-Ed



rpc_show_tasks() output:
------------------------

-pid- proc flgs runs status -client- -prog- --rqstp- -timeout -rpcwait
-action- --exit--
33180 0004 0000 0002 000120 cff38aa0 100003 cff32160 00000000  <NULL>
c02090a0        0
32943 0021 0001 0002 -00116 cff38aa0 100003 cff32b58 00000000  <NULL>
0 c0176d10
32942 0021 0001 0002 000128 cff38aa0 100003 cff32078 00000000   schedq
c02090a0 c0176d10
32941 0021 0001 0002 000128 cff38aa0 100003 cff32418 00000000   schedq
c02090a0 c0176d10
00156 0000 0081 0002 -00110 cff38b20 100003        0 00000000   schedq
c016f890 c016f990
00006 0000 0081 0002 -00110 cff38aa0 100003        0 00000000   schedq
c016f890 c016f990

kernel gdb session:
-------------------

(gdb) info threads
  26 Thread 1064  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  25 Thread 1055  sys_wait4 (pid=0xffffffff, stat_addr=0x0, options=0x0, 
    ru=0x0) at exit.c:649
  24 Thread 855  schedule_timeout (timeout=0xcc1d5f00) at timer.c:879
  23 Thread 825  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  22 Thread 782  schedule_timeout (timeout=0xcc2cbf00) at timer.c:879
  21 Thread 781  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  20 Thread 763  sys_msgrcv (msqid=0x0, msgp=0xbffffc40, msgsz=0x100, 
    msgtyp=0x1, msgflg=0x0) at msg.c:816
  19 Thread 745  schedule_timeout (timeout=0xcc3b3f74) at timer.c:879
  18 Thread 711  schedule_timeout (timeout=0xcc3d1f74) at timer.c:879
  17 Thread 691  schedule_timeout (timeout=0xcc3dff00) at timer.c:879
  16 Thread 673  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  15 Thread 655  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  14 Thread 616  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  13 Thread 582  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  12 Thread 498  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  11 Thread 463  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  10 Thread 445  do_syslog (type=0x2, buf=0x804df20 "u\b^[-bèyS",
len=0xfff)^[-A
    at printk.c:191
  9 Thread 440  schedule_timeout (timeout=0x7fffffff) at timer.c:851
  8 Thread 8  ___wait_on_page (page=0xc1241d7c)
    at /usr/src/linux_2.4.18.5/include/asm/bitops.h:243
  7 Thread 7  interruptible_sleep_on (q=0xc1393784) at sched.c:926
  6 Thread 6  nfs_wait_on_request (req=0xcff27ae0) at pagelist.c:223
  5 Thread 5  interruptible_sleep_on (q=0xc02c6d88) at sched.c:926
  4 Thread 4  schedule_timeout (timeout=0xc13a5f80) at timer.c:879
  3 Thread 3  ksoftirqd (__bind_cpu=0x0) at softirq.c:386
  2 Thread 2  context_thread (startup=0xc02f008c) at context.c:101
  1 Thread 1  schedule_timeout (timeout=0xc1357f00) at timer.c:879

[kupdated] info
---------------
(gdb) thread 6
[Switching to thread 6 (Thread 6)]#0  nfs_wait_on_request (req=0xcff27ae0)
    at pagelist.c:223
(gdb) bt
#0  nfs_wait_on_request (req=0xcff27ae0) at pagelist.c:223
#1  0xc0175636 in nfs_wait_on_requests (inode=0xcb931080, file=0x0, 
    idx_start=0x0, npages=0x1) at write.c:479
#2  0xc0177268 in nfs_sync_file (inode=0xcb931080, file=0x0, idx_start=0x0, 
    npages=0x1, how=0x6) at write.c:1307
#3  0xc0175436 in nfs_writepage (page=0xc1241d7c)
    at /usr/src/linux_2.4.18.5/include/linux/nfs_fs.h:257
#4  0xc0127d14 in filemap_fdatasync (mapping=0xcb931134) at filemap.c:594
#5  0xc0149be1 in sync_unlocked_inodes () at inode.c:224
#6  0xc013b778 in sync_old_buffers () at buffer.c:2801
#7  0xc013ba72 in kupdate (startup=0xc02f0100) at buffer.c:2996
#8  0xc0107306 in kernel_thread (fn=0x202a2, arg=0xc1356000, flags=0x0)
    at process.c:499

(gdb) p *(struct nfs_page *)0xcff27ae0
$3 = {wb_hash = {next = 0xcff27ae0, prev = 0xcff27ae0}, wb_lru = {
    next = 0xcff27848, prev = 0xcff27788}, wb_list = {next = 0xcff27af0, 
    prev = 0xcff27af0}, wb_list_head = 0x0, wb_file = 0x0, 
  wb_inode = 0xcb931080, wb_cred = 0xcff38ba0, wb_page = 0xc1241d7c, 
  wb_wait = {lock = {gcc_is_buggy = 0x0}, task_list = {next = 0xc13a1eb0, 
      prev = 0xc13a1eb0}}, wb_timeout = 0xa785, wb_offset = 0x0, 
  wb_bytes = 0x400, wb_count = 0x3, wb_flags = 0x1, wb_verf = {
    committed = NFS_UNSTABLE, verifier = {0xbf766b3f, 0x87af0400}}}

deadlocking page info
---------------------
(gdb) p *(struct page *)(0xc1241d7c)
$6 = {list = {next = 0xcb931144, prev = 0xcb931144}, mapping = 0xcb931134, 
  index = 0x0, next_hash = 0x0, count = {counter = 0x4}, flags = 0x100004d, 
  lru = {next = 0xc125a468, prev = 0xc123b9dc}, age = 0x5, pte_chain = 0x0, 
  pprev_hash = 0xcffdee60, buffers = 0x0}
(gdb) p *((struct page *)(0xc1241d7c))->mapping
$7 = {clean_pages = {next = 0xcb931134, prev = 0xcb931134}, dirty_pages = {
    next = 0xcb93113c, prev = 0xcb93113c}, locked_pages = {next =
0xc1241d7c, 
    prev = 0xc1241d7c}, nrpages = 0x1, a_ops = 0xc02c8820, host =
0xcb931080, 
  i_mmap = 0x0, i_mmap_shared = 0x0, i_shared_lock = {gcc_is_buggy = 0x0}, 
  gfp_mask = 0x1d2}
(gdb) p *((struct page *)(0xc1241d7c))->mapping->a_ops
$8 = {writepage = 0xc0175400 <nfs_writepage>, 
  readpage = 0xc0174a50 <nfs_readpage>, 
  sync_page = 0xc016f400 <nfs_sync_page>, 
  prepare_write = 0xc016f3e0 <nfs_prepare_write>, 
  commit_write = 0xc016f3f0 <nfs_commit_write>, bmap = 0, flushpage = 0, 
  releasepage = 0, direct_IO = 0, removepage = 0}

[rpciod] info
-------------
(gdb) thread 8
[Switching to thread 8 (Thread 8)]#0  ___wait_on_page (page=0xc1241d7c)
    at /usr/src/linux_2.4.18.5/include/asm/bitops.h:243
(gdb) bt
#0  ___wait_on_page (page=0xc1241d7c)
    at /usr/src/linux_2.4.18.5/include/asm/bitops.h:243
#1  0xc01278d5 in truncate_list_pages (head=0xcb931144, start=0x0, 
    partial=0xcff2df1c)
    at /usr/src/linux_2.4.18.5/include/linux/pagemap.h:97
#2  0xc012796e in truncate_inode_pages (mapping=0xcb931134, lstart=0x0)
    at filemap.c:349
#3  0xc014acb2 in iput (inode=0xcb931080) at inode.c:1086
#4  0xc0176f45 in nfs_commit_done (task=0xcfe284c0) at write.c:353
#5  0xc020c71a in __rpc_execute (task=0xcfe284c0) at sched.c:648
#6  0xc020c909 in __rpc_schedule () at sched.c:733
#7  0xc020d168 in rpciod (ptr=0xc02d954c) at sched.c:1076
#8  0xc0107306 in kernel_thread (fn=0xc0316660 <tcp_hashinfo>,
arg=0xc139fc00, 
    flags=0x0) at process.c:499
#9  0x00000008 in ?? ()

rpc_task (tk_pid 32943) info (note: tk_status = -116 ESTALE)
------------------------------------------------------------
(gdb) p *(struct rpc_task *)0xcfe284c0
$9 = {tk_list = {next = 0xc02d9514, prev = 0xc02d9514}, tk_magic = 0xf00baa,

  tk_task = {next = 0xcfe28c4c, prev = 0xcfe2804c}, tk_client = 0xcff38aa0, 
  tk_rqstp = 0xcff32b58, tk_status = 0xffffff8c, tk_rpcwait = 0x0, tk_msg =
{
    rpc_proc = 0x15, rpc_argp = 0xcfe28550, rpc_resp = 0xcfe285a8, 
    rpc_cred = 0xcff38ba0}, tk_buffer = 0xcff78000, tk_garb_retry = 0x2, 
  tk_cred_retry = 0x2, tk_suid_retry = 0x1, tk_timeout_fn = 0, 
  tk_callback = 0, tk_action = 0, tk_exit = 0xc0176d10 <nfs_commit_done>, 
  tk_release = 0xc0175070 <nfs_writedata_release>, tk_calldata = 0xcfe284c0,

  tk_timer = {list = {next = 0x0, prev = 0x0}, expires = 0xa7c8, 
    data = 0xcfe284c0, function = 0xc020bab0 <rpc_run_timer>}, tk_wait = {
    lock = {gcc_is_buggy = 0x0}, task_list = {next = 0xcfe2852c, 
      prev = 0xcfe2852c}}, tk_timeout = 0x0, tk_flags = 0x1, tk_lock = 0x0, 
  tk_active = 0x1, tk_wakeup = 0x0, tk_runstate = 0x2, tk_pid = 0x80af}
(gdb) p/d 0x80af
$10 = 32943

(gdb) p *((struct rpc_task *)0xcfe284c0)->tk_rqstp
$11 = {rq_xprt = 0xcff32000, rq_timeout = {to_current = 0x46, 
    to_initval = 0x46, to_maxval = 0x1770, to_increment = 0x0, 
    to_resrvval = 0x2ee0, to_retries = 0x3, to_exponential = 0x1}, 
  rq_snd_buf = {io_vec = {{iov_base = 0xcff78000, iov_len = 0x64}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}}, io_nr = 0x1, io_len = 0x64}, 
  rq_rcv_buf = {io_vec = {{iov_base = 0xcff78280, iov_len = 0x280}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}, {iov_base = 0x0, iov_len = 0x0}, {
        iov_base = 0x0, iov_len = 0x0}}, io_nr = 0x1, io_len = 0x280}, 
  rq_task = 0xcfe284c0, rq_xid = 0x424070ad, rq_next = 0x0, rq_cong = 0x0, 
  rq_received = 0x1, rq_creddata = {0x0, 0x0}, rq_bytes_sent = 0x0}

[-- Attachment #2: Type: text/html, Size: 15454 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: linux 2.4.18-5 nfs/rpc problem
  2003-10-08 22:42 linux 2.4.18-5 nfs/rpc problem Guzovsky, Eduard
@ 2003-10-10 17:12 ` David Jeffery
  2003-10-10 18:58   ` Trond Myklebust
  0 siblings, 1 reply; 7+ messages in thread
From: David Jeffery @ 2003-10-10 17:12 UTC (permalink / raw)
  To: Guzovsky, Eduard; +Cc: nfs

[-- Attachment #1: Type: text/plain, Size: 1021 bytes --]

Eduard,

I have also seen this problem with both Red Hat and stock kernels.  I've
got some patches That I'm testing to try and fix the problem for me, but
it takes a while for my setup to reproduce the problem.  How easy is it
for you to trigger?

I've attached two patches for 2.4.22 that I'm currently testing.  The
first (nfsiput.patch) is to fix the deadlocked processes you see by
moving the iput() call to happen after the waited on request is
unlocked.

But that simple fix is incomplete.  It creates another problem that
generates a BUG() in iput() as now instead of deadlocking, nfs can
continue on and end up calling iput() on a cleared inode.  The second
patch (nfsigrab.patch) checks for a failure when calling igrab() and
aborts an async write and instead converts it to a sync write which I
believe should fix the problem.  But, I haven't triggered it yet so this
patch is effectively untested.

If someone who knows nfs/vfs better could sanity check what I'm doing,
it would be appreciated.

David Jeffery

[-- Attachment #2: nfsiput.patch --]
[-- Type: text/plain, Size: 3053 bytes --]

--- write.c.orig	Thu Sep 25 07:33:34 2003
+++ write.c	Fri Sep 26 07:09:32 2003
@@ -318,14 +318,15 @@
 /*
  * Insert a write request into an inode
  */
-static inline void
+static inline int
 nfs_inode_remove_request(struct nfs_page *req)
 {
 	struct inode *inode;
+	int need_iput = 0;
 	spin_lock(&nfs_wreq_lock);
 	if (list_empty(&req->wb_hash)) {
 		spin_unlock(&nfs_wreq_lock);
-		return;
+		return 0;
 	}
 	if (!NFS_WBACK_BUSY(req))
 		printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
@@ -335,13 +336,12 @@
 	inode->u.nfs_i.npages--;
 	if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
 		printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
-	if (list_empty(&inode->u.nfs_i.writeback)) {
-		spin_unlock(&nfs_wreq_lock);
-		iput(inode);
-	} else
-		spin_unlock(&nfs_wreq_lock);
+	if (list_empty(&inode->u.nfs_i.writeback))
+		need_iput = 1;
+	spin_unlock(&nfs_wreq_lock);
 	nfs_clear_request(req);
 	nfs_release_request(req);
+	return need_iput;
 }
 
 /*
@@ -1049,6 +1049,7 @@
 	 */
 	nfs_write_attributes(inode, resp->fattr);
 	while (!list_empty(&data->pages)) {
+		int need_iput = 0;
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
 		page = req->wb_page;
@@ -1064,14 +1065,14 @@
 			SetPageError(page);
 			if (req->wb_file)
 				req->wb_file->f_error = task->tk_status;
-			nfs_inode_remove_request(req);
+			need_iput = nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", task->tk_status);
 			goto next;
 		}
 
 #ifdef CONFIG_NFS_V3
 		if (argp->stable != NFS_UNSTABLE || resp->verf->committed == NFS_FILE_SYNC) {
-			nfs_inode_remove_request(req);
+			need_iput = nfs_inode_remove_request(req);
 			dprintk(" OK\n");
 			goto next;
 		}
@@ -1080,10 +1081,12 @@
 		nfs_mark_request_commit(req);
 		dprintk(" marked for commit\n");
 #else
-		nfs_inode_remove_request(req);
+		need_iput = nfs_inode_remove_request(req);
 #endif
 	next:
 		nfs_unlock_request(req);
+		if(need_iput)
+			iput(inode);
 	}
 }
 
@@ -1203,6 +1206,7 @@
 
 	nfs_write_attributes(inode, resp->fattr);
 	while (!list_empty(&data->pages)) {
+		int need_iput = 0;
 		req = nfs_list_entry(data->pages.next);
 		nfs_list_remove_request(req);
 
@@ -1214,7 +1218,7 @@
 		if (task->tk_status < 0) {
 			if (req->wb_file)
 				req->wb_file->f_error = task->tk_status;
-			nfs_inode_remove_request(req);
+			need_iput = nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", task->tk_status);
 			goto next;
 		}
@@ -1223,7 +1227,7 @@
 		 * returned by the server against all stored verfs. */
 		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
 			/* We have a match */
-			nfs_inode_remove_request(req);
+			need_iput = nfs_inode_remove_request(req);
 			dprintk(" OK\n");
 			goto next;
 		}
@@ -1232,6 +1236,8 @@
 		nfs_mark_request_dirty(req);
 	next:
 		nfs_unlock_request(req);
+		if(need_iput)
+			iput(inode);
 	}
 }
 #endif

[-- Attachment #3: nfsigrab.patch --]
[-- Type: text/plain, Size: 1961 bytes --]

--- linux-2.4.22/fs/nfs/write.c.fix4.notfinished	Tue Oct  7 08:15:56 2003
+++ linux-2.4.22/fs/nfs/write.c	Tue Oct  7 09:14:16 2003
@@ -248,6 +248,12 @@
 		err = nfs_writepage_async(NULL, inode, page, 0, offset);
 		if (err >= 0)
 			err = 0;
+		else if(err == -EDEADLK){
+			printk(KERN_WARNING "NFS DEBUG: EDEADLK returned, doing nfs_writepage_sync\n");
+			err = nfs_writepage_sync(NULL, inode, page, 0, offset);
+			if (err == offset)
+				err = 0;
+		}
 	} else {
 		err = nfs_writepage_sync(NULL, inode, page, 0, offset); 
 		if (err == offset)
@@ -292,19 +298,22 @@
  *	 & co. for the 'write append' case. For 2.5 we may want to consider
  *	 some form of hashing so as to perform well on random writes.
  */
-static inline void
+static inline int
 nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 {
 	struct list_head *pos, *head;
 	unsigned long pg_idx = page_index(req->wb_page);
 
 	if (!list_empty(&req->wb_hash))
-		return;
+		return 0;
 	if (!NFS_WBACK_BUSY(req))
 		printk(KERN_ERR "NFS: unlocked request attempted hashed!\n");
 	head = &inode->u.nfs_i.writeback;
 	if (list_empty(head))
-		igrab(inode);
+		if(!igrab(inode)){
+			printk(KERN_WARNING "NFS DEBUG: igrab failed!  Aborting async writepage.\n");
+			return -EDEADLK;
+		}
 	list_for_each_prev(pos, head) {
 		struct nfs_page *entry = nfs_inode_wb_entry(pos);
 		if (page_index(entry->wb_page) < pg_idx)
@@ -313,6 +322,7 @@
 	inode->u.nfs_i.npages++;
 	list_add(&req->wb_hash, pos);
 	req->wb_count++;
+	return 0;
 }
 
 /*
@@ -659,9 +669,15 @@
 		}
 
 		if (new) {
+			int error;
 			nfs_lock_request_dontget(new);
-			nfs_inode_add_request(inode, new);
+			error = nfs_inode_add_request(inode, new);
 			spin_unlock(&nfs_wreq_lock);
+			if(error < 0){
+				nfs_unlock_request(new);
+				nfs_release_request(new);
+				return ERR_PTR(error);
+			}
 			nfs_mark_request_dirty(new);
 			return new;
 		}

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: linux 2.4.18-5 nfs/rpc problem
  2003-10-10 17:12 ` David Jeffery
@ 2003-10-10 18:58   ` Trond Myklebust
  2003-10-10 19:44     ` Trond Myklebust
  0 siblings, 1 reply; 7+ messages in thread
From: Trond Myklebust @ 2003-10-10 18:58 UTC (permalink / raw)
  To: David Jeffery; +Cc: Guzovsky, Eduard, nfs

>>>>> " " == David Jeffery <david_jeffery@adaptec.com> writes:

     > Eduard, I have also seen this problem with both Red Hat and
     > stock kernels.  I've got some patches That I'm testing to try
     > and fix the problem for me, but it takes a while for my setup
     > to reproduce the problem.  How easy is it for you to trigger?

     > I've attached two patches for 2.4.22 that I'm currently
     > testing.  The first (nfsiput.patch) is to fix the deadlocked
     > processes you see by moving the iput() call to happen after the
     > waited on request is unlocked.

I take it you're assuming that the problem is a deadlock due to
clear_inode() hanging on our page lock? That does indeed appear to be
a possible source of contention.

Instead of your 2 patches, though, how about just ensuring that
anybody calling nfs_wait_on_requests() already has a reference to the
inode?

There is indeed one case where this is not being done: in
page_launder() you only have a lock on the page. This means that you
may end up calling nfs_wait_on_request() in order to flush out a page
only to discover that it called iput() from beneath you and is now
waiting in clear_inode() on your page lock.

Cheers,
  Trond

--- linux-2.4.23-pre5/fs/nfs/write.c.orig	2003-07-09 14:11:04.000000000 -0400
+++ linux-2.4.23-pre5/fs/nfs/write.c	2003-10-10 14:54:16.000000000 -0400
@@ -225,8 +225,19 @@
 	struct inode *inode = page->mapping->host;
 	unsigned long end_index;
 	unsigned offset = PAGE_CACHE_SIZE;
+	int inode_referenced = 0;
 	int err;
 
+	/*
+	 * Note: We need to ensure that we have a reference to the inode
+	 *       if we are to do asynchronous writes. If not, waiting
+	 *       in nfs_wait_on_request() may deadlock with clear_inode().
+	 *
+	 *       If igrab() fails here, then it is in any case safe to
+	 *       call nfs_wb_page(), since there will be no pending writes.
+	 */
+	if (igrab(inode) != 0)
+		inode_referenced = 1;
 	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 
 	/* Ensure we've flushed out any previous writes */
@@ -244,7 +255,8 @@
 		goto out;
 do_it:
 	lock_kernel();
-	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) {
+	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode) &&
+			inode_referenced) {
 		err = nfs_writepage_async(NULL, inode, page, 0, offset);
 		if (err >= 0)
 			err = 0;
@@ -256,7 +268,9 @@
 	unlock_kernel();
 out:
 	UnlockPage(page);
-	return err; 
+	if (inode_referenced)
+		iput(inode);
+	return err;
 }
 
 /*


-------------------------------------------------------
This SF.net email is sponsored by: SF.net Giveback Program.
SourceForge.net hosts over 70,000 Open Source Projects.
See the people who have HELPED US provide better services:
Click here: http://sourceforge.net/supporters.php
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: linux 2.4.18-5 nfs/rpc problem
  2003-10-10 18:58   ` Trond Myklebust
@ 2003-10-10 19:44     ` Trond Myklebust
  2003-10-13 14:17       ` David Jeffery
  0 siblings, 1 reply; 7+ messages in thread
From: Trond Myklebust @ 2003-10-10 19:44 UTC (permalink / raw)
  To: David Jeffery; +Cc: Guzovsky, Eduard, nfs

>>>>> " " == Trond Myklebust <trond.myklebust@fys.uio.no> writes:

     > There is indeed one case where this is not being done: in
     > page_launder() you only have a lock on the page. This means
     > that you may end up calling nfs_wait_on_request() in order to
     > flush out a page only to discover that it called iput() from
     > beneath you and is now waiting in clear_inode() on your page
     > lock.

Sorry. I meant 'shrink_cache()'. page_launder() probably disappeared
around 2.4.10 8-).

The point was that writepage() should be the only case where the above
is a problem. I believe readpage does not suffer from the same issue
since it can only be called while the file is still open...

Cheers,
  Trond


-------------------------------------------------------
This SF.net email is sponsored by: SF.net Giveback Program.
SourceForge.net hosts over 70,000 Open Source Projects.
See the people who have HELPED US provide better services:
Click here: http://sourceforge.net/supporters.php
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: linux 2.4.18-5 nfs/rpc problem
  2003-10-10 19:44     ` Trond Myklebust
@ 2003-10-13 14:17       ` David Jeffery
  0 siblings, 0 replies; 7+ messages in thread
From: David Jeffery @ 2003-10-13 14:17 UTC (permalink / raw)
  To: nfs

On Fri, 2003-10-10 at 15:44, Trond Myklebust wrote:
> >>>>> " " == Trond Myklebust <trond.myklebust@fys.uio.no> writes:
> 
>      > There is indeed one case where this is not being done: in
>      > page_launder() you only have a lock on the page. This means
>      > that you may end up calling nfs_wait_on_request() in order to
>      > flush out a page only to discover that it called iput() from
>      > beneath you and is now waiting in clear_inode() on your page
>      > lock.
> 
> Sorry. I meant 'shrink_cache()'. page_launder() probably disappeared
> around 2.4.10 8-).
> 
> The point was that writepage() should be the only case where the above
> is a problem. I believe readpage does not suffer from the same issue
> since it can only be called while the file is still open...
> 
> Cheers,
>   Trond

Trond,

I don't see any holes in your much simpler patch.  I'm glad you spotted
a much simpler approach than mine.  I really didn't like the ugly
-EDEADLK game I was playing in my second patch. I'll post something if I
see a problem but I doubt I will. Thanks Trond.

David Jeffery






-------------------------------------------------------
This SF.net email is sponsored by: SF.net Giveback Program.
SourceForge.net hosts over 70,000 Open Source Projects.
See the people who have HELPED US provide better services:
Click here: http://sourceforge.net/supporters.php
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: linux 2.4.18-5 nfs/rpc problem
@ 2003-10-11  0:20 Guzovsky, Eduard
  0 siblings, 0 replies; 7+ messages in thread
From: Guzovsky, Eduard @ 2003-10-11  0:20 UTC (permalink / raw)
  To: Guzovsky, Eduard, 'Trond Myklebust', 'David Jeffery'
  Cc: 'nfs@lists.sourceforge.net', Pariseau, Luc, A.K. Srikanth

[-- Attachment #1: Type: text/plain, Size: 3894 bytes --]

Sorry, I meant 2.4.24.

-Ed


 > -----Original Message-----
 > From: Guzovsky, Eduard 
 > Sent: Friday, October 10, 2003 8:18 PM
 > To: 'Trond Myklebust'; David Jeffery
 > Cc: nfs@lists.sourceforge.net; Pariseau, Luc; A.K. Srikanth
 > Subject: RE: [NFS] linux 2.4.18-5 nfs/rpc problem
 > 
 > 
 > David, we were able to consistently reproduce the problem. 
 > After I made modifications Trond suggested we do not
 > see the problem any more. So I would say it is fixed.
 > Trond, will this fix make it into 2.2.24 ? 
 > 
 > Thank you very much for your help,
 > 
 > -Ed
 >  
 > 
 >  > -----Original Message-----
 >  > From: Trond Myklebust [mailto:trond.myklebust@fys.uio.no]
 >  > Sent: Friday, October 10, 2003 2:58 PM
 >  > To: David Jeffery
 >  > Cc: Guzovsky, Eduard; nfs@lists.sourceforge.net
 >  > Subject: Re: [NFS] linux 2.4.18-5 nfs/rpc problem
 >  > 
 >  > 
 >  > >>>>> " " == David Jeffery <david_jeffery@adaptec.com> writes:
 >  > 
 >  >      > Eduard, I have also seen this problem with both Red Hat and
 >  >      > stock kernels.  I've got some patches That I'm 
 > testing to try
 >  >      > and fix the problem for me, but it takes a while 
 > for my setup
 >  >      > to reproduce the problem.  How easy is it for you 
 > to trigger?
 >  > 
 >  >      > I've attached two patches for 2.4.22 that I'm currently
 >  >      > testing.  The first (nfsiput.patch) is to fix the 
 > deadlocked
 >  >      > processes you see by moving the iput() call to happen 
 >  > after the
 >  >      > waited on request is unlocked.
 >  > 
 >  > I take it you're assuming that the problem is a deadlock due to
 >  > clear_inode() hanging on our page lock? That does indeed 
 > appear to be
 >  > a possible source of contention.
 >  > 
 >  > Instead of your 2 patches, though, how about just ensuring that
 >  > anybody calling nfs_wait_on_requests() already has a 
 > reference to the
 >  > inode?
 >  > 
 >  > There is indeed one case where this is not being done: in
 >  > page_launder() you only have a lock on the page. This 
 > means that you
 >  > may end up calling nfs_wait_on_request() in order to 
 > flush out a page
 >  > only to discover that it called iput() from beneath you and is now
 >  > waiting in clear_inode() on your page lock.
 >  > 
 >  > Cheers,
 >  >   Trond
 >  > 
 >  > --- linux-2.4.23-pre5/fs/nfs/write.c.orig	2003-07-09 
 >  > 14:11:04.000000000 -0400
 >  > +++ linux-2.4.23-pre5/fs/nfs/write.c	2003-10-10 
 >  > 14:54:16.000000000 -0400
 >  > @@ -225,8 +225,19 @@
 >  >  	struct inode *inode = page->mapping->host;
 >  >  	unsigned long end_index;
 >  >  	unsigned offset = PAGE_CACHE_SIZE;
 >  > +	int inode_referenced = 0;
 >  >  	int err;
 >  >  
 >  > +	/*
 >  > +	 * Note: We need to ensure that we have a reference to the inode
 >  > +	 *       if we are to do asynchronous writes. If not, waiting
 >  > +	 *       in nfs_wait_on_request() may deadlock with 
 >  > clear_inode().
 >  > +	 *
 >  > +	 *       If igrab() fails here, then it is in any case safe to
 >  > +	 *       call nfs_wb_page(), since there will be no 
 >  > pending writes.
 >  > +	 */
 >  > +	if (igrab(inode) != 0)
 >  > +		inode_referenced = 1;
 >  >  	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 >  >  
 >  >  	/* Ensure we've flushed out any previous writes */
 >  > @@ -244,7 +255,8 @@
 >  >  		goto out;
 >  >  do_it:
 >  >  	lock_kernel();
 >  > -	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && 
 >  > !IS_SYNC(inode)) {
 >  > +	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && 
 >  > !IS_SYNC(inode) &&
 >  > +			inode_referenced) {
 >  >  		err = nfs_writepage_async(NULL, inode, page, 0, offset);
 >  >  		if (err >= 0)
 >  >  			err = 0;
 >  > @@ -256,7 +268,9 @@
 >  >  	unlock_kernel();
 >  >  out:
 >  >  	UnlockPage(page);
 >  > -	return err; 
 >  > +	if (inode_referenced)
 >  > +		iput(inode);
 >  > +	return err;
 >  >  }
 >  >  
 >  >  /*
 >  > 
 > 

[-- Attachment #2: Type: text/html, Size: 9889 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: linux 2.4.18-5 nfs/rpc problem
@ 2003-10-11  0:18 Guzovsky, Eduard
  0 siblings, 0 replies; 7+ messages in thread
From: Guzovsky, Eduard @ 2003-10-11  0:18 UTC (permalink / raw)
  To: 'Trond Myklebust', David Jeffery
  Cc: nfs, Pariseau, Luc, A.K. Srikanth

[-- Attachment #1: Type: text/plain, Size: 3271 bytes --]

David, we were able to consistently reproduce the problem. 
After I made modifications Trond suggested we do not
see the problem any more. So I would say it is fixed.
Trond, will this fix make it into 2.2.24 ? 

Thank you very much for your help,

-Ed
 

 > -----Original Message-----
 > From: Trond Myklebust [mailto:trond.myklebust@fys.uio.no]
 > Sent: Friday, October 10, 2003 2:58 PM
 > To: David Jeffery
 > Cc: Guzovsky, Eduard; nfs@lists.sourceforge.net
 > Subject: Re: [NFS] linux 2.4.18-5 nfs/rpc problem
 > 
 > 
 > >>>>> " " == David Jeffery <david_jeffery@adaptec.com> writes:
 > 
 >      > Eduard, I have also seen this problem with both Red Hat and
 >      > stock kernels.  I've got some patches That I'm testing to try
 >      > and fix the problem for me, but it takes a while for my setup
 >      > to reproduce the problem.  How easy is it for you to trigger?
 > 
 >      > I've attached two patches for 2.4.22 that I'm currently
 >      > testing.  The first (nfsiput.patch) is to fix the deadlocked
 >      > processes you see by moving the iput() call to happen 
 > after the
 >      > waited on request is unlocked.
 > 
 > I take it you're assuming that the problem is a deadlock due to
 > clear_inode() hanging on our page lock? That does indeed appear to be
 > a possible source of contention.
 > 
 > Instead of your 2 patches, though, how about just ensuring that
 > anybody calling nfs_wait_on_requests() already has a reference to the
 > inode?
 > 
 > There is indeed one case where this is not being done: in
 > page_launder() you only have a lock on the page. This means that you
 > may end up calling nfs_wait_on_request() in order to flush out a page
 > only to discover that it called iput() from beneath you and is now
 > waiting in clear_inode() on your page lock.
 > 
 > Cheers,
 >   Trond
 > 
 > --- linux-2.4.23-pre5/fs/nfs/write.c.orig	2003-07-09 
 > 14:11:04.000000000 -0400
 > +++ linux-2.4.23-pre5/fs/nfs/write.c	2003-10-10 
 > 14:54:16.000000000 -0400
 > @@ -225,8 +225,19 @@
 >  	struct inode *inode = page->mapping->host;
 >  	unsigned long end_index;
 >  	unsigned offset = PAGE_CACHE_SIZE;
 > +	int inode_referenced = 0;
 >  	int err;
 >  
 > +	/*
 > +	 * Note: We need to ensure that we have a reference to the inode
 > +	 *       if we are to do asynchronous writes. If not, waiting
 > +	 *       in nfs_wait_on_request() may deadlock with 
 > clear_inode().
 > +	 *
 > +	 *       If igrab() fails here, then it is in any case safe to
 > +	 *       call nfs_wb_page(), since there will be no 
 > pending writes.
 > +	 */
 > +	if (igrab(inode) != 0)
 > +		inode_referenced = 1;
 >  	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 >  
 >  	/* Ensure we've flushed out any previous writes */
 > @@ -244,7 +255,8 @@
 >  		goto out;
 >  do_it:
 >  	lock_kernel();
 > -	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && 
 > !IS_SYNC(inode)) {
 > +	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && 
 > !IS_SYNC(inode) &&
 > +			inode_referenced) {
 >  		err = nfs_writepage_async(NULL, inode, page, 0, offset);
 >  		if (err >= 0)
 >  			err = 0;
 > @@ -256,7 +268,9 @@
 >  	unlock_kernel();
 >  out:
 >  	UnlockPage(page);
 > -	return err; 
 > +	if (inode_referenced)
 > +		iput(inode);
 > +	return err;
 >  }
 >  
 >  /*
 > 

[-- Attachment #2: Type: text/html, Size: 8278 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2003-10-13 14:15 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-10-08 22:42 linux 2.4.18-5 nfs/rpc problem Guzovsky, Eduard
2003-10-10 17:12 ` David Jeffery
2003-10-10 18:58   ` Trond Myklebust
2003-10-10 19:44     ` Trond Myklebust
2003-10-13 14:17       ` David Jeffery
2003-10-11  0:18 Guzovsky, Eduard
2003-10-11  0:20 Guzovsky, Eduard

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.