From: Ming Chen <mchen@cs.stonybrook.edu>
To: linux-nfs@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: bfields@fieldses.org, trond.myklebust@primarydata.com,
	ezk@fsl.cs.stonybrook.edu,
	nfs-ganesha-devel@lists.sourceforge.net,
	Ming Chen <mchen@cs.stonybrook.edu>
Subject: [PATCH] nfs: avoid nfs_wait_on_seqid() for NFSv4.1
Date: Mon,  3 Nov 2014 11:39:26 -0500
Message-ID: <1415032766-15673-1-git-send-email-mchen@cs.stonybrook.edu>

The seqid, introduced in NFSv4.0, requires that state-changing operations be
performed synchronously, and thus limits parallelism. NFSv4.1 supports
"unlimited parallelism" by using sessions and slots; the seqid is no longer
used and must be ignored by NFSv4.1 servers. However, the current NFS client
always calls nfs_wait_on_sequence() regardless of whether the mount is 4.0
or 4.1.
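
For background, the serialization happens in nfs_wait_on_sequence()
(fs/nfs/nfs4state.c). The following is a simplified sketch of its logic,
not the exact kernel code: each state owner keeps an ordered list of
seqid-protected operations; only the operation at the head of the list may
run, and every other task sleeps on the "Seqid_waitqueue" RPC wait queue
until woken:

/* simplified sketch, based on the 3.18-era code */
int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
{
	struct nfs_seqid_counter *sequence = seqid->sequence;
	int status = 0;

	spin_lock(&sequence->lock);
	seqid->task = task;
	if (list_empty(&seqid->list))
		list_add_tail(&seqid->list, &sequence->list);
	if (list_first_entry(&sequence->list, struct nfs_seqid, list) != seqid) {
		/* not at the head of the list: sleep on "Seqid_waitqueue" */
		rpc_sleep_on(&sequence->wait, task, NULL);
		status = -EAGAIN;
	}
	spin_unlock(&sequence->lock);
	return status;
}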

nfs_wait_on_sequence() can be very slow over a high-latency network. Using
the Filebench file server workload and the following SystemTap script, we
measured that "Seqid_waitqueue" introduced an average delay of 344ms per
sleep on a network with a 10ms RTT.

global sleep_count;
global sleep_time;
global sleep_duration;

// called in '__rpc_sleep_on_priority()'
probe kernel.trace("rpc_task_sleep") {
        name = kernel_string($q->name);
        sleep_time[name, $task] = gettimeofday_us();
}

// called in '__rpc_do_wake_up_task()'
probe kernel.trace("rpc_task_wakeup") {
        name = kernel_string($q->name);
        now = gettimeofday_us();
        old = sleep_time[name, $task];
        if (old) {
                sleep_count[name] += 1;
                sleep_duration[name] += now - old;
                delete sleep_time[name, $task];
        }
}

// report, per wait queue, the sleep count and the average sleep time
probe end {
        foreach (name in sleep_count) {
                printf("\"%s\" -- sleep count: %d; sleep time: %ld us\n",
                                name, sleep_count[name],
                                sleep_duration[name] / sleep_count[name]);
        }
}
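
For reference, assuming the script is saved as rpc_sleep.stp (a name chosen
here for illustration), it can be run alongside the workload with

        stap -v rpc_sleep.stp

and stopped with Ctrl-C, which triggers the "probe end" report.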

SystemTap output:
        "xprt_pending" -- sleep count: 20051; sleep time: 10453 us
        "xprt_sending" -- sleep count: 2489; sleep time: 43 us
        "ForeChannel Slot table" -- sleep count: 37; sleep time: 731 us
        "Seqid_waitqueue" -- sleep count: 7428; sleep time: 343774 us

This patch skips the unnecessary nfs_wait_on_sequence() calls for NFSv4.1.
Across the run above, tasks slept on Seqid_waitqueue 7428 times at roughly
344ms each, about 2550 seconds of cumulative waiting. With the patch, the
throughput of the Filebench file server workload improves from 175 ops/sec
to 1550 ops/sec.

This patch is based on 3.18-rc3, and has been tested on 3.14.17 and 3.18-rc3.
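
For context, the nfs4_get_session() helper used in the checks below returns
the session on a sessions-based (v4.1 or later) mount and NULL on an NFSv4.0
mount, so it doubles as a minor-version check. Roughly, from the 3.18-era
fs/nfs/nfs4session.h (a sketch, not necessarily the exact definition):

/* returns the NFSv4.1 session, or NULL on an NFSv4.0 mount */
static inline struct nfs4_session *
nfs4_get_session(const struct nfs_server *server)
{
	return server->nfs_client->cl_session;
}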

Signed-off-by: Ming Chen <mchen@cs.stonybrook.edu>
---
 fs/nfs/nfs4proc.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 405bd95..be06010 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1778,7 +1778,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 	struct nfs4_state_owner *sp = data->owner;
 	struct nfs_client *clp = sp->so_server->nfs_client;
 
-	if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
+	if (!nfs4_get_session(sp->so_server) &&
+	    nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
 		goto out_wait;
 	/*
 	 * Check if we still need to send an OPEN call, or if we can use
@@ -2617,7 +2618,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	int call_close = 0;
 
 	dprintk("%s: begin!\n", __func__);
-	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
+	if (!nfs4_get_session(state->owner->so_server) &&
+	    nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		goto out_wait;
 
 	task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
@@ -5399,7 +5401,8 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs4_unlockdata *calldata = data;
 
-	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
+	if (!nfs4_get_session(calldata->server) &&
+	    nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		goto out_wait;
 	if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
 		/* Note: exit _without_ running nfs4_locku_done */
@@ -5566,11 +5569,13 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 	struct nfs4_state *state = data->lsp->ls_state;
 
 	dprintk("%s: begin!\n", __func__);
-	if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
+	if (!nfs4_get_session(data->server) &&
+	    nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
 		goto out_wait;
 	/* Do we need to do an open_to_lock_owner? */
 	if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
-		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
+		if (!nfs4_get_session(data->server) &&
+		    nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
 			goto out_release_lock_seqid;
 		}
 		data->arg.open_stateid = &state->open_stateid;
-- 
1.8.1.2


Thread overview: 6+ messages

2014-11-03 16:39 [PATCH] nfs: avoid nfs_wait_on_seqid() for NFSv4.1 Ming Chen [this message]
2015-07-02 22:47 Ming Chen
2015-07-02 23:23 ` Trond Myklebust
2015-07-03 21:10   ` Ming Chen
