linux-cifs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* cifs: Deferred close for files
@ 2021-03-09  9:11 Rohith Surabattula
  2021-03-11 13:47 ` Shyam Prasad N
  2021-04-12 17:23 ` Steve French
  0 siblings, 2 replies; 16+ messages in thread
From: Rohith Surabattula @ 2021-03-09  9:11 UTC (permalink / raw)
  To: linux-cifs, Steve French, Pavel Shilovsky, Shyam Prasad N,
	sribhat.msa, ronnie sahlberg, aaptel

[-- Attachment #1: Type: text/plain, Size: 362 bytes --]

Hi All,

Please find the attached patch which will defer the close to server.
So, performance can be improved.

i.e. when the access pattern is open, write, close, open, read, close....
As the close is deferred and the oplock is held, the cache will not be invalidated
and the same handle can be used for the second open.

Please review the changes and let me know your thoughts.

Regards,
Rohith

[-- Attachment #2: 0001-PATCH-cifs-Deferred-close-for-files.patch --]
[-- Type: application/octet-stream, Size: 10858 bytes --]

From 4ee04d9711959aace19f5c6043659e77cd7360c5 Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Mon, 8 Mar 2021 16:28:09 +0000
Subject: [PATCH] cifs: Deferred close for files

When file is closed, SMB2 close request is not sent to server
immediately and is deferred for 5 seconds interval. When file is
reopened by same process for read or write, previous file handle
can be used until oplock is held.

When same file is reopened by another client during 5 second
interval, oplock break is sent by server and file is closed immediately
if reference count is zero or else oplock is downgraded.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
---
 fs/cifs/cifsfs.c    | 13 +++++++++-
 fs/cifs/cifsglob.h  | 12 +++++++++
 fs/cifs/cifsproto.h |  8 ++++++
 fs/cifs/connect.c   |  1 +
 fs/cifs/file.c      | 62 +++++++++++++++++++++++++++++++++++++++++++--
 fs/cifs/misc.c      | 47 ++++++++++++++++++++++++++++++++++
 6 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6f33ff3f625f..607106479ff3 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -133,6 +133,7 @@ struct workqueue_struct	*cifsiod_wq;
 struct workqueue_struct	*decrypt_wq;
 struct workqueue_struct	*fileinfo_put_wq;
 struct workqueue_struct	*cifsoplockd_wq;
+struct workqueue_struct *deferredclose_wq;
 __u32 cifs_lock_secret;
 
 /*
@@ -1594,9 +1595,16 @@ init_cifs(void)
 		goto out_destroy_fileinfo_put_wq;
 	}
 
+	deferredclose_wq = alloc_workqueue("deferredclose",
+					   WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+	if (!deferredclose_wq) {
+		rc = -ENOMEM;
+		goto out_destroy_cifsoplockd_wq;
+	}
+
 	rc = cifs_fscache_register();
 	if (rc)
-		goto out_destroy_cifsoplockd_wq;
+		goto out_destroy_deferredclose_wq;
 
 	rc = cifs_init_inodecache();
 	if (rc)
@@ -1664,6 +1672,8 @@ init_cifs(void)
 	cifs_destroy_inodecache();
 out_unreg_fscache:
 	cifs_fscache_unregister();
+out_destroy_deferredclose_wq:
+	destroy_workqueue(deferredclose_wq);
 out_destroy_cifsoplockd_wq:
 	destroy_workqueue(cifsoplockd_wq);
 out_destroy_fileinfo_put_wq:
@@ -1698,6 +1708,7 @@ exit_cifs(void)
 	cifs_destroy_mids();
 	cifs_destroy_inodecache();
 	cifs_fscache_unregister();
+	destroy_workqueue(deferredclose_wq);
 	destroy_workqueue(cifsoplockd_wq);
 	destroy_workqueue(decrypt_wq);
 	destroy_workqueue(fileinfo_put_wq);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3de3c5908a72..72563ef4dd28 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1097,6 +1097,8 @@ struct cifs_tcon {
 #ifdef CONFIG_CIFS_SWN_UPCALL
 	bool use_witness:1; /* use witness protocol */
 #endif
+	struct list_head deferred_closes; /* list of deferred closes */
+	spinlock_t deferred_lock; /* protection on deferred list */
 };
 
 /*
@@ -1154,6 +1156,14 @@ struct cifs_pending_open {
 	__u32 oplock;
 };
 
+struct cifs_deferred_close {
+	struct list_head dlist;
+	struct tcon_link *tlink;
+	__u16  netfid;
+	__u64  persistent_fid;
+	__u64  volatile_fid;
+};
+
 /*
  * This info hangs off the cifsFileInfo structure, pointed to by llist.
  * This is used to track byte stream locks on the file
@@ -1248,6 +1258,7 @@ struct cifsFileInfo {
 	struct cifs_search_info srch_inf;
 	struct work_struct oplock_break; /* work for oplock breaks */
 	struct work_struct put; /* work for the final part of _put */
+	bool oplock_deferred_close; /* Flag to indicate oplock break */
 };
 
 struct cifs_io_parms {
@@ -1897,6 +1908,7 @@ extern struct workqueue_struct *cifsiod_wq;
 extern struct workqueue_struct *decrypt_wq;
 extern struct workqueue_struct *fileinfo_put_wq;
 extern struct workqueue_struct *cifsoplockd_wq;
+extern struct workqueue_struct *deferredclose_wq;
 extern __u32 cifs_lock_secret;
 
 extern mempool_t *cifs_mid_poolp;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 32f7a013402e..5f085ff7a7c9 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -254,6 +254,14 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
 					 struct tcon_link *tlink,
 					 struct cifs_pending_open *open);
 extern void cifs_del_pending_open(struct cifs_pending_open *open);
+
+extern bool cifs_is_deferred_close(struct cifsFileInfo *cfile,
+				struct cifs_deferred_close **dclose);
+
+extern void cifs_add_deferred_close(struct cifsFileInfo *cfile);
+
+extern void cifs_del_deferred_close(struct cifsFileInfo *cfile);
+
 extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
 				 int from_reconnect);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index cd6dbeaf2166..d96177aafbe0 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2206,6 +2206,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 	tcon->nodelete = ctx->nodelete;
 	tcon->local_lease = ctx->local_lease;
 	INIT_LIST_HEAD(&tcon->pending_opens);
+	INIT_LIST_HEAD(&tcon->deferred_closes);
 
 	spin_lock(&cifs_tcp_ses_lock);
 	list_add(&tcon->tcon_list, &ses->tcon_list);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 26de4329d161..178ea0ea2555 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -321,6 +321,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->dentry = dget(dentry);
 	cfile->f_flags = file->f_flags;
 	cfile->invalidHandle = false;
+	cfile->oplock_deferred_close = false;
 	cfile->tlink = cifs_get_tlink(tlink);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
@@ -562,6 +563,17 @@ int cifs_open(struct inode *inode, struct file *file)
 			file->f_op = &cifs_file_direct_ops;
 	}
 
+	spin_lock(&tcon->deferred_lock);
+	/* Get the cached handle as SMB2 close is deferred */
+	rc = cifs_get_readable_path(tcon, full_path, &cfile);
+	if (rc == 0) {
+		file->private_data = cfile;
+		cifs_del_deferred_close(cfile);
+		spin_unlock(&tcon->deferred_lock);
+		goto out;
+	}
+	spin_unlock(&tcon->deferred_lock);
+
 	if (server->oplocks)
 		oplock = REQ_OPLOCK;
 	else
@@ -842,11 +854,44 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 	return rc;
 }
 
+struct smb2_deferred_work {
+	struct delayed_work deferred;
+	struct cifsFileInfo *cfile;
+};
+
+void smb2_deferred_work_close(struct work_struct *work)
+{
+	struct smb2_deferred_work *dwork = container_of(work,
+			struct smb2_deferred_work, deferred.work);
+
+	spin_lock(&tlink_tcon(dwork->cfile->tlink)->deferred_lock);
+	cifs_del_deferred_close(dwork->cfile);
+	spin_unlock(&tlink_tcon(dwork->cfile->tlink)->deferred_lock);
+	_cifsFileInfo_put(dwork->cfile, true, false);
+}
+
 int cifs_close(struct inode *inode, struct file *file)
 {
+	struct smb2_deferred_work *dwork;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+
+	dwork = kmalloc(sizeof(struct smb2_deferred_work), GFP_KERNEL);
+
+	INIT_DELAYED_WORK(&dwork->deferred, smb2_deferred_work_close);
+
 	if (file->private_data != NULL) {
-		_cifsFileInfo_put(file->private_data, true, false);
+		dwork->cfile = file->private_data;
 		file->private_data = NULL;
+		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) ||
+		    (cinode->oplock == CIFS_CACHE_RH_FLG)) {
+			spin_lock(&tlink_tcon(dwork->cfile->tlink)->deferred_lock);
+			cifs_add_deferred_close(dwork->cfile);
+			spin_unlock(&tlink_tcon(dwork->cfile->tlink)->deferred_lock);
+			/* Deferred close for files */
+			queue_delayed_work(deferredclose_wq, &dwork->deferred, 5 * HZ);
+		} else {
+			_cifsFileInfo_put(dwork->cfile, true, false);
+		}
 	}
 
 	/* return code from the ->release op is always ignored */
@@ -1943,7 +1988,8 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
 			continue;
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
-			if (!open_file->invalidHandle) {
+			if ((!open_file->invalidHandle) &&
+				(!open_file->oplock_deferred_close)) {
 				/* found a good file */
 				/* lock it so it will not be closed on us */
 				cifsFileInfo_get(open_file);
@@ -4746,6 +4792,8 @@ void cifs_oplock_break(struct work_struct *work)
 	struct TCP_Server_Info *server = tcon->ses->server;
 	int rc = 0;
 	bool purge_cache = false;
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
 
 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
 			TASK_UNINTERRUPTIBLE);
@@ -4793,6 +4841,16 @@ void cifs_oplock_break(struct work_struct *work)
 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
 	}
 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
+	/*
+	 * When oplock break is received and there are no active
+	 * file handles but cached, then set the flag oplock_deferred_close.
+	 * So, new open will not use cached handle.
+	 */
+	spin_lock(&tlink_tcon(cfile->tlink)->deferred_lock);
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (is_deferred)
+		cfile->oplock_deferred_close = true;
+	spin_unlock(&tlink_tcon(cfile->tlink)->deferred_lock);
 	cifs_done_oplock_break(cinode);
 }
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 82e176720ca6..298cc8b54857 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -136,6 +136,7 @@ tconInfoAlloc(void)
 	spin_lock_init(&ret_buf->stat_lock);
 	atomic_set(&ret_buf->num_local_opens, 0);
 	atomic_set(&ret_buf->num_remote_opens, 0);
+	spin_lock_init(&ret_buf->deferred_lock);
 
 	return ret_buf;
 }
@@ -672,6 +673,52 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
 	spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
 
+bool
+cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
+{
+	struct cifs_deferred_close *dclose;
+
+	list_for_each_entry(dclose, &tlink_tcon(cfile->tlink)->deferred_closes, dlist) {
+		if ((dclose->netfid == cfile->fid.netfid) &&
+			(dclose->persistent_fid == cfile->fid.persistent_fid) &&
+			(dclose->volatile_fid == cfile->fid.volatile_fid)) {
+			*pdclose = dclose;
+			return true;
+		}
+	}
+	return false;
+}
+
+void
+cifs_add_deferred_close(struct cifsFileInfo *cfile)
+{
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
+
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (is_deferred)
+		return;
+
+	dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
+	dclose->tlink = cfile->tlink;
+	dclose->netfid = cfile->fid.netfid;
+	dclose->persistent_fid = cfile->fid.persistent_fid;
+	dclose->volatile_fid = cfile->fid.volatile_fid;
+	list_add_tail(&dclose->dlist, &tlink_tcon(dclose->tlink)->deferred_closes);
+}
+
+void
+cifs_del_deferred_close(struct cifsFileInfo *cfile)
+{
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
+
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (!is_deferred)
+		return;
+	list_del(&dclose->dlist);
+}
+
 /* parses DFS refferal V3 structure
  * caller is responsible for freeing target_nodes
  * returns:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-03-09  9:11 cifs: Deferred close for files Rohith Surabattula
@ 2021-03-11 13:47 ` Shyam Prasad N
  2021-03-11 17:55   ` Tom Talpey
  2021-04-12 17:23 ` Steve French
  1 sibling, 1 reply; 16+ messages in thread
From: Shyam Prasad N @ 2021-03-11 13:47 UTC (permalink / raw)
  To: Rohith Surabattula
  Cc: linux-cifs, Steve French, Pavel Shilovsky, sribhat.msa,
	ronnie sahlberg, Aurélien Aptel

Hi Rohith,

The changes look good at a high level.

Just a few points worth checking:
1. In cifs_open(), should we perform deferred close for certain cases
like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
perform less data caching. By deferring close, aren't we delaying
flushing dirty pages? @Steve French ?
2. I see that you're maintaining a new list of files for deferred
closing. Since there could be a large number of such files for a big
share with sufficient I/O, maybe we should think of a structure with
faster lookups (rb trees?).
I know we already have a bunch of linked lists in cifs.ko, and we need
to review perf impact for all those lists. But this one sounds like a
candidate for faster lookups.
3. Minor comment. Maybe change the field name oplock_deferred_close to
oplock_break_received?

Regards,
Shyam

On Tue, Mar 9, 2021 at 2:41 PM Rohith Surabattula
<rohiths.msft@gmail.com> wrote:
>
> Hi All,
>
> Please find the attached patch which will defer the close to server.
> So, performance can be improved.
>
> i.e When file is open, write, close, open, read, close....
> As close is deferred and oplock is held, cache will not be invalidated
> and same handle can be used for second open.
>
> Please review the changes and let me know your thoughts.
>
> Regards,
> Rohith



-- 
Regards,
Shyam

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-03-11 13:47 ` Shyam Prasad N
@ 2021-03-11 17:55   ` Tom Talpey
  2021-03-22 17:07     ` Rohith Surabattula
  0 siblings, 1 reply; 16+ messages in thread
From: Tom Talpey @ 2021-03-11 17:55 UTC (permalink / raw)
  To: Shyam Prasad N, Rohith Surabattula
  Cc: linux-cifs, Steve French, Pavel Shilovsky, sribhat.msa,
	ronnie sahlberg, Aurélien Aptel

On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> Hi Rohith,
> 
> The changes look good at a high level.
> 
> Just a few points worth checking:
> 1. In cifs_open(), should be perform deferred close for certain cases
> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> perform less data caching. By deferring close, aren't we delaying
> flushing dirty pages? @Steve French ?
> 2. I see that you're maintaining a new list of files for deferred
> closing. Since there could be a large number of such files for a big
> share with sufficient I/O, maybe we should think of a structure with
> faster lookups (rb trees?).
> I know we already have a bunch of linked lists in cifs.ko, and we need
> to review perf impact for all those lists. But this one sounds like a
> candidate for faster lookups.
> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> oplock_break_received?
> 
> Regards,
> Shyam

I wonder why the choice of 5 seconds? It seems to me a more natural
interval on the order of one or more of
- attribute cache timeout
- lease time
- echo_interval

Also, this wording is rather confusing:

> When file is closed, SMB2 close request is not sent to server
> immediately and is deferred for 5 seconds interval. When file is
> reopened by same process for read or write, previous file handle
> can be used until oplock is held.

It's not a "previous" filehandle if it's open, and "can be used" is
a rather passive statement. A better wording may be "the filehandle
is reused".

And, "until oplock is held" is similarly awkward. Do you mean "*if*
an oplock is held", or "*provided" an oplock is held"?

> When same file is reopened by another client during 5 second
> interval, oplock break is sent by server and file is closed immediately
> if reference count is zero or else oplock is downgraded.

Only the second part of the sentence is relevant to the patch. Also,
what about lease break?

What happens if the handle is durable or persistent, and the connection
to the server times out? Is the handle recovered, then closed?

Tom.


> On Tue, Mar 9, 2021 at 2:41 PM Rohith Surabattula
> <rohiths.msft@gmail.com> wrote:
>>
>> Hi All,
>>
>> Please find the attached patch which will defer the close to server.
>> So, performance can be improved.
>>
>> i.e When file is open, write, close, open, read, close....
>> As close is deferred and oplock is held, cache will not be invalidated
>> and same handle can be used for second open.
>>
>> Please review the changes and let me know your thoughts.
>>
>> Regards,
>> Rohith
> 
> 
> 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-03-11 17:55   ` Tom Talpey
@ 2021-03-22 17:07     ` Rohith Surabattula
  2021-03-24 14:20       ` Tom Talpey
  0 siblings, 1 reply; 16+ messages in thread
From: Rohith Surabattula @ 2021-03-22 17:07 UTC (permalink / raw)
  To: Tom Talpey
  Cc: Shyam Prasad N, linux-cifs, Steve French, Pavel Shilovsky,
	sribhat.msa, ronnie sahlberg, Aurélien Aptel

[-- Attachment #1: Type: text/plain, Size: 5371 bytes --]

On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> Hi Rohith,
>
> The changes look good at a high level.
>
> Just a few points worth checking:
> 1. In cifs_open(), should be perform deferred close for certain cases
> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> perform less data caching. By deferring close, aren't we delaying
> flushing dirty pages? @Steve French ?

With the O_DIRECT flag, data is not cached locally and will be sent to
the server immediately.

> 2. I see that you're maintaining a new list of files for deferred
> closing. Since there could be a large number of such files for a big
> share with sufficient I/O, maybe we should think of a structure with
> faster lookups (rb trees?).
> I know we already have a bunch of linked lists in cifs.ko, and we need
> to review perf impact for all those lists. But this one sounds like a
> candidate for faster lookups.

Entries will be added to this list only once the file is closed and will
remain for the acregmax interval.
So, will this affect performance, i.e., during lookups?

> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> oplock_break_received?

Addressed. Attached the patch.
>
> Regards,
> Shyam

>I wonder why the choice of 5 seconds? It seems to me a more natural
>interval on the order of one or more of
>- attribute cache timeout
>- lease time
>- echo_interval

Yes, This sounds good. I modified the patch to defer by attribute
cache timeout interval.

>Also, this wording is rather confusing:

>> When file is closed, SMB2 close request is not sent to server
>> immediately and is deferred for 5 seconds interval. When file is
>> reopened by same process for read or write, previous file handle
>> can be used until oplock is held.

>It's not a "previous" filehandle if it's open, and "can be used" is
>a rather passive statement. A better wording may be "the filehandle
>is reused".

>And, "until oplock is held" is similarly awkward. Do you mean "*if*
>an oplock is held", or "*provided" an oplock is held"?

>> When same file is reopened by another client during 5 second
>> interval, oplock break is sent by server and file is closed immediately
>> if reference count is zero or else oplock is downgraded.

>Only the second part of the sentence is relevant to the patch. Also,
>what about lease break?

Modified the patch.

>What happens if the handle is durable or persistent, and the connection
>to the server times out? Is the handle recovered, then closed?

Do you mean when the session gets reconnected, the deferred handle
will be recovered and closed?

Regards,
Rohith

On Thu, Mar 11, 2021 at 11:25 PM Tom Talpey <tom@talpey.com> wrote:
>
> On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> > Hi Rohith,
> >
> > The changes look good at a high level.
> >
> > Just a few points worth checking:
> > 1. In cifs_open(), should be perform deferred close for certain cases
> > like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > perform less data caching. By deferring close, aren't we delaying
> > flushing dirty pages? @Steve French ?
> > 2. I see that you're maintaining a new list of files for deferred
> > closing. Since there could be a large number of such files for a big
> > share with sufficient I/O, maybe we should think of a structure with
> > faster lookups (rb trees?).
> > I know we already have a bunch of linked lists in cifs.ko, and we need
> > to review perf impact for all those lists. But this one sounds like a
> > candidate for faster lookups.
> > 3. Minor comment. Maybe change the field name oplock_deferred_close to
> > oplock_break_received?
> >
> > Regards,
> > Shyam
>
> I wonder why the choice of 5 seconds? It seems to me a more natural
> interval on the order of one or more of
> - attribute cache timeout
> - lease time
> - echo_interval
>
> Also, this wording is rather confusing:
>
> > When file is closed, SMB2 close request is not sent to server
> > immediately and is deferred for 5 seconds interval. When file is
> > reopened by same process for read or write, previous file handle
> > can be used until oplock is held.
>
> It's not a "previous" filehandle if it's open, and "can be used" is
> a rather passive statement. A better wording may be "the filehandle
> is reused".
>
> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> an oplock is held", or "*provided" an oplock is held"?
>
> > When same file is reopened by another client during 5 second
> > interval, oplock break is sent by server and file is closed immediately
> > if reference count is zero or else oplock is downgraded.
>
> Only the second part of the sentence is relevant to the patch. Also,
> what about lease break?
>
> What happens if the handle is durable or persistent, and the connection
> to the server times out? Is the handle recovered, then closed?
>
> Tom.
>
>
> > On Tue, Mar 9, 2021 at 2:41 PM Rohith Surabattula
> > <rohiths.msft@gmail.com> wrote:
> >>
> >> Hi All,
> >>
> >> Please find the attached patch which will defer the close to server.
> >> So, performance can be improved.
> >>
> >> i.e When file is open, write, close, open, read, close....
> >> As close is deferred and oplock is held, cache will not be invalidated
> >> and same handle can be used for second open.
> >>
> >> Please review the changes and let me know your thoughts.
> >>
> >> Regards,
> >> Rohith
> >
> >
> >

[-- Attachment #2: 0001-cifs-Deferred-close-for-files.patch --]
[-- Type: application/octet-stream, Size: 10855 bytes --]

From ca42cf0388c194e835e414b5811443b3a27de4de Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Mon, 8 Mar 2021 16:28:09 +0000
Subject: [PATCH] cifs: Deferred close for files

When file is closed, SMB2 close request is not sent to server
immediately and is deferred for acregmax defined interval. When file is
reopened by same process for read or write, the file handle
is reused if an oplock is held.

When the client receives an oplock/lease break, the file is closed immediately
if the reference count is zero, else the oplock is downgraded.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
---
 fs/cifs/cifsfs.c    | 13 +++++++++-
 fs/cifs/cifsglob.h  | 12 +++++++++
 fs/cifs/cifsproto.h |  8 ++++++
 fs/cifs/connect.c   |  1 +
 fs/cifs/file.c      | 63 +++++++++++++++++++++++++++++++++++++++++++--
 fs/cifs/misc.c      | 47 +++++++++++++++++++++++++++++++++
 6 files changed, 141 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 099ad9f3660b..3a426efba94e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -133,6 +133,7 @@ struct workqueue_struct	*cifsiod_wq;
 struct workqueue_struct	*decrypt_wq;
 struct workqueue_struct	*fileinfo_put_wq;
 struct workqueue_struct	*cifsoplockd_wq;
+struct workqueue_struct *deferredclose_wq;
 __u32 cifs_lock_secret;
 
 /*
@@ -1605,9 +1606,16 @@ init_cifs(void)
 		goto out_destroy_fileinfo_put_wq;
 	}
 
+	deferredclose_wq = alloc_workqueue("deferredclose",
+					   WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+	if (!deferredclose_wq) {
+		rc = -ENOMEM;
+		goto out_destroy_cifsoplockd_wq;
+	}
+
 	rc = cifs_fscache_register();
 	if (rc)
-		goto out_destroy_cifsoplockd_wq;
+		goto out_destroy_deferredclose_wq;
 
 	rc = cifs_init_inodecache();
 	if (rc)
@@ -1675,6 +1683,8 @@ init_cifs(void)
 	cifs_destroy_inodecache();
 out_unreg_fscache:
 	cifs_fscache_unregister();
+out_destroy_deferredclose_wq:
+	destroy_workqueue(deferredclose_wq);
 out_destroy_cifsoplockd_wq:
 	destroy_workqueue(cifsoplockd_wq);
 out_destroy_fileinfo_put_wq:
@@ -1709,6 +1719,7 @@ exit_cifs(void)
 	cifs_destroy_mids();
 	cifs_destroy_inodecache();
 	cifs_fscache_unregister();
+	destroy_workqueue(deferredclose_wq);
 	destroy_workqueue(cifsoplockd_wq);
 	destroy_workqueue(decrypt_wq);
 	destroy_workqueue(fileinfo_put_wq);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 31fc8695abd6..48858e31b746 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1097,6 +1097,8 @@ struct cifs_tcon {
 #ifdef CONFIG_CIFS_SWN_UPCALL
 	bool use_witness:1; /* use witness protocol */
 #endif
+	struct list_head deferred_closes; /* list of deferred closes */
+	spinlock_t deferred_lock; /* protection on deferred list */
 };
 
 /*
@@ -1154,6 +1156,14 @@ struct cifs_pending_open {
 	__u32 oplock;
 };
 
+struct cifs_deferred_close {
+	struct list_head dlist;
+	struct tcon_link *tlink;
+	__u16  netfid;
+	__u64  persistent_fid;
+	__u64  volatile_fid;
+};
+
 /*
  * This info hangs off the cifsFileInfo structure, pointed to by llist.
  * This is used to track byte stream locks on the file
@@ -1248,6 +1258,7 @@ struct cifsFileInfo {
 	struct cifs_search_info srch_inf;
 	struct work_struct oplock_break; /* work for oplock breaks */
 	struct work_struct put; /* work for the final part of _put */
+	bool oplock_break_received; /* Flag to indicate oplock break */
 };
 
 struct cifs_io_parms {
@@ -1898,6 +1909,7 @@ extern struct workqueue_struct *cifsiod_wq;
 extern struct workqueue_struct *decrypt_wq;
 extern struct workqueue_struct *fileinfo_put_wq;
 extern struct workqueue_struct *cifsoplockd_wq;
+extern struct workqueue_struct *deferredclose_wq;
 extern __u32 cifs_lock_secret;
 
 extern mempool_t *cifs_mid_poolp;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 75ce6f742b8d..a35b599d53a5 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -256,6 +256,14 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
 					 struct tcon_link *tlink,
 					 struct cifs_pending_open *open);
 extern void cifs_del_pending_open(struct cifs_pending_open *open);
+
+extern bool cifs_is_deferred_close(struct cifsFileInfo *cfile,
+				struct cifs_deferred_close **dclose);
+
+extern void cifs_add_deferred_close(struct cifsFileInfo *cfile);
+
+extern void cifs_del_deferred_close(struct cifsFileInfo *cfile);
+
 extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
 				 int from_reconnect);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index eec8a2052da2..2804634d4040 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2228,6 +2228,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 	tcon->nodelete = ctx->nodelete;
 	tcon->local_lease = ctx->local_lease;
 	INIT_LIST_HEAD(&tcon->pending_opens);
+	INIT_LIST_HEAD(&tcon->deferred_closes);
 
 	spin_lock(&cifs_tcp_ses_lock);
 	list_add(&tcon->tcon_list, &ses->tcon_list);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 26de4329d161..5c140dbe9a6b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -321,6 +321,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->dentry = dget(dentry);
 	cfile->f_flags = file->f_flags;
 	cfile->invalidHandle = false;
+	cfile->oplock_break_received = false;
 	cfile->tlink = cifs_get_tlink(tlink);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
@@ -562,6 +563,17 @@ int cifs_open(struct inode *inode, struct file *file)
 			file->f_op = &cifs_file_direct_ops;
 	}
 
+	spin_lock(&tcon->deferred_lock);
+	/* Get the cached handle as SMB2 close is deferred */
+	rc = cifs_get_readable_path(tcon, full_path, &cfile);
+	if (rc == 0) {
+		file->private_data = cfile;
+		cifs_del_deferred_close(cfile);
+		spin_unlock(&tcon->deferred_lock);
+		goto out;
+	}
+	spin_unlock(&tcon->deferred_lock);
+
 	if (server->oplocks)
 		oplock = REQ_OPLOCK;
 	else
@@ -842,11 +854,45 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 	return rc;
 }
 
+struct smb2_deferred_work {
+	struct delayed_work deferred;
+	struct cifsFileInfo *cfile;
+};
+
+void smb2_deferred_work_close(struct work_struct *work)
+{
+	struct smb2_deferred_work *dwork = container_of(work,
+			struct smb2_deferred_work, deferred.work);
+
+	spin_lock(&tlink_tcon(dwork->cfile->tlink)->deferred_lock);
+	cifs_del_deferred_close(dwork->cfile);
+	spin_unlock(&tlink_tcon(dwork->cfile->tlink)->deferred_lock);
+	_cifsFileInfo_put(dwork->cfile, true, false);
+}
+
 int cifs_close(struct inode *inode, struct file *file)
 {
+	struct smb2_deferred_work *dwork;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+	dwork = kmalloc(sizeof(struct smb2_deferred_work), GFP_KERNEL);
+
+	INIT_DELAYED_WORK(&dwork->deferred, smb2_deferred_work_close);
+
 	if (file->private_data != NULL) {
-		_cifsFileInfo_put(file->private_data, true, false);
+		dwork->cfile = file->private_data;
 		file->private_data = NULL;
+		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) ||
+		    (cinode->oplock == CIFS_CACHE_RH_FLG)) {
+			spin_lock(&tlink_tcon(dwork->cfile->tlink)->deferred_lock);
+			cifs_add_deferred_close(dwork->cfile);
+			spin_unlock(&tlink_tcon(dwork->cfile->tlink)->deferred_lock);
+			/* Deferred close for files */
+			queue_delayed_work(deferredclose_wq, &dwork->deferred, cifs_sb->ctx->acregmax);
+		} else {
+			_cifsFileInfo_put(dwork->cfile, true, false);
+		}
 	}
 
 	/* return code from the ->release op is always ignored */
@@ -1943,7 +1989,8 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
 			continue;
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
-			if (!open_file->invalidHandle) {
+			if ((!open_file->invalidHandle) &&
+				(!open_file->oplock_break_received)) {
 				/* found a good file */
 				/* lock it so it will not be closed on us */
 				cifsFileInfo_get(open_file);
@@ -4746,6 +4793,8 @@ void cifs_oplock_break(struct work_struct *work)
 	struct TCP_Server_Info *server = tcon->ses->server;
 	int rc = 0;
 	bool purge_cache = false;
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
 
 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
 			TASK_UNINTERRUPTIBLE);
@@ -4793,6 +4842,16 @@ void cifs_oplock_break(struct work_struct *work)
 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
 	}
 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
+	/*
+	 * When oplock break is received and there are no active
+	 * file handles but cached, then set the flag oplock_break_received.
+	 * So, new open will not use cached handle.
+	 */
+	spin_lock(&tlink_tcon(cfile->tlink)->deferred_lock);
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (is_deferred)
+		cfile->oplock_break_received = true;
+	spin_unlock(&tlink_tcon(cfile->tlink)->deferred_lock);
 	cifs_done_oplock_break(cinode);
 }
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 82e176720ca6..298cc8b54857 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -136,6 +136,7 @@ tconInfoAlloc(void)
 	spin_lock_init(&ret_buf->stat_lock);
 	atomic_set(&ret_buf->num_local_opens, 0);
 	atomic_set(&ret_buf->num_remote_opens, 0);
+	spin_lock_init(&ret_buf->deferred_lock);
 
 	return ret_buf;
 }
@@ -672,6 +673,52 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
 	spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
 
+/*
+ * Look up a pending deferred close for @cfile on its tcon's
+ * deferred_closes list, matching on netfid + persistent/volatile fid.
+ * On a match, *pdclose is set to the entry and true is returned;
+ * otherwise false is returned and *pdclose is left untouched.
+ *
+ * NOTE(review): list is traversed unlocked here; all callers appear to
+ * hold tcon->deferred_lock — confirm, and consider documenting that as
+ * a hard requirement (lockdep_assert_held).
+ */
+bool
+cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
+{
+	struct cifs_deferred_close *dclose;
+
+	list_for_each_entry(dclose, &tlink_tcon(cfile->tlink)->deferred_closes, dlist) {
+		if ((dclose->netfid == cfile->fid.netfid) &&
+			(dclose->persistent_fid == cfile->fid.persistent_fid) &&
+			(dclose->volatile_fid == cfile->fid.volatile_fid)) {
+			*pdclose = dclose;
+			return true;
+		}
+	}
+	return false;
+}
+
+/*
+ * Record a deferred close for @cfile on its tcon's deferred_closes
+ * list so the SMB2 close can be sent later. No-op if an entry for
+ * this handle already exists, or if the allocation fails (the close
+ * will then simply not be deferred). Caller must hold
+ * tcon->deferred_lock.
+ */
+void
+cifs_add_deferred_close(struct cifsFileInfo *cfile)
+{
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
+
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (is_deferred)
+		return;
+
+	dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
+	/* must check allocation: dereferencing a failed kmalloc would oops */
+	if (dclose == NULL)
+		return;
+	dclose->tlink = cfile->tlink;
+	dclose->netfid = cfile->fid.netfid;
+	dclose->persistent_fid = cfile->fid.persistent_fid;
+	dclose->volatile_fid = cfile->fid.volatile_fid;
+	list_add_tail(&dclose->dlist, &tlink_tcon(dclose->tlink)->deferred_closes);
+}
+
+/*
+ * Remove and free the deferred-close entry for @cfile, if one exists.
+ * Caller must hold tcon->deferred_lock.
+ */
+void
+cifs_del_deferred_close(struct cifsFileInfo *cfile)
+{
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
+
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (!is_deferred)
+		return;
+	list_del(&dclose->dlist);
+	/* entry was kmalloc'd in cifs_add_deferred_close; freeing here
+	 * plugs a leak — the original only unlinked it */
+	kfree(dclose);
+}
+
 /* parses DFS refferal V3 structure
  * caller is responsible for freeing target_nodes
  * returns:
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-03-22 17:07     ` Rohith Surabattula
@ 2021-03-24 14:20       ` Tom Talpey
  2021-03-25  2:42         ` Rohith Surabattula
  0 siblings, 1 reply; 16+ messages in thread
From: Tom Talpey @ 2021-03-24 14:20 UTC (permalink / raw)
  To: Rohith Surabattula
  Cc: Shyam Prasad N, linux-cifs, Steve French, Pavel Shilovsky,
	sribhat.msa, ronnie sahlberg, Aurélien Aptel

On 3/22/2021 1:07 PM, Rohith Surabattula wrote:
> On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
>> Hi Rohith,
>>
>> The changes look good at a high level.
>>
>> Just a few points worth checking:
>> 1. In cifs_open(), should we perform deferred close for certain cases
>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
>> perform less data caching. By deferring close, aren't we delaying
>> flushing dirty pages? @Steve French ?
> 
> With O_DIRECT flag, data is not cached locally and will be sent to
> server immediately.
> 
>> 2. I see that you're maintaining a new list of files for deferred
>> closing. Since there could be a large number of such files for a big
>> share with sufficient I/O, maybe we should think of a structure with
>> faster lookups (rb trees?).
>> I know we already have a bunch of linked lists in cifs.ko, and we need
>> to review perf impact for all those lists. But this one sounds like a
>> candidate for faster lookups.
> 
> Entries will be added into this list only once file is closed and will
> remain for acregmax amount interval.

I think you mean once the "file descriptor" is closed, right? But now
that you mention it, caching the attributes sounds a lot like the
NFS close-to-open semantic, which is itself optional with the "nocto"
mount option.

Because some applications use close() to perform flush, there may be
some silent side effects as well. I don't see anything special in the
patch regarding this. Have you considered it?

> So, will this affect performance, i.e. during lookups?
> 
>> 3. Minor comment. Maybe change the field name oplock_deferred_close to
>> oplock_break_received?
> 
> Addressed. Attached the patch.
>>
>> Regards,
>> Shyam
> 
>> I wonder why the choice of 5 seconds? It seems to me a more natural
>> interval on the order of one or more of
>> - attribute cache timeout
>> - lease time
>> - echo_interval
> 
> Yes, This sounds good. I modified the patch to defer by attribute
> cache timeout interval.
> 
>> Also, this wording is rather confusing:
> 
>>> When file is closed, SMB2 close request is not sent to server
>>> immediately and is deferred for 5 seconds interval. When file is
>>> reopened by same process for read or write, previous file handle
>>> can be used until oplock is held.
> 
>> It's not a "previous" filehandle if it's open, and "can be used" is
>> a rather passive statement. A better wording may be "the filehandle
>> is reused".
> 
>> And, "until oplock is held" is similarly awkward. Do you mean "*if*
>> an oplock is held", or "*provided" an oplock is held"?
> 
>>> When same file is reopened by another client during 5 second
>>> interval, oplock break is sent by server and file is closed immediately
>>> if reference count is zero or else oplock is downgraded.
> 
>> Only the second part of the sentence is relevant to the patch. Also,
>> what about lease break?
> 
> Modified the patch.

The change to promptly close the handle on oplock or lease break looks
reasonable. The rewording in the patch description is better too.

>> What happens if the handle is durable or persistent, and the connection
>> to the server times out? Is the handle recovered, then closed?
> 
> Do you mean when the session gets reconnected, the deferred handle
> will be recovered and closed?

Yes, because I expect the client to attempt to reclaim its handles upon
reconnection. I don't see any particular code change regarding this.

My question is, if there's a deferred close pending, should it do that,
or should it simply cut to the chase and close any such handle(s)?

Tom.

> Regards,
> Rohith
> 
> On Thu, Mar 11, 2021 at 11:25 PM Tom Talpey <tom@talpey.com> wrote:
>>
>> On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
>>> Hi Rohith,
>>>
>>> The changes look good at a high level.
>>>
>>> Just a few points worth checking:
>>> 1. In cifs_open(), should be perform deferred close for certain cases
>>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
>>> perform less data caching. By deferring close, aren't we delaying
>>> flushing dirty pages? @Steve French ?
>>> 2. I see that you're maintaining a new list of files for deferred
>>> closing. Since there could be a large number of such files for a big
>>> share with sufficient I/O, maybe we should think of a structure with
>>> faster lookups (rb trees?).
>>> I know we already have a bunch of linked lists in cifs.ko, and we need
>>> to review perf impact for all those lists. But this one sounds like a
>>> candidate for faster lookups.
>>> 3. Minor comment. Maybe change the field name oplock_deferred_close to
>>> oplock_break_received?
>>>
>>> Regards,
>>> Shyam
>>
>> I wonder why the choice of 5 seconds? It seems to me a more natural
>> interval on the order of one or more of
>> - attribute cache timeout
>> - lease time
>> - echo_interval
>>
>> Also, this wording is rather confusing:
>>
>>> When file is closed, SMB2 close request is not sent to server
>>> immediately and is deferred for 5 seconds interval. When file is
>>> reopened by same process for read or write, previous file handle
>>> can be used until oplock is held.
>>
>> It's not a "previous" filehandle if it's open, and "can be used" is
>> a rather passive statement. A better wording may be "the filehandle
>> is reused".
>>
>> And, "until oplock is held" is similarly awkward. Do you mean "*if*
>> an oplock is held", or "*provided" an oplock is held"?
>>
>>> When same file is reopened by another client during 5 second
>>> interval, oplock break is sent by server and file is closed immediately
>>> if reference count is zero or else oplock is downgraded.
>>
>> Only the second part of the sentence is relevant to the patch. Also,
>> what about lease break?
>>
>> What happens if the handle is durable or persistent, and the connection
>> to the server times out? Is the handle recovered, then closed?
>>
>> Tom.
>>
>>
>>> On Tue, Mar 9, 2021 at 2:41 PM Rohith Surabattula
>>> <rohiths.msft@gmail.com> wrote:
>>>>
>>>> Hi All,
>>>>
>>>> Please find the attached patch which will defer the close to server.
>>>> So, performance can be improved.
>>>>
>>>> i.e When file is open, write, close, open, read, close....
>>>> As close is deferred and oplock is held, cache will not be invalidated
>>>> and same handle can be used for second open.
>>>>
>>>> Please review the changes and let me know your thoughts.
>>>>
>>>> Regards,
>>>> Rohith
>>>
>>>
>>>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-03-24 14:20       ` Tom Talpey
@ 2021-03-25  2:42         ` Rohith Surabattula
  2021-04-07 14:57           ` Rohith Surabattula
  0 siblings, 1 reply; 16+ messages in thread
From: Rohith Surabattula @ 2021-03-25  2:42 UTC (permalink / raw)
  To: Tom Talpey
  Cc: Shyam Prasad N, linux-cifs, Steve French, Pavel Shilovsky,
	sribhat.msa, ronnie sahlberg, Aurélien Aptel

>>> Hi Rohith,
>>>
>>> The changes look good at a high level.
>>>
>>> Just a few points worth checking:
>>> 1. In cifs_open(), should be perform deferred close for certain cases
>>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
>>> perform less data caching. By deferring close, aren't we delaying
>>> flushing dirty pages? @Steve French ?
>>
>> With O_DIRECT flag, data is not cached locally and will be sent to
>> server immediately.
>>
>>> 2. I see that you're maintaining a new list of files for deferred
>>> closing. Since there could be a large number of such files for a big
>>> share with sufficient I/O, maybe we should think of a structure with
>>> faster lookups (rb trees?).
>>> I know we already have a bunch of linked lists in cifs.ko, and we need
>>> to review perf impact for all those lists. But this one sounds like a
>>> candidate for faster lookups.
>>
>> Entries will be added into this list only once file is closed and will
>> remain for acregmax amount interval.

>I think you mean once the "file descriptor" is closed, right? But now
>that you mention it, caching the attributes sounds a lot like the
>NFS close-to-open semantic, which is itself optional with the "nocto"
>mount option.

>Because some applications use close() to perform flush, there may be
>some silent side effects as well. I don't see anything special in the
>patch regarding this. Have you considered it?
>The change to promptly close the handle on oplock or lease break looks
>reasonable. The rewording in the patch description is better too.

Yes, as the handle is closed immediately when oplock/lease breaks,
will there be any
silent side effects still?

>>> What happens if the handle is durable or persistent, and the connection
>>> to the server times out? Is the handle recovered, then closed?
>>
>> Do you mean when the session gets reconnected, the deferred handle
>> will be recovered and closed?

>Yes, because I expect the client to attempt to reclaim its handles upon
>reconnection. I don't see any particular code change regarding this.

>My question is, if there's a deferred close pending, should it do that,
>or should it simply cut to the chase and close any such handle(s)?

As the handles are persistent once the session gets reconnected,
applications can reclaim
the handle. So, I believe there is no need to close handles immediately until
timeout(i.e acregmax interval)

Regards,
Rohith

On Wed, Mar 24, 2021 at 7:50 PM Tom Talpey <tom@talpey.com> wrote:
>
> On 3/22/2021 1:07 PM, Rohith Surabattula wrote:
> > On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> >> Hi Rohith,
> >>
> >> The changes look good at a high level.
> >>
> >> Just a few points worth checking:
> >> 1. In cifs_open(), should be perform deferred close for certain cases
> >> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> >> perform less data caching. By deferring close, aren't we delaying
> >> flushing dirty pages? @Steve French ?
> >
> > With O_DIRECT flag, data is not cached locally and will be sent to
> > server immediately.
> >
> >> 2. I see that you're maintaining a new list of files for deferred
> >> closing. Since there could be a large number of such files for a big
> >> share with sufficient I/O, maybe we should think of a structure with
> >> faster lookups (rb trees?).
> >> I know we already have a bunch of linked lists in cifs.ko, and we need
> >> to review perf impact for all those lists. But this one sounds like a
> >> candidate for faster lookups.
> >
> > Entries will be added into this list only once file is closed and will
> > remain for acregmax amount interval.
>
> I think you mean once the "file descriptor" is closed, right? But now
> that you mention it, caching the attributes sounds a lot like the
> NFS close-to-open semantic, which is itself optional with the "nocto"
> mount option.
>
> Because some applications use close() to perform flush, there may be
> some silent side effects as well. I don't see anything special in the
> patch regarding this. Have you considered it?
>
> > So,  Will this affect performance i.e during lookups ?
> >
> >> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> >> oplock_break_received?
> >
> > Addressed. Attached the patch.
> >>
> >> Regards,
> >> Shyam
> >
> >> I wonder why the choice of 5 seconds? It seems to me a more natural
> >> interval on the order of one or more of
> >> - attribute cache timeout
> >> - lease time
> >> - echo_interval
> >
> > Yes, This sounds good. I modified the patch to defer by attribute
> > cache timeout interval.
> >
> >> Also, this wording is rather confusing:
> >
> >>> When file is closed, SMB2 close request is not sent to server
> >>> immediately and is deferred for 5 seconds interval. When file is
> >>> reopened by same process for read or write, previous file handle
> >>> can be used until oplock is held.
> >
> >> It's not a "previous" filehandle if it's open, and "can be used" is
> >> a rather passive statement. A better wording may be "the filehandle
> >> is reused".
> >
> >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> >> an oplock is held", or "*provided" an oplock is held"?
> >
> >>> When same file is reopened by another client during 5 second
> >>> interval, oplock break is sent by server and file is closed immediately
> >>> if reference count is zero or else oplock is downgraded.
> >
> >> Only the second part of the sentence is relevant to the patch. Also,
> >> what about lease break?
> >
> > Modified the patch.
>
> The change to promptly close the handle on oplock or lease break looks
> reasonable. The rewording in the patch description is better too.
>
> >> What happens if the handle is durable or persistent, and the connection
> >> to the server times out? Is the handle recovered, then closed?
> >
> > Do you mean when the session gets reconnected, the deferred handle
> > will be recovered and closed?
>
> Yes, because I expect the client to attempt to reclaim its handles upon
> reconnection. I don't see any particular code change regarding this.
>
> My question is, if there's a deferred close pending, should it do that,
> or should it simply cut to the chase and close any such handle(s)?
>
> Tom.
>
> > Regards,
> > Rohith
> >
> > On Thu, Mar 11, 2021 at 11:25 PM Tom Talpey <tom@talpey.com> wrote:
> >>
> >> On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> >>> Hi Rohith,
> >>>
> >>> The changes look good at a high level.
> >>>
> >>> Just a few points worth checking:
> >>> 1. In cifs_open(), should be perform deferred close for certain cases
> >>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> >>> perform less data caching. By deferring close, aren't we delaying
> >>> flushing dirty pages? @Steve French ?
> >>> 2. I see that you're maintaining a new list of files for deferred
> >>> closing. Since there could be a large number of such files for a big
> >>> share with sufficient I/O, maybe we should think of a structure with
> >>> faster lookups (rb trees?).
> >>> I know we already have a bunch of linked lists in cifs.ko, and we need
> >>> to review perf impact for all those lists. But this one sounds like a
> >>> candidate for faster lookups.
> >>> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> >>> oplock_break_received?
> >>>
> >>> Regards,
> >>> Shyam
> >>
> >> I wonder why the choice of 5 seconds? It seems to me a more natural
> >> interval on the order of one or more of
> >> - attribute cache timeout
> >> - lease time
> >> - echo_interval
> >>
> >> Also, this wording is rather confusing:
> >>
> >>> When file is closed, SMB2 close request is not sent to server
> >>> immediately and is deferred for 5 seconds interval. When file is
> >>> reopened by same process for read or write, previous file handle
> >>> can be used until oplock is held.
> >>
> >> It's not a "previous" filehandle if it's open, and "can be used" is
> >> a rather passive statement. A better wording may be "the filehandle
> >> is reused".
> >>
> >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> >> an oplock is held", or "*provided" an oplock is held"?
> >>
> >>> When same file is reopened by another client during 5 second
> >>> interval, oplock break is sent by server and file is closed immediately
> >>> if reference count is zero or else oplock is downgraded.
> >>
> >> Only the second part of the sentence is relevant to the patch. Also,
> >> what about lease break?
> >>
> >> What happens if the handle is durable or persistent, and the connection
> >> to the server times out? Is the handle recovered, then closed?
> >>
> >> Tom.
> >>
> >>
> >>> On Tue, Mar 9, 2021 at 2:41 PM Rohith Surabattula
> >>> <rohiths.msft@gmail.com> wrote:
> >>>>
> >>>> Hi All,
> >>>>
> >>>> Please find the attached patch which will defer the close to server.
> >>>> So, performance can be improved.
> >>>>
> >>>> i.e When file is open, write, close, open, read, close....
> >>>> As close is deferred and oplock is held, cache will not be invalidated
> >>>> and same handle can be used for second open.
> >>>>
> >>>> Please review the changes and let me know your thoughts.
> >>>>
> >>>> Regards,
> >>>> Rohith
> >>>
> >>>
> >>>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-03-25  2:42         ` Rohith Surabattula
@ 2021-04-07 14:57           ` Rohith Surabattula
  2021-04-11 12:19             ` Rohith Surabattula
  0 siblings, 1 reply; 16+ messages in thread
From: Rohith Surabattula @ 2021-04-07 14:57 UTC (permalink / raw)
  To: Tom Talpey
  Cc: Shyam Prasad N, linux-cifs, Steve French, Pavel Shilovsky,
	sribhat.msa, ronnie sahlberg, Aurélien Aptel

[-- Attachment #1: Type: text/plain, Size: 9956 bytes --]

Hi All,

Have updated the patch.

patch v2:
1)  Close the file immediately during unlink of file.
2)  Update the reference count properly when deferred work is already scheduled.

Regards,
Rohith

On Thu, Mar 25, 2021 at 8:12 AM Rohith Surabattula
<rohiths.msft@gmail.com> wrote:
>
> >>> Hi Rohith,
> >>>
> >>> The changes look good at a high level.
> >>>
> >>> Just a few points worth checking:
> >>> 1. In cifs_open(), should be perform deferred close for certain cases
> >>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> >>> perform less data caching. By deferring close, aren't we delaying
> >>> flushing dirty pages? @Steve French ?
> >>
> >> With O_DIRECT flag, data is not cached locally and will be sent to
> >> server immediately.
> >>
> >>> 2. I see that you're maintaining a new list of files for deferred
> >>> closing. Since there could be a large number of such files for a big
> >>> share with sufficient I/O, maybe we should think of a structure with
> >>> faster lookups (rb trees?).
> >>> I know we already have a bunch of linked lists in cifs.ko, and we need
> >>> to review perf impact for all those lists. But this one sounds like a
> >>> candidate for faster lookups.
> >>
> >> Entries will be added into this list only once file is closed and will
> >> remain for acregmax amount interval.
>
> >I think you mean once the "file descriptor" is closed, right? But now
> >that you mention it, caching the attributes sounds a lot like the
> >NFS close-to-open semantic, which is itself optional with the "nocto"
> >mount option.
>
> >Because some applications use close() to perform flush, there may be
> >some silent side effects as well. I don't see anything special in the
> >patch regarding this. Have you considered it?
> >The change to promptly close the handle on oplock or lease break looks
> >reasonable. The rewording in the patch description is better too.
>
> Yes, as the handle is closed immediately when oplock/lease breaks,
> will there be any
> silent side effects still?
>
> >>> What happens if the handle is durable or persistent, and the connection
> >>> to the server times out? Is the handle recovered, then closed?
> >>
> >> Do you mean when the session gets reconnected, the deferred handle
> >> will be recovered and closed?
>
> >Yes, because I expect the client to attempt to reclaim its handles upon
> >reconnection. I don't see any particular code change regarding this.
>
> >My question is, if there's a deferred close pending, should it do that,
> >or should it simply cut to the chase and close any such handle(s)?
>
> As the handles are persistent once the session gets reconnected,
> applications can reclaim
> the handle. So, i believe no need to close handles immediately until
> timeout(i.e acregmax interval)
>
> Regards,
> Rohith
>
> On Wed, Mar 24, 2021 at 7:50 PM Tom Talpey <tom@talpey.com> wrote:
> >
> > On 3/22/2021 1:07 PM, Rohith Surabattula wrote:
> > > On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> > >> Hi Rohith,
> > >>
> > >> The changes look good at a high level.
> > >>
> > >> Just a few points worth checking:
> > >> 1. In cifs_open(), should be perform deferred close for certain cases
> > >> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > >> perform less data caching. By deferring close, aren't we delaying
> > >> flushing dirty pages? @Steve French ?
> > >
> > > With O_DIRECT flag, data is not cached locally and will be sent to
> > > server immediately.
> > >
> > >> 2. I see that you're maintaining a new list of files for deferred
> > >> closing. Since there could be a large number of such files for a big
> > >> share with sufficient I/O, maybe we should think of a structure with
> > >> faster lookups (rb trees?).
> > >> I know we already have a bunch of linked lists in cifs.ko, and we need
> > >> to review perf impact for all those lists. But this one sounds like a
> > >> candidate for faster lookups.
> > >
> > > Entries will be added into this list only once file is closed and will
> > > remain for acregmax amount interval.
> >
> > I think you mean once the "file descriptor" is closed, right? But now
> > that you mention it, caching the attributes sounds a lot like the
> > NFS close-to-open semantic, which is itself optional with the "nocto"
> > mount option.
> >
> > Because some applications use close() to perform flush, there may be
> > some silent side effects as well. I don't see anything special in the
> > patch regarding this. Have you considered it?
> >
> > > So,  Will this affect performance i.e during lookups ?
> > >
> > >> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> > >> oplock_break_received?
> > >
> > > Addressed. Attached the patch.
> > >>
> > >> Regards,
> > >> Shyam
> > >
> > >> I wonder why the choice of 5 seconds? It seems to me a more natural
> > >> interval on the order of one or more of
> > >> - attribute cache timeout
> > >> - lease time
> > >> - echo_interval
> > >
> > > Yes, This sounds good. I modified the patch to defer by attribute
> > > cache timeout interval.
> > >
> > >> Also, this wording is rather confusing:
> > >
> > >>> When file is closed, SMB2 close request is not sent to server
> > >>> immediately and is deferred for 5 seconds interval. When file is
> > >>> reopened by same process for read or write, previous file handle
> > >>> can be used until oplock is held.
> > >
> > >> It's not a "previous" filehandle if it's open, and "can be used" is
> > >> a rather passive statement. A better wording may be "the filehandle
> > >> is reused".
> > >
> > >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> > >> an oplock is held", or "*provided" an oplock is held"?
> > >
> > >>> When same file is reopened by another client during 5 second
> > >>> interval, oplock break is sent by server and file is closed immediately
> > >>> if reference count is zero or else oplock is downgraded.
> > >
> > >> Only the second part of the sentence is relevant to the patch. Also,
> > >> what about lease break?
> > >
> > > Modified the patch.
> >
> > The change to promptly close the handle on oplock or lease break looks
> > reasonable. The rewording in the patch description is better too.
> >
> > >> What happens if the handle is durable or persistent, and the connection
> > >> to the server times out? Is the handle recovered, then closed?
> > >
> > > Do you mean when the session gets reconnected, the deferred handle
> > > will be recovered and closed?
> >
> > Yes, because I expect the client to attempt to reclaim its handles upon
> > reconnection. I don't see any particular code change regarding this.
> >
> > My question is, if there's a deferred close pending, should it do that,
> > or should it simply cut to the chase and close any such handle(s)?
> >
> > Tom.
> >
> > > Regards,
> > > Rohith
> > >
> > > On Thu, Mar 11, 2021 at 11:25 PM Tom Talpey <tom@talpey.com> wrote:
> > >>
> > >> On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> > >>> Hi Rohith,
> > >>>
> > >>> The changes look good at a high level.
> > >>>
> > >>> Just a few points worth checking:
> > >>> 1. In cifs_open(), should be perform deferred close for certain cases
> > >>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > >>> perform less data caching. By deferring close, aren't we delaying
> > >>> flushing dirty pages? @Steve French ?
> > >>> 2. I see that you're maintaining a new list of files for deferred
> > >>> closing. Since there could be a large number of such files for a big
> > >>> share with sufficient I/O, maybe we should think of a structure with
> > >>> faster lookups (rb trees?).
> > >>> I know we already have a bunch of linked lists in cifs.ko, and we need
> > >>> to review perf impact for all those lists. But this one sounds like a
> > >>> candidate for faster lookups.
> > >>> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> > >>> oplock_break_received?
> > >>>
> > >>> Regards,
> > >>> Shyam
> > >>
> > >> I wonder why the choice of 5 seconds? It seems to me a more natural
> > >> interval on the order of one or more of
> > >> - attribute cache timeout
> > >> - lease time
> > >> - echo_interval
> > >>
> > >> Also, this wording is rather confusing:
> > >>
> > >>> When file is closed, SMB2 close request is not sent to server
> > >>> immediately and is deferred for 5 seconds interval. When file is
> > >>> reopened by same process for read or write, previous file handle
> > >>> can be used until oplock is held.
> > >>
> > >> It's not a "previous" filehandle if it's open, and "can be used" is
> > >> a rather passive statement. A better wording may be "the filehandle
> > >> is reused".
> > >>
> > >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> > >> an oplock is held", or "*provided" an oplock is held"?
> > >>
> > >>> When same file is reopened by another client during 5 second
> > >>> interval, oplock break is sent by server and file is closed immediately
> > >>> if reference count is zero or else oplock is downgraded.
> > >>
> > >> Only the second part of the sentence is relevant to the patch. Also,
> > >> what about lease break?
> > >>
> > >> What happens if the handle is durable or persistent, and the connection
> > >> to the server times out? Is the handle recovered, then closed?
> > >>
> > >> Tom.
> > >>
> > >>
> > >>> On Tue, Mar 9, 2021 at 2:41 PM Rohith Surabattula
> > >>> <rohiths.msft@gmail.com> wrote:
> > >>>>
> > >>>> Hi All,
> > >>>>
> > >>>> Please find the attached patch which will defer the close to server.
> > >>>> So, performance can be improved.
> > >>>>
> > >>>> i.e When file is open, write, close, open, read, close....
> > >>>> As close is deferred and oplock is held, cache will not be invalidated
> > >>>> and same handle can be used for second open.
> > >>>>
> > >>>> Please review the changes and let me know your thoughts.
> > >>>>
> > >>>> Regards,
> > >>>> Rohith
> > >>>
> > >>>
> > >>>

[-- Attachment #2: 0001-cifs-Deferred-close-for-files.patch --]
[-- Type: application/octet-stream, Size: 12824 bytes --]

From a7ba12da28245190d139b3ac6ef30395c49e73d2 Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Mon, 8 Mar 2021 16:28:09 +0000
Subject: [PATCH] cifs: Deferred close for files

When file is closed, SMB2 close request is not sent to server
immediately and is deferred for acregmax defined interval. When file is
reopened by same process for read or write, the file handle
is reused if an oplock is held.

When client receives a oplock/lease break, file is closed immediately
if reference count is zero, else oplock is downgraded.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
Signed-off-by: Ubuntu <lxsmbadmin@ubuntu-test.m3bfnqbqqh3ezdu5wssul2safh.bx.internal.cloudapp.net>
---
 fs/cifs/cifsfs.c    | 13 +++++++-
 fs/cifs/cifsglob.h  | 15 +++++++++
 fs/cifs/cifsproto.h | 10 ++++++
 fs/cifs/connect.c   |  1 +
 fs/cifs/file.c      | 74 +++++++++++++++++++++++++++++++++++++++++++--
 fs/cifs/inode.c     |  1 +
 fs/cifs/misc.c      | 62 +++++++++++++++++++++++++++++++++++++
 7 files changed, 173 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 099ad9f3660b..3a426efba94e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -133,6 +133,7 @@ struct workqueue_struct	*cifsiod_wq;
 struct workqueue_struct	*decrypt_wq;
 struct workqueue_struct	*fileinfo_put_wq;
 struct workqueue_struct	*cifsoplockd_wq;
+struct workqueue_struct *deferredclose_wq;
 __u32 cifs_lock_secret;
 
 /*
@@ -1605,9 +1606,16 @@ init_cifs(void)
 		goto out_destroy_fileinfo_put_wq;
 	}
 
+	deferredclose_wq = alloc_workqueue("deferredclose",
+					   WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+	if (!deferredclose_wq) {
+		rc = -ENOMEM;
+		goto out_destroy_cifsoplockd_wq;
+	}
+
 	rc = cifs_fscache_register();
 	if (rc)
-		goto out_destroy_cifsoplockd_wq;
+		goto out_destroy_deferredclose_wq;
 
 	rc = cifs_init_inodecache();
 	if (rc)
@@ -1675,6 +1683,8 @@ init_cifs(void)
 	cifs_destroy_inodecache();
 out_unreg_fscache:
 	cifs_fscache_unregister();
+out_destroy_deferredclose_wq:
+	destroy_workqueue(deferredclose_wq);
 out_destroy_cifsoplockd_wq:
 	destroy_workqueue(cifsoplockd_wq);
 out_destroy_fileinfo_put_wq:
@@ -1709,6 +1719,7 @@ exit_cifs(void)
 	cifs_destroy_mids();
 	cifs_destroy_inodecache();
 	cifs_fscache_unregister();
+	destroy_workqueue(deferredclose_wq);
 	destroy_workqueue(cifsoplockd_wq);
 	destroy_workqueue(decrypt_wq);
 	destroy_workqueue(fileinfo_put_wq);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 31fc8695abd6..2f0e22aad842 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1097,6 +1097,8 @@ struct cifs_tcon {
 #ifdef CONFIG_CIFS_SWN_UPCALL
 	bool use_witness:1; /* use witness protocol */
 #endif
+	struct list_head deferred_closes; /* list of deferred closes */
+	spinlock_t deferred_lock; /* protection on deferred list */
 };
 
 /*
@@ -1154,6 +1156,14 @@ struct cifs_pending_open {
 	__u32 oplock;
 };
 
+struct cifs_deferred_close {
+	struct list_head dlist;
+	struct tcon_link *tlink;
+	__u16  netfid;
+	__u64  persistent_fid;
+	__u64  volatile_fid;
+};
+
 /*
  * This info hangs off the cifsFileInfo structure, pointed to by llist.
  * This is used to track byte stream locks on the file
@@ -1248,6 +1258,9 @@ struct cifsFileInfo {
 	struct cifs_search_info srch_inf;
 	struct work_struct oplock_break; /* work for oplock breaks */
 	struct work_struct put; /* work for the final part of _put */
+	struct delayed_work deferred; /* deferred work for close */
+	bool oplock_break_received; /* Flag to indicate oplock break */
+	bool deferred_scheduled; /* Flag to indicate deferred work is scheduled */
 };
 
 struct cifs_io_parms {
@@ -1892,12 +1905,14 @@ GLOBAL_EXTERN spinlock_t gidsidlock;
 
 void cifs_oplock_break(struct work_struct *work);
 void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
+void smb2_deferred_work_close(struct work_struct *work);
 
 extern const struct slow_work_ops cifs_oplock_break_ops;
 extern struct workqueue_struct *cifsiod_wq;
 extern struct workqueue_struct *decrypt_wq;
 extern struct workqueue_struct *fileinfo_put_wq;
 extern struct workqueue_struct *cifsoplockd_wq;
+extern struct workqueue_struct *deferredclose_wq;
 extern __u32 cifs_lock_secret;
 
 extern mempool_t *cifs_mid_poolp;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 75ce6f742b8d..dafbd21336a5 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -256,6 +256,16 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
 					 struct tcon_link *tlink,
 					 struct cifs_pending_open *open);
 extern void cifs_del_pending_open(struct cifs_pending_open *open);
+
+extern bool cifs_is_deferred_close(struct cifsFileInfo *cfile,
+				struct cifs_deferred_close **dclose);
+
+extern void cifs_add_deferred_close(struct cifsFileInfo *cfile);
+
+extern void cifs_del_deferred_close(struct cifsFileInfo *cfile);
+
+extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode);
+
 extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
 				 int from_reconnect);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index eec8a2052da2..2804634d4040 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2228,6 +2228,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 	tcon->nodelete = ctx->nodelete;
 	tcon->local_lease = ctx->local_lease;
 	INIT_LIST_HEAD(&tcon->pending_opens);
+	INIT_LIST_HEAD(&tcon->deferred_closes);
 
 	spin_lock(&cifs_tcp_ses_lock);
 	list_add(&tcon->tcon_list, &ses->tcon_list);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 26de4329d161..783dcdffdd5d 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -321,9 +321,12 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->dentry = dget(dentry);
 	cfile->f_flags = file->f_flags;
 	cfile->invalidHandle = false;
+	cfile->oplock_break_received = false;
+	cfile->deferred_scheduled = false;
 	cfile->tlink = cifs_get_tlink(tlink);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
+	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
 	mutex_init(&cfile->fh_mutex);
 	spin_lock_init(&cfile->file_info_lock);
 
@@ -562,6 +565,24 @@ int cifs_open(struct inode *inode, struct file *file)
 			file->f_op = &cifs_file_direct_ops;
 	}
 
+	spin_lock(&tcon->deferred_lock);
+	/* Get the cached handle as SMB2 close is deferred */
+	rc = cifs_get_readable_path(tcon, full_path, &cfile);
+	if (rc == 0) {
+		if (file->f_flags == cfile->f_flags) {
+			file->private_data = cfile;
+			cifs_del_deferred_close(cfile);
+			spin_unlock(&tcon->deferred_lock);
+			goto out;
+		} else {
+			spin_unlock(&tcon->deferred_lock);
+			_cifsFileInfo_put(cfile, true, false);
+			cifs_close_deferred_file(CIFS_I(inode));
+		}
+	} else {
+		spin_unlock(&tcon->deferred_lock);
+	}
+
 	if (server->oplocks)
 		oplock = REQ_OPLOCK;
 	else
@@ -842,11 +863,45 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 	return rc;
 }
 
+/*
+ * Delayed-work handler that performs the actual (deferred) close.
+ * Drops the deferred-close bookkeeping under the tcon's deferred_lock,
+ * then releases the last reference on the file handle.
+ */
+void smb2_deferred_work_close(struct work_struct *work)
+{
+	struct cifsFileInfo *cfile = container_of(work,
+			struct cifsFileInfo, deferred.work);
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+
+	spin_lock(&tcon->deferred_lock);
+	cifs_del_deferred_close(cfile);
+	cfile->deferred_scheduled = false;
+	spin_unlock(&tcon->deferred_lock);
+	_cifsFileInfo_put(cfile, true, false);
+}
+
 int cifs_close(struct inode *inode, struct file *file)
 {
+	struct cifsFileInfo *cfile;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
 	if (file->private_data != NULL) {
-		_cifsFileInfo_put(file->private_data, true, false);
+		cfile = file->private_data;
 		file->private_data = NULL;
+		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) ||
+		    (cinode->oplock == CIFS_CACHE_RH_FLG)) {
+			spin_lock(&tlink_tcon(cfile->tlink)->deferred_lock);
+			cifs_add_deferred_close(cfile);
+			if (cfile->deferred_scheduled) {
+				mod_delayed_work(deferredclose_wq, &cfile->deferred, cifs_sb->ctx->acregmax);
+			} else {
+				/* Deferred close for files */
+				queue_delayed_work(deferredclose_wq, &cfile->deferred, cifs_sb->ctx->acregmax);
+				cfile->deferred_scheduled = true;
+				spin_unlock(&tlink_tcon(cfile->tlink)->deferred_lock);
+				return 0;
+			}
+			spin_unlock(&tlink_tcon(cfile->tlink)->deferred_lock);
+			_cifsFileInfo_put(cfile, true, false);
+		} else {
+			_cifsFileInfo_put(cfile, true, false);
+		}
 	}
 
 	/* return code from the ->release op is always ignored */
@@ -1943,7 +1998,8 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
 			continue;
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
-			if (!open_file->invalidHandle) {
+			if ((!open_file->invalidHandle) &&
+				(!open_file->oplock_break_received)) {
 				/* found a good file */
 				/* lock it so it will not be closed on us */
 				cifsFileInfo_get(open_file);
@@ -4746,6 +4802,8 @@ void cifs_oplock_break(struct work_struct *work)
 	struct TCP_Server_Info *server = tcon->ses->server;
 	int rc = 0;
 	bool purge_cache = false;
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
 
 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
 			TASK_UNINTERRUPTIBLE);
@@ -4793,6 +4851,18 @@ void cifs_oplock_break(struct work_struct *work)
 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
 	}
 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
+	/*
+	 * When oplock break is received and there are no active
+	 * file handles but cached, then set the flag oplock_break_received.
+	 * So, new open will not use cached handle.
+	 */
+	spin_lock(&tlink_tcon(cfile->tlink)->deferred_lock);
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (is_deferred) {
+		cfile->oplock_break_received = true;
+		mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+	}
+	spin_unlock(&tlink_tcon(cfile->tlink)->deferred_lock);
 	cifs_done_oplock_break(cinode);
 }
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f2df4422e54a..ce7d7173550c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1643,6 +1643,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 		goto unlink_out;
 	}
 
+	cifs_close_deferred_file(CIFS_I(inode));
 	if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		rc = CIFSPOSIXDelFile(xid, tcon, full_path,
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 82e176720ca6..fe5a247a1453 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -136,6 +136,7 @@ tconInfoAlloc(void)
 	spin_lock_init(&ret_buf->stat_lock);
 	atomic_set(&ret_buf->num_local_opens, 0);
 	atomic_set(&ret_buf->num_remote_opens, 0);
+	spin_lock_init(&ret_buf->deferred_lock);
 
 	return ret_buf;
 }
@@ -672,6 +673,67 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
 	spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
 
+/*
+ * Check whether @cfile already has a deferred close queued on its tcon.
+ * On a match, *pdclose is set to the matching entry and true is returned;
+ * otherwise false.  Callers in this patch hold the tcon's deferred_lock.
+ */
+bool
+cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
+{
+	struct cifs_deferred_close *entry;
+	struct list_head *head = &tlink_tcon(cfile->tlink)->deferred_closes;
+
+	list_for_each_entry(entry, head, dlist) {
+		if (entry->netfid == cfile->fid.netfid &&
+		    entry->persistent_fid == cfile->fid.persistent_fid &&
+		    entry->volatile_fid == cfile->fid.volatile_fid) {
+			*pdclose = entry;
+			return true;
+		}
+	}
+	return false;
+}
+
+/*
+ * Record a deferred close for @cfile on its tcon's deferred_closes list.
+ * No-op if an entry for this handle already exists.  Callers hold the
+ * tcon's deferred_lock (a spinlock), so the allocation must be atomic.
+ */
+void
+cifs_add_deferred_close(struct cifsFileInfo *cfile)
+{
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
+
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (is_deferred)
+		return;
+
+	/*
+	 * GFP_ATOMIC: callers (e.g. cifs_close) invoke this with the
+	 * tcon->deferred_lock spinlock held, so we must not sleep.
+	 */
+	dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_ATOMIC);
+	if (!dclose)
+		return;	/* untracked on OOM; delayed work still closes it */
+	dclose->tlink = cfile->tlink;
+	dclose->netfid = cfile->fid.netfid;
+	dclose->persistent_fid = cfile->fid.persistent_fid;
+	dclose->volatile_fid = cfile->fid.volatile_fid;
+	list_add_tail(&dclose->dlist, &tlink_tcon(dclose->tlink)->deferred_closes);
+}
+
+/*
+ * Remove and free the deferred-close entry for @cfile, if one exists.
+ * Callers hold the tcon's deferred_lock.
+ */
+void
+cifs_del_deferred_close(struct cifsFileInfo *cfile)
+{
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
+
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (!is_deferred)
+		return;
+	list_del(&dclose->dlist);
+	kfree(dclose);	/* entry was kmalloc'ed in cifs_add_deferred_close */
+}
+
+/*
+ * Expire any pending deferred closes on @cifs_inode's open handles
+ * immediately, by rescheduling their delayed work with zero delay.
+ * Used from cifs_unlink() so server handles are released before the
+ * file is removed.
+ */
+void
+cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
+{
+	struct cifsFileInfo *cfile = NULL;
+	struct cifs_deferred_close *dclose;
+
+	/*
+	 * NOTE(review): openFileList is walked here without any visible
+	 * lock (elsewhere it is accessed under open_file_lock) -- confirm
+	 * the caller serializes against concurrent open/close, or take
+	 * the appropriate lock around this iteration.
+	 */
+	list_for_each_entry(cfile, &cifs_inode->openFileList, flist) {
+		spin_lock(&tlink_tcon(cfile->tlink)->deferred_lock);
+		if (cifs_is_deferred_close(cfile, &dclose)) {
+			/* fire the deferred close now rather than at timeout */
+			mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+		}
+		spin_unlock(&tlink_tcon(cfile->tlink)->deferred_lock);
+	}
+}
+
 /* parses DFS refferal V3 structure
  * caller is responsible for freeing target_nodes
  * returns:
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-04-07 14:57           ` Rohith Surabattula
@ 2021-04-11 12:19             ` Rohith Surabattula
  2021-04-11 18:49               ` Steve French
  0 siblings, 1 reply; 16+ messages in thread
From: Rohith Surabattula @ 2021-04-11 12:19 UTC (permalink / raw)
  To: Tom Talpey
  Cc: Shyam Prasad N, linux-cifs, Steve French, Pavel Shilovsky,
	sribhat.msa, ronnie sahlberg, Aurélien Aptel

[-- Attachment #1: Type: text/plain, Size: 10689 bytes --]

Hi All,

Have updated the patch with some optimizations.

Patch V3:
Moved the deferred file list and lock from TCON to inode structures.

Regards,
Rohith

On Wed, Apr 7, 2021 at 8:27 PM Rohith Surabattula
<rohiths.msft@gmail.com> wrote:
>
> Hi All,
>
> Have updated the patch.
>
> patch v2:
> 1)  Close the file immediately during unlink of file.
> 2)  Update the reference count properly when deferred work is already scheduled.
>
> Regards,
> Rohith
>
> On Thu, Mar 25, 2021 at 8:12 AM Rohith Surabattula
> <rohiths.msft@gmail.com> wrote:
> >
> > >>> Hi Rohith,
> > >>>
> > >>> The changes look good at a high level.
> > >>>
> > >>> Just a few points worth checking:
> > >>> 1. In cifs_open(), should be perform deferred close for certain cases
> > >>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > >>> perform less data caching. By deferring close, aren't we delaying
> > >>> flushing dirty pages? @Steve French ?
> > >>
> > >> With O_DIRECT flag, data is not cached locally and will be sent to
> > >> server immediately.
> > >>
> > >>> 2. I see that you're maintaining a new list of files for deferred
> > >>> closing. Since there could be a large number of such files for a big
> > >>> share with sufficient I/O, maybe we should think of a structure with
> > >>> faster lookups (rb trees?).
> > >>> I know we already have a bunch of linked lists in cifs.ko, and we need
> > >>> to review perf impact for all those lists. But this one sounds like a
> > >>> candidate for faster lookups.
> > >>
> > >> Entries will be added into this list only once file is closed and will
> > >> remain for acregmax amount interval.
> >
> > >I think you mean once the "file descriptor" is closed, right? But now
> > >that you mention it, caching the attributes sounds a lot like the
> > >NFS close-to-open semantic, which is itself optional with the "nocto"
> > >mount option.
> >
> > >Because some applications use close() to perform flush, there may be
> > >some silent side effects as well. I don't see anything special in the
> > >patch regarding this. Have you considered it?
> > >The change to promptly close the handle on oplock or lease break looks
> > >reasonable. The rewording in the patch description is better too.
> >
> > Yes, as the handle is closed immediately when oplock/lease breaks,
> > will there be any
> > silent side effects still?
> >
> > >>> What happens if the handle is durable or persistent, and the connection
> > >>> to the server times out? Is the handle recovered, then closed?
> > >>
> > >> Do you mean when the session gets reconnected, the deferred handle
> > >> will be recovered and closed?
> >
> > >Yes, because I expect the client to attempt to reclaim its handles upon
> > >reconnection. I don't see any particular code change regarding this.
> >
> > >My question is, if there's a deferred close pending, should it do that,
> > >or should it simply cut to the chase and close any such handle(s)?
> >
> > As the handles are persistent once the session gets reconnected,
> > applications can reclaim
> > the handle. So, i believe no need to close handles immediately until
> > timeout(i.e acregmax interval)
> >
> > Regards,
> > Rohith
> >
> > On Wed, Mar 24, 2021 at 7:50 PM Tom Talpey <tom@talpey.com> wrote:
> > >
> > > On 3/22/2021 1:07 PM, Rohith Surabattula wrote:
> > > > On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> > > >> Hi Rohith,
> > > >>
> > > >> The changes look good at a high level.
> > > >>
> > > >> Just a few points worth checking:
> > > >> 1. In cifs_open(), should be perform deferred close for certain cases
> > > >> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > > >> perform less data caching. By deferring close, aren't we delaying
> > > >> flushing dirty pages? @Steve French ?
> > > >
> > > > With O_DIRECT flag, data is not cached locally and will be sent to
> > > > server immediately.
> > > >
> > > >> 2. I see that you're maintaining a new list of files for deferred
> > > >> closing. Since there could be a large number of such files for a big
> > > >> share with sufficient I/O, maybe we should think of a structure with
> > > >> faster lookups (rb trees?).
> > > >> I know we already have a bunch of linked lists in cifs.ko, and we need
> > > >> to review perf impact for all those lists. But this one sounds like a
> > > >> candidate for faster lookups.
> > > >
> > > > Entries will be added into this list only once file is closed and will
> > > > remain for acregmax amount interval.
> > >
> > > I think you mean once the "file descriptor" is closed, right? But now
> > > that you mention it, caching the attributes sounds a lot like the
> > > NFS close-to-open semantic, which is itself optional with the "nocto"
> > > mount option.
> > >
> > > Because some applications use close() to perform flush, there may be
> > > some silent side effects as well. I don't see anything special in the
> > > patch regarding this. Have you considered it?
> > >
> > > > So,  Will this affect performance i.e during lookups ?
> > > >
> > > >> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> > > >> oplock_break_received?
> > > >
> > > > Addressed. Attached the patch.
> > > >>
> > > >> Regards,
> > > >> Shyam
> > > >
> > > >> I wonder why the choice of 5 seconds? It seems to me a more natural
> > > >> interval on the order of one or more of
> > > >> - attribute cache timeout
> > > >> - lease time
> > > >> - echo_interval
> > > >
> > > > Yes, This sounds good. I modified the patch to defer by attribute
> > > > cache timeout interval.
> > > >
> > > >> Also, this wording is rather confusing:
> > > >
> > > >>> When file is closed, SMB2 close request is not sent to server
> > > >>> immediately and is deferred for 5 seconds interval. When file is
> > > >>> reopened by same process for read or write, previous file handle
> > > >>> can be used until oplock is held.
> > > >
> > > >> It's not a "previous" filehandle if it's open, and "can be used" is
> > > >> a rather passive statement. A better wording may be "the filehandle
> > > >> is reused".
> > > >
> > > >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> > > >> an oplock is held", or "*provided" an oplock is held"?
> > > >
> > > >>> When same file is reopened by another client during 5 second
> > > >>> interval, oplock break is sent by server and file is closed immediately
> > > >>> if reference count is zero or else oplock is downgraded.
> > > >
> > > >> Only the second part of the sentence is relevant to the patch. Also,
> > > >> what about lease break?
> > > >
> > > > Modified the patch.
> > >
> > > The change to promptly close the handle on oplock or lease break looks
> > > reasonable. The rewording in the patch description is better too.
> > >
> > > >> What happens if the handle is durable or persistent, and the connection
> > > >> to the server times out? Is the handle recovered, then closed?
> > > >
> > > > Do you mean when the session gets reconnected, the deferred handle
> > > > will be recovered and closed?
> > >
> > > Yes, because I expect the client to attempt to reclaim its handles upon
> > > reconnection. I don't see any particular code change regarding this.
> > >
> > > My question is, if there's a deferred close pending, should it do that,
> > > or should it simply cut to the chase and close any such handle(s)?
> > >
> > > Tom.
> > >
> > > > Regards,
> > > > Rohith
> > > >
> > > > On Thu, Mar 11, 2021 at 11:25 PM Tom Talpey <tom@talpey.com> wrote:
> > > >>
> > > >> On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> > > >>> Hi Rohith,
> > > >>>
> > > >>> The changes look good at a high level.
> > > >>>
> > > >>> Just a few points worth checking:
> > > >>> 1. In cifs_open(), should be perform deferred close for certain cases
> > > >>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > > >>> perform less data caching. By deferring close, aren't we delaying
> > > >>> flushing dirty pages? @Steve French ?
> > > >>> 2. I see that you're maintaining a new list of files for deferred
> > > >>> closing. Since there could be a large number of such files for a big
> > > >>> share with sufficient I/O, maybe we should think of a structure with
> > > >>> faster lookups (rb trees?).
> > > >>> I know we already have a bunch of linked lists in cifs.ko, and we need
> > > >>> to review perf impact for all those lists. But this one sounds like a
> > > >>> candidate for faster lookups.
> > > >>> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> > > >>> oplock_break_received?
> > > >>>
> > > >>> Regards,
> > > >>> Shyam
> > > >>
> > > >> I wonder why the choice of 5 seconds? It seems to me a more natural
> > > >> interval on the order of one or more of
> > > >> - attribute cache timeout
> > > >> - lease time
> > > >> - echo_interval
> > > >>
> > > >> Also, this wording is rather confusing:
> > > >>
> > > >>> When file is closed, SMB2 close request is not sent to server
> > > >>> immediately and is deferred for 5 seconds interval. When file is
> > > >>> reopened by same process for read or write, previous file handle
> > > >>> can be used until oplock is held.
> > > >>
> > > >> It's not a "previous" filehandle if it's open, and "can be used" is
> > > >> a rather passive statement. A better wording may be "the filehandle
> > > >> is reused".
> > > >>
> > > >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> > > >> an oplock is held", or "*provided" an oplock is held"?
> > > >>
> > > >>> When same file is reopened by another client during 5 second
> > > >>> interval, oplock break is sent by server and file is closed immediately
> > > >>> if reference count is zero or else oplock is downgraded.
> > > >>
> > > >> Only the second part of the sentence is relevant to the patch. Also,
> > > >> what about lease break?
> > > >>
> > > >> What happens if the handle is durable or persistent, and the connection
> > > >> to the server times out? Is the handle recovered, then closed?
> > > >>
> > > >> Tom.
> > > >>
> > > >>
> > > >>> On Tue, Mar 9, 2021 at 2:41 PM Rohith Surabattula
> > > >>> <rohiths.msft@gmail.com> wrote:
> > > >>>>
> > > >>>> Hi All,
> > > >>>>
> > > >>>> Please find the attached patch which will defer the close to server.
> > > >>>> So, performance can be improved.
> > > >>>>
> > > >>>> i.e When file is open, write, close, open, read, close....
> > > >>>> As close is deferred and oplock is held, cache will not be invalidated
> > > >>>> and same handle can be used for second open.
> > > >>>>
> > > >>>> Please review the changes and let me know your thoughts.
> > > >>>>
> > > >>>> Regards,
> > > >>>> Rohith
> > > >>>
> > > >>>
> > > >>>

[-- Attachment #2: 0001-cifs-Deferred-close-for-files.patch --]
[-- Type: application/octet-stream, Size: 13616 bytes --]

From fedf687b27f1c80156d5983eb6a618d1aa441f34 Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Mon, 8 Mar 2021 16:28:09 +0000
Subject: [PATCH] cifs: Deferred close for files

When a file is closed, the SMB2 close request is not sent to the server
immediately but is deferred for the interval defined by acregmax. When
the file is reopened by the same process for read or write, the file
handle is reused if an oplock is held.

When the client receives an oplock/lease break, the file is closed
immediately if its reference count is zero; otherwise the oplock is downgraded.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
---
 fs/cifs/cifsfs.c    | 15 +++++++-
 fs/cifs/cifsglob.h  | 16 +++++++++
 fs/cifs/cifsproto.h | 11 ++++++
 fs/cifs/file.c      | 84 +++++++++++++++++++++++++++++++++++++++++++--
 fs/cifs/inode.c     |  1 +
 fs/cifs/misc.c      | 62 +++++++++++++++++++++++++++++++++
 6 files changed, 186 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 099ad9f3660b..35cf51810cbf 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -133,6 +133,7 @@ struct workqueue_struct	*cifsiod_wq;
 struct workqueue_struct	*decrypt_wq;
 struct workqueue_struct	*fileinfo_put_wq;
 struct workqueue_struct	*cifsoplockd_wq;
+struct workqueue_struct *deferredclose_wq;
 __u32 cifs_lock_secret;
 
 /*
@@ -364,6 +365,8 @@ cifs_alloc_inode(struct super_block *sb)
 	/* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */
 	INIT_LIST_HEAD(&cifs_inode->openFileList);
 	INIT_LIST_HEAD(&cifs_inode->llist);
+	INIT_LIST_HEAD(&cifs_inode->deferred_closes);
+	spin_lock_init(&cifs_inode->deferred_lock);
 	return &cifs_inode->vfs_inode;
 }
 
@@ -1605,9 +1608,16 @@ init_cifs(void)
 		goto out_destroy_fileinfo_put_wq;
 	}
 
+	deferredclose_wq = alloc_workqueue("deferredclose",
+					   WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+	if (!deferredclose_wq) {
+		rc = -ENOMEM;
+		goto out_destroy_cifsoplockd_wq;
+	}
+
 	rc = cifs_fscache_register();
 	if (rc)
-		goto out_destroy_cifsoplockd_wq;
+		goto out_destroy_deferredclose_wq;
 
 	rc = cifs_init_inodecache();
 	if (rc)
@@ -1675,6 +1685,8 @@ init_cifs(void)
 	cifs_destroy_inodecache();
 out_unreg_fscache:
 	cifs_fscache_unregister();
+out_destroy_deferredclose_wq:
+	destroy_workqueue(deferredclose_wq);
 out_destroy_cifsoplockd_wq:
 	destroy_workqueue(cifsoplockd_wq);
 out_destroy_fileinfo_put_wq:
@@ -1709,6 +1721,7 @@ exit_cifs(void)
 	cifs_destroy_mids();
 	cifs_destroy_inodecache();
 	cifs_fscache_unregister();
+	destroy_workqueue(deferredclose_wq);
 	destroy_workqueue(cifsoplockd_wq);
 	destroy_workqueue(decrypt_wq);
 	destroy_workqueue(fileinfo_put_wq);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 31fc8695abd6..a6c6c7d257eb 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1154,6 +1154,14 @@ struct cifs_pending_open {
 	__u32 oplock;
 };
 
+struct cifs_deferred_close {
+	struct list_head dlist;
+	struct tcon_link *tlink;
+	__u16  netfid;
+	__u64  persistent_fid;
+	__u64  volatile_fid;
+};
+
 /*
  * This info hangs off the cifsFileInfo structure, pointed to by llist.
  * This is used to track byte stream locks on the file
@@ -1248,6 +1256,9 @@ struct cifsFileInfo {
 	struct cifs_search_info srch_inf;
 	struct work_struct oplock_break; /* work for oplock breaks */
 	struct work_struct put; /* work for the final part of _put */
+	struct delayed_work deferred;
+	bool oplock_break_received; /* Flag to indicate oplock break */
+	bool deferred_scheduled;
 };
 
 struct cifs_io_parms {
@@ -1396,6 +1407,7 @@ struct cifsInodeInfo {
 #define CIFS_INO_DELETE_PENDING		  (3) /* delete pending on server */
 #define CIFS_INO_INVALID_MAPPING	  (4) /* pagecache is invalid */
 #define CIFS_INO_LOCK			  (5) /* lock bit for synchronization */
+#define CIFS_INO_MODIFIED_ATTR            (6) /* Indicate change in mtime/ctime */
 	unsigned long flags;
 	spinlock_t writers_lock;
 	unsigned int writers;		/* Number of writers on this inode */
@@ -1408,6 +1420,8 @@ struct cifsInodeInfo {
 	struct fscache_cookie *fscache;
 #endif
 	struct inode vfs_inode;
+	struct list_head deferred_closes; /* list of deferred closes */
+	spinlock_t deferred_lock; /* protection on deferred list */
 };
 
 static inline struct cifsInodeInfo *
@@ -1892,12 +1906,14 @@ GLOBAL_EXTERN spinlock_t gidsidlock;
 
 void cifs_oplock_break(struct work_struct *work);
 void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
+void smb2_deferred_work_close(struct work_struct *work);
 
 extern const struct slow_work_ops cifs_oplock_break_ops;
 extern struct workqueue_struct *cifsiod_wq;
 extern struct workqueue_struct *decrypt_wq;
 extern struct workqueue_struct *fileinfo_put_wq;
 extern struct workqueue_struct *cifsoplockd_wq;
+extern struct workqueue_struct *deferredclose_wq;
 extern __u32 cifs_lock_secret;
 
 extern mempool_t *cifs_mid_poolp;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 75ce6f742b8d..e2e56e1f66f5 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -256,6 +256,17 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
 					 struct tcon_link *tlink,
 					 struct cifs_pending_open *open);
 extern void cifs_del_pending_open(struct cifs_pending_open *open);
+
+extern bool cifs_is_deferred_close(struct cifsFileInfo *cfile,
+				struct cifs_deferred_close **dclose);
+
+extern void cifs_add_deferred_close(struct cifsFileInfo *cfile,
+				struct cifs_deferred_close *dclose);
+
+extern void cifs_del_deferred_close(struct cifsFileInfo *cfile);
+
+extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode);
+
 extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
 				 int from_reconnect);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 26de4329d161..744cce9357ba 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -321,9 +321,12 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->dentry = dget(dentry);
 	cfile->f_flags = file->f_flags;
 	cfile->invalidHandle = false;
+	cfile->oplock_break_received = false;
+	cfile->deferred_scheduled = false;
 	cfile->tlink = cifs_get_tlink(tlink);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
+	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
 	mutex_init(&cfile->fh_mutex);
 	spin_lock_init(&cfile->file_info_lock);
 
@@ -562,6 +565,23 @@ int cifs_open(struct inode *inode, struct file *file)
 			file->f_op = &cifs_file_direct_ops;
 	}
 
+	spin_lock(&CIFS_I(inode)->deferred_lock);
+	/* Get the cached handle as SMB2 close is deferred */
+	rc = cifs_get_readable_path(tcon, full_path, &cfile);
+	if (rc == 0) {
+		if (file->f_flags == cfile->f_flags) {
+			file->private_data = cfile;
+			cifs_del_deferred_close(cfile);
+			spin_unlock(&CIFS_I(inode)->deferred_lock);
+			goto out;
+		} else {
+			spin_unlock(&CIFS_I(inode)->deferred_lock);
+			_cifsFileInfo_put(cfile, true, false);
+		}
+	} else {
+		spin_unlock(&CIFS_I(inode)->deferred_lock);
+	}
+
 	if (server->oplocks)
 		oplock = REQ_OPLOCK;
 	else
@@ -842,11 +862,52 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 	return rc;
 }
 
+/*
+ * Delayed-work handler that performs the actual (deferred) close.
+ * Clears the deferred-close bookkeeping under the inode's deferred_lock,
+ * then releases the last reference on the file handle.
+ */
+void smb2_deferred_work_close(struct work_struct *work)
+{
+	struct cifsFileInfo *cfile = container_of(work,
+			struct cifsFileInfo, deferred.work);
+	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
+
+	spin_lock(&cinode->deferred_lock);
+	cifs_del_deferred_close(cfile);
+	cfile->deferred_scheduled = false;
+	spin_unlock(&cinode->deferred_lock);
+	_cifsFileInfo_put(cfile, true, false);
+}
+
 int cifs_close(struct inode *inode, struct file *file)
 {
+	struct cifsFileInfo *cfile;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifs_deferred_close *dclose;
+
 	if (file->private_data != NULL) {
-		_cifsFileInfo_put(file->private_data, true, false);
+		cfile = file->private_data;
 		file->private_data = NULL;
+		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
+		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
+		    dclose) {
+			if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
+				inode->i_ctime = inode->i_mtime = current_time(inode);
+			spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+			cifs_add_deferred_close(cfile, dclose);
+			if (cfile->deferred_scheduled) {
+				mod_delayed_work(deferredclose_wq,
+						&cfile->deferred, cifs_sb->ctx->acregmax);
+			} else {
+				/* Deferred close for files */
+				queue_delayed_work(deferredclose_wq,
+						&cfile->deferred, cifs_sb->ctx->acregmax);
+				cfile->deferred_scheduled = true;
+				spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+				return 0;
+			}
+			spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+			_cifsFileInfo_put(cfile, true, false);
+		} else {
+			_cifsFileInfo_put(cfile, true, false);
+			kfree(dclose);
+		}
 	}
 
 	/* return code from the ->release op is always ignored */
@@ -1943,7 +2004,8 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
 			continue;
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
-			if (!open_file->invalidHandle) {
+			if ((!open_file->invalidHandle) &&
+				(!open_file->oplock_break_received)) {
 				/* found a good file */
 				/* lock it so it will not be closed on us */
 				cifsFileInfo_get(open_file);
@@ -2478,6 +2540,8 @@ static int cifs_writepages(struct address_space *mapping,
 	if (cfile)
 		cifsFileInfo_put(cfile);
 	free_xid(xid);
+	/* Indication to update ctime and mtime as close is deferred */
+	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
 	return rc;
 }
 
@@ -2586,6 +2650,8 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
 
 	unlock_page(page);
 	put_page(page);
+	/* Indication to update ctime and mtime as close is deferred */
+	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
 
 	return rc;
 }
@@ -4746,6 +4812,8 @@ void cifs_oplock_break(struct work_struct *work)
 	struct TCP_Server_Info *server = tcon->ses->server;
 	int rc = 0;
 	bool purge_cache = false;
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
 
 	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
 			TASK_UNINTERRUPTIBLE);
@@ -4793,6 +4861,18 @@ void cifs_oplock_break(struct work_struct *work)
 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
 	}
 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
+	/*
+	 * When oplock break is received and there are no active
+	 * file handles but cached, then set the flag oplock_break_received.
+	 * So, new open will not use cached handle.
+	 */
+	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (is_deferred) {
+		cfile->oplock_break_received = true;
+		mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+	}
+	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
 	cifs_done_oplock_break(cinode);
 }
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f2df4422e54a..ce7d7173550c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1643,6 +1643,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 		goto unlink_out;
 	}
 
+	cifs_close_deferred_file(CIFS_I(inode));
 	if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		rc = CIFSPOSIXDelFile(xid, tcon, full_path,
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 82e176720ca6..5892748b34e6 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -672,6 +672,68 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
 	spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
 
+bool
+cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
+{
+	struct cifs_deferred_close *dclose;
+
+	list_for_each_entry(dclose, &CIFS_I(d_inode(cfile->dentry))->deferred_closes, dlist) {
+		if ((dclose->netfid == cfile->fid.netfid) &&
+			(dclose->persistent_fid == cfile->fid.persistent_fid) &&
+			(dclose->volatile_fid == cfile->fid.volatile_fid)) {
+			*pdclose = dclose;
+			return true;
+		}
+	}
+	return false;
+}
+
+void
+cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *dclose)
+{
+	bool is_deferred = false;
+	struct cifs_deferred_close *pdclose;
+
+	is_deferred = cifs_is_deferred_close(cfile, &pdclose);
+	if (is_deferred) {
+		kfree(dclose);
+		return;
+	}
+
+	dclose->tlink = cfile->tlink;
+	dclose->netfid = cfile->fid.netfid;
+	dclose->persistent_fid = cfile->fid.persistent_fid;
+	dclose->volatile_fid = cfile->fid.volatile_fid;
+	list_add_tail(&dclose->dlist, &CIFS_I(d_inode(cfile->dentry))->deferred_closes);
+}
+
+void
+cifs_del_deferred_close(struct cifsFileInfo *cfile)
+{
+	bool is_deferred = false;
+	struct cifs_deferred_close *dclose;
+
+	is_deferred = cifs_is_deferred_close(cfile, &dclose);
+	if (!is_deferred)
+		return;
+	list_del(&dclose->dlist);
+	kfree(dclose);
+}
+
+void
+cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
+{
+	struct cifsFileInfo *cfile = NULL;
+	struct cifs_deferred_close *dclose;
+
+	list_for_each_entry(cfile, &cifs_inode->openFileList, flist) {
+		spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+		if (cifs_is_deferred_close(cfile, &dclose))
+			mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+		spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+	}
+}
+
 /* parses DFS refferal V3 structure
  * caller is responsible for freeing target_nodes
  * returns:
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-04-11 12:19             ` Rohith Surabattula
@ 2021-04-11 18:49               ` Steve French
  2021-04-12  3:43                 ` Rohith Surabattula
  0 siblings, 1 reply; 16+ messages in thread
From: Steve French @ 2021-04-11 18:49 UTC (permalink / raw)
  To: Rohith Surabattula
  Cc: Shyam Prasad N, linux-cifs, sribhat.msa, ronnie sahlberg,
	Aurélien Aptel

Could you rerun test 422 to Windows? It just failed with:

    -Space used before and after writeback is equal
    +Files /mnt/scratch/422.before and /mnt/scratch/422.after differ

and tests 023 and 024 to Samba and 024 to Windows which failed with:

    -samedir  tree/none -> none/tree.
    +samedir  tree/none -> Permission denied

and git tests 0000, 0002, 0003 and 0022 to Windows

See http://smb3-test-rhel-75.southcentralus.cloudapp.azure.com/#/builders/2/builds/557

It may be unrelated but worked without the patch the previous day:

http://smb3-test-rhel-75.southcentralus.cloudapp.azure.com/#/builders/2/builds/556

On Sun, Apr 11, 2021 at 7:19 AM Rohith Surabattula
<rohiths.msft@gmail.com> wrote:
>
> Hi All,
>
> Have updated the patch with some optimizations.
>
> Patch V3:
> Moved the deferred file list and lock from TCON to inode structures.
>
> Regards,
> Rohith
>
> On Wed, Apr 7, 2021 at 8:27 PM Rohith Surabattula
> <rohiths.msft@gmail.com> wrote:
> >
> > Hi All,
> >
> > Have updated the patch.
> >
> > patch v2:
> > 1)  Close the file immediately during unlink of file.
> > 2)  Update the reference count properly when deferred work is already scheduled.
> >
> > Regards,
> > Rohith
> >
> > On Thu, Mar 25, 2021 at 8:12 AM Rohith Surabattula
> > <rohiths.msft@gmail.com> wrote:
> > >
> > > >>> Hi Rohith,
> > > >>>
> > > >>> The changes look good at a high level.
> > > >>>
> > > >>> Just a few points worth checking:
> > > > >>> 1. In cifs_open(), should we perform deferred close for certain cases
> > > >>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > > >>> perform less data caching. By deferring close, aren't we delaying
> > > >>> flushing dirty pages? @Steve French ?
> > > >>
> > > >> With O_DIRECT flag, data is not cached locally and will be sent to
> > > >> server immediately.
> > > >>
> > > >>> 2. I see that you're maintaining a new list of files for deferred
> > > >>> closing. Since there could be a large number of such files for a big
> > > >>> share with sufficient I/O, maybe we should think of a structure with
> > > >>> faster lookups (rb trees?).
> > > >>> I know we already have a bunch of linked lists in cifs.ko, and we need
> > > >>> to review perf impact for all those lists. But this one sounds like a
> > > >>> candidate for faster lookups.
> > > >>
> > > > >> Entries will be added into this list only once a file is closed and will
> > > > >> remain there for the acregmax interval.
> > >
> > > >I think you mean once the "file descriptor" is closed, right? But now
> > > >that you mention it, caching the attributes sounds a lot like the
> > > >NFS close-to-open semantic, which is itself optional with the "nocto"
> > > >mount option.
> > >
> > > >Because some applications use close() to perform flush, there may be
> > > >some silent side effects as well. I don't see anything special in the
> > > >patch regarding this. Have you considered it?
> > > >The change to promptly close the handle on oplock or lease break looks
> > > >reasonable. The rewording in the patch description is better too.
> > >
> > > Yes, as the handle is closed immediately when oplock/lease breaks,
> > > will there be any
> > > silent side effects still?
> > >
> > > >>> What happens if the handle is durable or persistent, and the connection
> > > >>> to the server times out? Is the handle recovered, then closed?
> > > >>
> > > >> Do you mean when the session gets reconnected, the deferred handle
> > > >> will be recovered and closed?
> > >
> > > >Yes, because I expect the client to attempt to reclaim its handles upon
> > > >reconnection. I don't see any particular code change regarding this.
> > >
> > > >My question is, if there's a deferred close pending, should it do that,
> > > >or should it simply cut to the chase and close any such handle(s)?
> > >
> > > As the handles are persistent, once the session gets reconnected,
> > > applications can reclaim
> > > the handle. So, I believe there is no need to close handles immediately
> > > until the timeout (i.e. the acregmax interval).
> > >
> > > Regards,
> > > Rohith
> > >
> > > On Wed, Mar 24, 2021 at 7:50 PM Tom Talpey <tom@talpey.com> wrote:
> > > >
> > > > On 3/22/2021 1:07 PM, Rohith Surabattula wrote:
> > > > > On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> > > > >> Hi Rohith,
> > > > >>
> > > > >> The changes look good at a high level.
> > > > >>
> > > > >> Just a few points worth checking:
> > > > >> 1. In cifs_open(), should be perform deferred close for certain cases
> > > > >> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > > > >> perform less data caching. By deferring close, aren't we delaying
> > > > >> flushing dirty pages? @Steve French ?
> > > > >
> > > > > With O_DIRECT flag, data is not cached locally and will be sent to
> > > > > server immediately.
> > > > >
> > > > >> 2. I see that you're maintaining a new list of files for deferred
> > > > >> closing. Since there could be a large number of such files for a big
> > > > >> share with sufficient I/O, maybe we should think of a structure with
> > > > >> faster lookups (rb trees?).
> > > > >> I know we already have a bunch of linked lists in cifs.ko, and we need
> > > > >> to review perf impact for all those lists. But this one sounds like a
> > > > >> candidate for faster lookups.
> > > > >
> > > > > Entries will be added into this list only once file is closed and will
> > > > > remain for acregmax amount interval.
> > > >
> > > > I think you mean once the "file descriptor" is closed, right? But now
> > > > that you mention it, caching the attributes sounds a lot like the
> > > > NFS close-to-open semantic, which is itself optional with the "nocto"
> > > > mount option.
> > > >
> > > > Because some applications use close() to perform flush, there may be
> > > > some silent side effects as well. I don't see anything special in the
> > > > patch regarding this. Have you considered it?
> > > >
> > > > > So,  Will this affect performance i.e during lookups ?
> > > > >
> > > > >> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> > > > >> oplock_break_received?
> > > > >
> > > > > Addressed. Attached the patch.
> > > > >>
> > > > >> Regards,
> > > > >> Shyam
> > > > >
> > > > >> I wonder why the choice of 5 seconds? It seems to me a more natural
> > > > >> interval on the order of one or more of
> > > > >> - attribute cache timeout
> > > > >> - lease time
> > > > >> - echo_interval
> > > > >
> > > > > Yes, This sounds good. I modified the patch to defer by attribute
> > > > > cache timeout interval.
> > > > >
> > > > >> Also, this wording is rather confusing:
> > > > >
> > > > >>> When file is closed, SMB2 close request is not sent to server
> > > > >>> immediately and is deferred for 5 seconds interval. When file is
> > > > >>> reopened by same process for read or write, previous file handle
> > > > >>> can be used until oplock is held.
> > > > >
> > > > >> It's not a "previous" filehandle if it's open, and "can be used" is
> > > > >> a rather passive statement. A better wording may be "the filehandle
> > > > >> is reused".
> > > > >
> > > > >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> > > > >> an oplock is held", or "*provided" an oplock is held"?
> > > > >
> > > > >>> When same file is reopened by another client during 5 second
> > > > >>> interval, oplock break is sent by server and file is closed immediately
> > > > >>> if reference count is zero or else oplock is downgraded.
> > > > >
> > > > >> Only the second part of the sentence is relevant to the patch. Also,
> > > > >> what about lease break?
> > > > >
> > > > > Modified the patch.
> > > >
> > > > The change to promptly close the handle on oplock or lease break looks
> > > > reasonable. The rewording in the patch description is better too.
> > > >
> > > > >> What happens if the handle is durable or persistent, and the connection
> > > > >> to the server times out? Is the handle recovered, then closed?
> > > > >
> > > > > Do you mean when the session gets reconnected, the deferred handle
> > > > > will be recovered and closed?
> > > >
> > > > Yes, because I expect the client to attempt to reclaim its handles upon
> > > > reconnection. I don't see any particular code change regarding this.
> > > >
> > > > My question is, if there's a deferred close pending, should it do that,
> > > > or should it simply cut to the chase and close any such handle(s)?
> > > >
> > > > Tom.
> > > >
> > > > > Regards,
> > > > > Rohith
> > > > >
> > > > > On Thu, Mar 11, 2021 at 11:25 PM Tom Talpey <tom@talpey.com> wrote:
> > > > >>
> > > > >> On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> > > > >>> Hi Rohith,
> > > > >>>
> > > > >>> The changes look good at a high level.
> > > > >>>
> > > > >>> Just a few points worth checking:
> > > > >>> 1. In cifs_open(), should be perform deferred close for certain cases
> > > > >>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > > > >>> perform less data caching. By deferring close, aren't we delaying
> > > > >>> flushing dirty pages? @Steve French ?
> > > > >>> 2. I see that you're maintaining a new list of files for deferred
> > > > >>> closing. Since there could be a large number of such files for a big
> > > > >>> share with sufficient I/O, maybe we should think of a structure with
> > > > >>> faster lookups (rb trees?).
> > > > >>> I know we already have a bunch of linked lists in cifs.ko, and we need
> > > > >>> to review perf impact for all those lists. But this one sounds like a
> > > > >>> candidate for faster lookups.
> > > > >>> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> > > > >>> oplock_break_received?
> > > > >>>
> > > > >>> Regards,
> > > > >>> Shyam
> > > > >>
> > > > >> I wonder why the choice of 5 seconds? It seems to me a more natural
> > > > >> interval on the order of one or more of
> > > > >> - attribute cache timeout
> > > > >> - lease time
> > > > >> - echo_interval
> > > > >>
> > > > >> Also, this wording is rather confusing:
> > > > >>
> > > > >>> When file is closed, SMB2 close request is not sent to server
> > > > >>> immediately and is deferred for 5 seconds interval. When file is
> > > > >>> reopened by same process for read or write, previous file handle
> > > > >>> can be used until oplock is held.
> > > > >>
> > > > >> It's not a "previous" filehandle if it's open, and "can be used" is
> > > > >> a rather passive statement. A better wording may be "the filehandle
> > > > >> is reused".
> > > > >>
> > > > >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> > > > >> an oplock is held", or "*provided" an oplock is held"?
> > > > >>
> > > > >>> When same file is reopened by another client during 5 second
> > > > >>> interval, oplock break is sent by server and file is closed immediately
> > > > >>> if reference count is zero or else oplock is downgraded.
> > > > >>
> > > > >> Only the second part of the sentence is relevant to the patch. Also,
> > > > >> what about lease break?
> > > > >>
> > > > >> What happens if the handle is durable or persistent, and the connection
> > > > >> to the server times out? Is the handle recovered, then closed?
> > > > >>
> > > > >> Tom.
> > > > >>
> > > > >>
> > > > >>> On Tue, Mar 9, 2021 at 2:41 PM Rohith Surabattula
> > > > >>> <rohiths.msft@gmail.com> wrote:
> > > > >>>>
> > > > >>>> Hi All,
> > > > >>>>
> > > > >>>> Please find the attached patch which will defer the close to server.
> > > > >>>> So, performance can be improved.
> > > > >>>>
> > > > >>>> i.e When file is open, write, close, open, read, close....
> > > > >>>> As close is deferred and oplock is held, cache will not be invalidated
> > > > >>>> and same handle can be used for second open.
> > > > >>>>
> > > > >>>> Please review the changes and let me know your thoughts.
> > > > >>>>
> > > > >>>> Regards,
> > > > >>>> Rohith
> > > > >>>
> > > > >>>
> > > > >>>



-- 
Thanks,

Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-04-11 18:49               ` Steve French
@ 2021-04-12  3:43                 ` Rohith Surabattula
  2021-04-12 16:57                   ` Aurélien Aptel
  0 siblings, 1 reply; 16+ messages in thread
From: Rohith Surabattula @ 2021-04-12  3:43 UTC (permalink / raw)
  To: Steve French
  Cc: Shyam Prasad N, linux-cifs, sribhat.msa, ronnie sahlberg,
	Aurélien Aptel

Sure Steve.

On Mon, Apr 12, 2021 at 12:19 AM Steve French <smfrench@gmail.com> wrote:
>
> Could you rerun test 422 to Windows? It just failed with:
>
>     -Space used before and after writeback is equal
>     +Files /mnt/scratch/422.before and /mnt/scratch/422.after differ
>
> and tests 023 and 024 to Samba and 024 to Windows which failed with:
>
>     -samedir  tree/none -> none/tree.
>     +samedir  tree/none -> Permission denied
>
> and git tests 0000, 0002, 0003 and 0022 to Windows
>
> See http://smb3-test-rhel-75.southcentralus.cloudapp.azure.com/#/builders/2/builds/557
>
> It may be unrelated but worked without the patch the previous day:
>
> http://smb3-test-rhel-75.southcentralus.cloudapp.azure.com/#/builders/2/builds/556
>
> On Sun, Apr 11, 2021 at 7:19 AM Rohith Surabattula
> <rohiths.msft@gmail.com> wrote:
> >
> > Hi All,
> >
> > Have updated the patch with some optimizations.
> >
> > Patch V3:
> > Moved the deferred file list and lock from TCON to inode structures.
> >
> > Regards,
> > Rohith
> >
> > On Wed, Apr 7, 2021 at 8:27 PM Rohith Surabattula
> > <rohiths.msft@gmail.com> wrote:
> > >
> > > Hi All,
> > >
> > > Have updated the patch.
> > >
> > > patch v2:
> > > 1)  Close the file immediately during unlink of file.
> > > 2)  Update the reference count properly when deferred work is already scheduled.
> > >
> > > Regards,
> > > Rohith
> > >
> > > On Thu, Mar 25, 2021 at 8:12 AM Rohith Surabattula
> > > <rohiths.msft@gmail.com> wrote:
> > > >
> > > > >>> Hi Rohith,
> > > > >>>
> > > > >>> The changes look good at a high level.
> > > > >>>
> > > > >>> Just a few points worth checking:
> > > > >>> 1. In cifs_open(), should be perform deferred close for certain cases
> > > > >>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > > > >>> perform less data caching. By deferring close, aren't we delaying
> > > > >>> flushing dirty pages? @Steve French ?
> > > > >>
> > > > >> With O_DIRECT flag, data is not cached locally and will be sent to
> > > > >> server immediately.
> > > > >>
> > > > >>> 2. I see that you're maintaining a new list of files for deferred
> > > > >>> closing. Since there could be a large number of such files for a big
> > > > >>> share with sufficient I/O, maybe we should think of a structure with
> > > > >>> faster lookups (rb trees?).
> > > > >>> I know we already have a bunch of linked lists in cifs.ko, and we need
> > > > >>> to review perf impact for all those lists. But this one sounds like a
> > > > >>> candidate for faster lookups.
> > > > >>
> > > > >> Entries will be added into this list only once file is closed and will
> > > > >> remain for acregmax amount interval.
> > > >
> > > > >I think you mean once the "file descriptor" is closed, right? But now
> > > > >that you mention it, caching the attributes sounds a lot like the
> > > > >NFS close-to-open semantic, which is itself optional with the "nocto"
> > > > >mount option.
> > > >
> > > > >Because some applications use close() to perform flush, there may be
> > > > >some silent side effects as well. I don't see anything special in the
> > > > >patch regarding this. Have you considered it?
> > > > >The change to promptly close the handle on oplock or lease break looks
> > > > >reasonable. The rewording in the patch description is better too.
> > > >
> > > > Yes, as the handle is closed immediately when oplock/lease breaks,
> > > > will there be any
> > > > silent side effects still?
> > > >
> > > > >>> What happens if the handle is durable or persistent, and the connection
> > > > >>> to the server times out? Is the handle recovered, then closed?
> > > > >>
> > > > >> Do you mean when the session gets reconnected, the deferred handle
> > > > >> will be recovered and closed?
> > > >
> > > > >Yes, because I expect the client to attempt to reclaim its handles upon
> > > > >reconnection. I don't see any particular code change regarding this.
> > > >
> > > > >My question is, if there's a deferred close pending, should it do that,
> > > > >or should it simply cut to the chase and close any such handle(s)?
> > > >
> > > > As the handles are persistent once the session gets reconnected,
> > > > applications can reclaim
> > > > the handle. So, i believe no need to close handles immediately until
> > > > timeout(i.e acregmax interval)
> > > >
> > > > Regards,
> > > > Rohith
> > > >
> > > > On Wed, Mar 24, 2021 at 7:50 PM Tom Talpey <tom@talpey.com> wrote:
> > > > >
> > > > > On 3/22/2021 1:07 PM, Rohith Surabattula wrote:
> > > > > > On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> > > > > >> Hi Rohith,
> > > > > >>
> > > > > >> The changes look good at a high level.
> > > > > >>
> > > > > >> Just a few points worth checking:
> > > > > >> 1. In cifs_open(), should be perform deferred close for certain cases
> > > > > >> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > > > > >> perform less data caching. By deferring close, aren't we delaying
> > > > > >> flushing dirty pages? @Steve French ?
> > > > > >
> > > > > > With O_DIRECT flag, data is not cached locally and will be sent to
> > > > > > server immediately.
> > > > > >
> > > > > >> 2. I see that you're maintaining a new list of files for deferred
> > > > > >> closing. Since there could be a large number of such files for a big
> > > > > >> share with sufficient I/O, maybe we should think of a structure with
> > > > > >> faster lookups (rb trees?).
> > > > > >> I know we already have a bunch of linked lists in cifs.ko, and we need
> > > > > >> to review perf impact for all those lists. But this one sounds like a
> > > > > >> candidate for faster lookups.
> > > > > >
> > > > > > Entries will be added into this list only once file is closed and will
> > > > > > remain for acregmax amount interval.
> > > > >
> > > > > I think you mean once the "file descriptor" is closed, right? But now
> > > > > that you mention it, caching the attributes sounds a lot like the
> > > > > NFS close-to-open semantic, which is itself optional with the "nocto"
> > > > > mount option.
> > > > >
> > > > > Because some applications use close() to perform flush, there may be
> > > > > some silent side effects as well. I don't see anything special in the
> > > > > patch regarding this. Have you considered it?
> > > > >
> > > > > > So,  Will this affect performance i.e during lookups ?
> > > > > >
> > > > > >> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> > > > > >> oplock_break_received?
> > > > > >
> > > > > > Addressed. Attached the patch.
> > > > > >>
> > > > > >> Regards,
> > > > > >> Shyam
> > > > > >
> > > > > >> I wonder why the choice of 5 seconds? It seems to me a more natural
> > > > > >> interval on the order of one or more of
> > > > > >> - attribute cache timeout
> > > > > >> - lease time
> > > > > >> - echo_interval
> > > > > >
> > > > > > Yes, This sounds good. I modified the patch to defer by attribute
> > > > > > cache timeout interval.
> > > > > >
> > > > > >> Also, this wording is rather confusing:
> > > > > >
> > > > > >>> When file is closed, SMB2 close request is not sent to server
> > > > > >>> immediately and is deferred for 5 seconds interval. When file is
> > > > > >>> reopened by same process for read or write, previous file handle
> > > > > >>> can be used until oplock is held.
> > > > > >
> > > > > >> It's not a "previous" filehandle if it's open, and "can be used" is
> > > > > >> a rather passive statement. A better wording may be "the filehandle
> > > > > >> is reused".
> > > > > >
> > > > > >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> > > > > >> an oplock is held", or "*provided" an oplock is held"?
> > > > > >
> > > > > >>> When same file is reopened by another client during 5 second
> > > > > >>> interval, oplock break is sent by server and file is closed immediately
> > > > > >>> if reference count is zero or else oplock is downgraded.
> > > > > >
> > > > > >> Only the second part of the sentence is relevant to the patch. Also,
> > > > > >> what about lease break?
> > > > > >
> > > > > > Modified the patch.
> > > > >
> > > > > The change to promptly close the handle on oplock or lease break looks
> > > > > reasonable. The rewording in the patch description is better too.
> > > > >
> > > > > >> What happens if the handle is durable or persistent, and the connection
> > > > > >> to the server times out? Is the handle recovered, then closed?
> > > > > >
> > > > > > Do you mean when the session gets reconnected, the deferred handle
> > > > > > will be recovered and closed?
> > > > >
> > > > > Yes, because I expect the client to attempt to reclaim its handles upon
> > > > > reconnection. I don't see any particular code change regarding this.
> > > > >
> > > > > My question is, if there's a deferred close pending, should it do that,
> > > > > or should it simply cut to the chase and close any such handle(s)?
> > > > >
> > > > > Tom.
> > > > >
> > > > > > Regards,
> > > > > > Rohith
> > > > > >
> > > > > > On Thu, Mar 11, 2021 at 11:25 PM Tom Talpey <tom@talpey.com> wrote:
> > > > > >>
> > > > > >> On 3/11/2021 8:47 AM, Shyam Prasad N wrote:
> > > > > >>> Hi Rohith,
> > > > > >>>
> > > > > >>> The changes look good at a high level.
> > > > > >>>
> > > > > >>> Just a few points worth checking:
> > > > > >>> 1. In cifs_open(), should be perform deferred close for certain cases
> > > > > >>> like O_DIRECT? AFAIK, O_DIRECT is just a hint to the filesystem to
> > > > > >>> perform less data caching. By deferring close, aren't we delaying
> > > > > >>> flushing dirty pages? @Steve French ?
> > > > > >>> 2. I see that you're maintaining a new list of files for deferred
> > > > > >>> closing. Since there could be a large number of such files for a big
> > > > > >>> share with sufficient I/O, maybe we should think of a structure with
> > > > > >>> faster lookups (rb trees?).
> > > > > >>> I know we already have a bunch of linked lists in cifs.ko, and we need
> > > > > >>> to review perf impact for all those lists. But this one sounds like a
> > > > > >>> candidate for faster lookups.
> > > > > >>> 3. Minor comment. Maybe change the field name oplock_deferred_close to
> > > > > >>> oplock_break_received?
> > > > > >>>
> > > > > >>> Regards,
> > > > > >>> Shyam
> > > > > >>
> > > > > >> I wonder why the choice of 5 seconds? It seems to me a more natural
> > > > > >> interval on the order of one or more of
> > > > > >> - attribute cache timeout
> > > > > >> - lease time
> > > > > >> - echo_interval
> > > > > >>
> > > > > >> Also, this wording is rather confusing:
> > > > > >>
> > > > > >>> When file is closed, SMB2 close request is not sent to server
> > > > > >>> immediately and is deferred for 5 seconds interval. When file is
> > > > > >>> reopened by same process for read or write, previous file handle
> > > > > >>> can be used until oplock is held.
> > > > > >>
> > > > > >> It's not a "previous" filehandle if it's open, and "can be used" is
> > > > > >> a rather passive statement. A better wording may be "the filehandle
> > > > > >> is reused".
> > > > > >>
> > > > > >> And, "until oplock is held" is similarly awkward. Do you mean "*if*
> > > > > >> an oplock is held", or "*provided" an oplock is held"?
> > > > > >>
> > > > > >>> When same file is reopened by another client during 5 second
> > > > > >>> interval, oplock break is sent by server and file is closed immediately
> > > > > >>> if reference count is zero or else oplock is downgraded.
> > > > > >>
> > > > > >> Only the second part of the sentence is relevant to the patch. Also,
> > > > > >> what about lease break?
> > > > > >>
> > > > > >> What happens if the handle is durable or persistent, and the connection
> > > > > >> to the server times out? Is the handle recovered, then closed?
> > > > > >>
> > > > > >> Tom.
> > > > > >>
> > > > > >>
> > > > > >>> On Tue, Mar 9, 2021 at 2:41 PM Rohith Surabattula
> > > > > >>> <rohiths.msft@gmail.com> wrote:
> > > > > >>>>
> > > > > >>>> Hi All,
> > > > > >>>>
> > > > > >>>> Please find the attached patch which will defer the close to server.
> > > > > >>>> So, performance can be improved.
> > > > > >>>>
> > > > > >>>> i.e When file is open, write, close, open, read, close....
> > > > > >>>> As close is deferred and oplock is held, cache will not be invalidated
> > > > > >>>> and same handle can be used for second open.
> > > > > >>>>
> > > > > >>>> Please review the changes and let me know your thoughts.
> > > > > >>>>
> > > > > >>>> Regards,
> > > > > >>>> Rohith
> > > > > >>>
> > > > > >>>
> > > > > >>>
>
>
>
> --
> Thanks,
>
> Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-04-12  3:43                 ` Rohith Surabattula
@ 2021-04-12 16:57                   ` Aurélien Aptel
  0 siblings, 0 replies; 16+ messages in thread
From: Aurélien Aptel @ 2021-04-12 16:57 UTC (permalink / raw)
  To: Rohith Surabattula, Steve French
  Cc: Shyam Prasad N, linux-cifs, sribhat.msa, ronnie sahlberg


I have added test cifs/105 that triggers oplock breaks. I think it could
be used as a base or be modified to test your code (unless you already
have some script). In any case, I would like to make sure we can trigger
the codepath you are adding on the buildbot. My script is in
/root/buildbot/scripts/fedora29/cifs/105 and it simply calls the
optest.py script in the same directory.

The code is essentially:

    for x in range(n_children):
        pid = os.fork()
        if pid == 0:
            for i in range(n_it):
                a = open(fna, 'w+') # file accessed by first mount point A
                b = open(fnb, 'r')  # same file via 2nd mount point B
                b.close()
                a.close()
            exit(0)

That python script mounts the same share twice but with -o nosharesock,
to disable connection reuse (so you have 2 TCP connection).

Then it creates n_children, and each child opens/closes the same file
for read or write (but accessed through a different mount
point) in a loop (n_it times).

If a process has opened it for READ, it will open it with a READ lease,
so that multiple clients can cache reads.

As soon as one process opens it for writes, it will make the server
notify clients that have a READ lease on the file to downgrade it. This
notification is the oplock break.

Cheers,
-- 
Aurélien Aptel / SUSE Labs Samba Team
GPG: 1839 CB5F 9F5B FB9B AA97  8C99 03C8 A49B 521B D5D3
SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg, DE
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah HRB 247165 (AG München)


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-03-09  9:11 cifs: Deferred close for files Rohith Surabattula
  2021-03-11 13:47 ` Shyam Prasad N
@ 2021-04-12 17:23 ` Steve French
  2021-04-12 17:24   ` Steve French
  1 sibling, 1 reply; 16+ messages in thread
From: Steve French @ 2021-04-12 17:23 UTC (permalink / raw)
  To: Rohith Surabattula
  Cc: linux-cifs, Pavel Shilovsky, Shyam Prasad N, sribhat.msa,
	ronnie sahlberg, Aurélien Aptel

I saw some problems in dmesg ( when running test generic/005) multiple
similar errors.  Anyone else see them?

Rohith,
Can you repro that?

[171877.491187] run fstests generic/005 at 2021-04-12 12:20:57
[171878.245576] ------------[ cut here ]------------
[171878.245578] WARNING: CPU: 5 PID: 546266 at
arch/x86/include/asm/kfence.h:44 kfence_protect_page+0x33/0xa0
[171878.245583] Modules linked in: cifs(OE) md4 rfcomm nls_utf8 libdes
ccm cachefiles fscache nf_tables nfnetlink cmac algif_hash
algif_skcipher af_alg bnep nls_iso8859_1 mei_hdcp intel_rapl_msr
x86_pkg_temp_thermal intel_powerclamp snd_sof_pci_intel_cnl
snd_sof_intel_hda_common coretemp snd_soc_hdac_hda soundwire_intel
soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda
kvm_intel snd_sof_pci nouveau snd_sof iwlmvm kvm snd_sof_xtensa_dsp
snd_hda_ext_core snd_soc_acpi_intel_match mac80211 snd_soc_acpi
crct10dif_pclmul soundwire_bus ghash_clmulni_intel aesni_intel
snd_hda_codec_realtek snd_soc_core mxm_wmi snd_hda_codec_generic
drm_ttm_helper crypto_simd ttm cryptd snd_compress snd_hda_codec_hdmi
libarc4 ac97_bus drm_kms_helper snd_pcm_dmaengine rapl btusb cec
intel_cstate snd_hda_intel uvcvideo btrtl rc_core btbcm
videobuf2_vmalloc btintel videobuf2_memops iwlwifi snd_intel_dspcfg
videobuf2_v4l2 fb_sys_fops snd_intel_sdw_acpi thinkpad_acpi serio_raw
videobuf2_common syscopyarea
[171878.245630]  bluetooth snd_hda_codec processor_thermal_device
efi_pstore videodev processor_thermal_rfim sysfillrect
processor_thermal_mbox snd_seq_midi processor_thermal_rapl sysimgblt
snd_hda_core ecdh_generic snd_seq_midi_event snd_hwdep nvidiafb
ucsi_acpi nvram intel_rapl_common input_leds mc ecc mei_me
platform_profile intel_wmi_thunderbolt vgastate typec_ucsi wmi_bmof
elan_i2c ee1004 ledtrig_audio cfg80211 8250_dw fb_ddc joydev snd_pcm
mei i2c_algo_bit intel_soc_dts_iosf intel_pch_thermal snd_rawmidi
typec snd_seq snd_seq_device snd_timer snd soundcore int3403_thermal
int340x_thermal_zone int3400_thermal acpi_thermal_rel acpi_pad mac_hid
sch_fq_codel parport_pc ppdev lp parport drm sunrpc ip_tables x_tables
autofs4 wacom hid_generic usbhid hid xfs btrfs blake2b_generic xor
raid6_pq libcrc32c rtsx_pci_sdmmc crc32_pclmul nvme psmouse e1000e
i2c_i801 intel_lpss_pci i2c_smbus rtsx_pci nvme_core intel_lpss
xhci_pci idma64 xhci_pci_renesas wmi video pinctrl_cannonlake
[171878.245679]  [last unloaded: cifs]
[171878.245680] CPU: 5 PID: 546266 Comm: kworker/5:1 Tainted: G
W  OE     5.12.0-051200rc6-generic #202104042231
[171878.245682] Hardware name: LENOVO 20MAS08500/20MAS08500, BIOS
N2CET54W (1.37 ) 06/20/2020
[171878.245684] Workqueue: cifsoplockd cifs_oplock_break [cifs]
[171878.245720] RIP: 0010:kfence_protect_page+0x33/0xa0
[171878.245724] Code: 53 89 f3 48 8d 75 e4 48 83 ec 10 65 48 8b 04 25
28 00 00 00 48 89 45 e8 31 c0 e8 d8 f4 d9 ff 48 85 c0 74 06 83 7d e4
01 74 06 <0f> 0b 31 c0 eb 39 48 8b 38 48 89 c2 84 db 75 47 48 89 f8 0f
1f 40
[171878.245726] RSP: 0018:ffffa33b84d47c20 EFLAGS: 00010046
[171878.245727] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
ffffa33b84d47c24
[171878.245729] RDX: ffffa33b84d47c24 RSI: 0000000000000000 RDI:
0000000000000000
[171878.245730] RBP: ffffa33b84d47c40 R08: 0000000000000000 R09:
0000000000000000
[171878.245731] R10: 0000000000000000 R11: 0000000000000000 R12:
0000000000000000
[171878.245732] R13: ffffa33b84d47d68 R14: ffffa33b84d47d68 R15:
0000000000000000
[171878.245734] FS:  0000000000000000(0000) GS:ffff8da17bb40000(0000)
knlGS:0000000000000000
[171878.245736] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[171878.245737] CR2: 0000000000000268 CR3: 0000000484e10003 CR4:
00000000003706e0
[171878.245739] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[171878.245740] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[171878.245741] Call Trace:
[171878.245744]  kfence_unprotect+0x17/0x30
[171878.245746]  kfence_handle_page_fault+0x97/0x250
[171878.245748]  page_fault_oops+0x88/0x130
[171878.245751]  do_user_addr_fault+0x323/0x670
[171878.245754]  ? cifsFileInfo_put_final+0x10a/0x120 [cifs]
[171878.245784]  exc_page_fault+0x6c/0x150
[171878.245808]  asm_exc_page_fault+0x1e/0x30
[171878.245811] RIP: 0010:_raw_spin_lock+0xc/0x30
[171878.245814] Code: ba 01 00 00 00 f0 0f b1 17 75 01 c3 55 89 c6 48
89 e5 e8 d7 42 4b ff 66 90 5d c3 0f 1f 00 0f 1f 44 00 00 31 c0 ba 01
00 00 00 <f0> 0f b1 17 75 01 c3 55 89 c6 48 89 e5 e8 b2 42 4b ff 66 90
5d c3
[171878.245830] RSP: 0018:ffffa33b84d47e10 EFLAGS: 00010246
[171878.245831] RAX: 0000000000000000 RBX: ffff8d9a0994d510 RCX:
000000008020001e
[171878.245832] RDX: 0000000000000001 RSI: 000000008020001e RDI:
0000000000000268
[171878.245833] RBP: ffffa33b84d47e70 R08: 0000000000000001 R09:
0000000000000001
[171878.245834] R10: ffffffffaec73500 R11: 0000000000000000 R12:
0000000000000000
[171878.245835] R13: ffff8d9a0994d400 R14: ffff8d9f9e1f9c20 R15:
ffff8d9a092b1800
[171878.245838]  ? cifs_oplock_break+0x1e9/0x5d0 [cifs]
[171878.245871]  process_one_work+0x220/0x3c0
[171878.245874]  worker_thread+0x50/0x370
[171878.245876]  kthread+0x12f/0x150
[171878.245878]  ? process_one_work+0x3c0/0x3c0
[171878.245880]  ? __kthread_bind_mask+0x70/0x70
[171878.245882]  ret_from_fork+0x22/0x30
[171878.245887] ---[ end trace fefd5c5bed217748 ]---
[171878.245892] ------------[ cut here ]------------

On Tue, Mar 9, 2021 at 3:11 AM Rohith Surabattula
<rohiths.msft@gmail.com> wrote:
>
> Hi All,
>
> Please find the attached patch which will defer the close to server.
> So, performance can be improved.
>
> i.e When file is open, write, close, open, read, close....
> As close is deferred and oplock is held, cache will not be invalidated
> and same handle can be used for second open.
>
> Please review the changes and let me know your thoughts.
>
> Regards,
> Rohith



-- 
Thanks,

Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-04-12 17:23 ` Steve French
@ 2021-04-12 17:24   ` Steve French
  2021-04-12 19:35     ` Rohith Surabattula
  0 siblings, 1 reply; 16+ messages in thread
From: Steve French @ 2021-04-12 17:24 UTC (permalink / raw)
  To: Rohith Surabattula
  Cc: linux-cifs, Pavel Shilovsky, Shyam Prasad N, sribhat.msa,
	ronnie sahlberg, Aurélien Aptel

I was running to Samba server as the target

On Mon, Apr 12, 2021 at 12:23 PM Steve French <smfrench@gmail.com> wrote:
>
> I saw some problems in dmesg ( when running test generic/005) multiple
> similar errors.  Anyone else see them?
>
> Rohith,
> Can you repro that?
>
> [171877.491187] run fstests generic/005 at 2021-04-12 12:20:57
> [171878.245576] ------------[ cut here ]------------
> [171878.245578] WARNING: CPU: 5 PID: 546266 at
> arch/x86/include/asm/kfence.h:44 kfence_protect_page+0x33/0xa0
> [171878.245583] Modules linked in: cifs(OE) md4 rfcomm nls_utf8 libdes
> ccm cachefiles fscache nf_tables nfnetlink cmac algif_hash
> algif_skcipher af_alg bnep nls_iso8859_1 mei_hdcp intel_rapl_msr
> x86_pkg_temp_thermal intel_powerclamp snd_sof_pci_intel_cnl
> snd_sof_intel_hda_common coretemp snd_soc_hdac_hda soundwire_intel
> soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda
> kvm_intel snd_sof_pci nouveau snd_sof iwlmvm kvm snd_sof_xtensa_dsp
> snd_hda_ext_core snd_soc_acpi_intel_match mac80211 snd_soc_acpi
> crct10dif_pclmul soundwire_bus ghash_clmulni_intel aesni_intel
> snd_hda_codec_realtek snd_soc_core mxm_wmi snd_hda_codec_generic
> drm_ttm_helper crypto_simd ttm cryptd snd_compress snd_hda_codec_hdmi
> libarc4 ac97_bus drm_kms_helper snd_pcm_dmaengine rapl btusb cec
> intel_cstate snd_hda_intel uvcvideo btrtl rc_core btbcm
> videobuf2_vmalloc btintel videobuf2_memops iwlwifi snd_intel_dspcfg
> videobuf2_v4l2 fb_sys_fops snd_intel_sdw_acpi thinkpad_acpi serio_raw
> videobuf2_common syscopyarea
> [171878.245630]  bluetooth snd_hda_codec processor_thermal_device
> efi_pstore videodev processor_thermal_rfim sysfillrect
> processor_thermal_mbox snd_seq_midi processor_thermal_rapl sysimgblt
> snd_hda_core ecdh_generic snd_seq_midi_event snd_hwdep nvidiafb
> ucsi_acpi nvram intel_rapl_common input_leds mc ecc mei_me
> platform_profile intel_wmi_thunderbolt vgastate typec_ucsi wmi_bmof
> elan_i2c ee1004 ledtrig_audio cfg80211 8250_dw fb_ddc joydev snd_pcm
> mei i2c_algo_bit intel_soc_dts_iosf intel_pch_thermal snd_rawmidi
> typec snd_seq snd_seq_device snd_timer snd soundcore int3403_thermal
> int340x_thermal_zone int3400_thermal acpi_thermal_rel acpi_pad mac_hid
> sch_fq_codel parport_pc ppdev lp parport drm sunrpc ip_tables x_tables
> autofs4 wacom hid_generic usbhid hid xfs btrfs blake2b_generic xor
> raid6_pq libcrc32c rtsx_pci_sdmmc crc32_pclmul nvme psmouse e1000e
> i2c_i801 intel_lpss_pci i2c_smbus rtsx_pci nvme_core intel_lpss
> xhci_pci idma64 xhci_pci_renesas wmi video pinctrl_cannonlake
> [171878.245679]  [last unloaded: cifs]
> [171878.245680] CPU: 5 PID: 546266 Comm: kworker/5:1 Tainted: G
> W  OE     5.12.0-051200rc6-generic #202104042231
> [171878.245682] Hardware name: LENOVO 20MAS08500/20MAS08500, BIOS
> N2CET54W (1.37 ) 06/20/2020
> [171878.245684] Workqueue: cifsoplockd cifs_oplock_break [cifs]
> [171878.245720] RIP: 0010:kfence_protect_page+0x33/0xa0
> [171878.245724] Code: 53 89 f3 48 8d 75 e4 48 83 ec 10 65 48 8b 04 25
> 28 00 00 00 48 89 45 e8 31 c0 e8 d8 f4 d9 ff 48 85 c0 74 06 83 7d e4
> 01 74 06 <0f> 0b 31 c0 eb 39 48 8b 38 48 89 c2 84 db 75 47 48 89 f8 0f
> 1f 40
> [171878.245726] RSP: 0018:ffffa33b84d47c20 EFLAGS: 00010046
> [171878.245727] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
> ffffa33b84d47c24
> [171878.245729] RDX: ffffa33b84d47c24 RSI: 0000000000000000 RDI:
> 0000000000000000
> [171878.245730] RBP: ffffa33b84d47c40 R08: 0000000000000000 R09:
> 0000000000000000
> [171878.245731] R10: 0000000000000000 R11: 0000000000000000 R12:
> 0000000000000000
> [171878.245732] R13: ffffa33b84d47d68 R14: ffffa33b84d47d68 R15:
> 0000000000000000
> [171878.245734] FS:  0000000000000000(0000) GS:ffff8da17bb40000(0000)
> knlGS:0000000000000000
> [171878.245736] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [171878.245737] CR2: 0000000000000268 CR3: 0000000484e10003 CR4:
> 00000000003706e0
> [171878.245739] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
> 0000000000000000
> [171878.245740] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
> 0000000000000400
> [171878.245741] Call Trace:
> [171878.245744]  kfence_unprotect+0x17/0x30
> [171878.245746]  kfence_handle_page_fault+0x97/0x250
> [171878.245748]  page_fault_oops+0x88/0x130
> [171878.245751]  do_user_addr_fault+0x323/0x670
> [171878.245754]  ? cifsFileInfo_put_final+0x10a/0x120 [cifs]
> [171878.245784]  exc_page_fault+0x6c/0x150
> [171878.245808]  asm_exc_page_fault+0x1e/0x30
> [171878.245811] RIP: 0010:_raw_spin_lock+0xc/0x30
> [171878.245814] Code: ba 01 00 00 00 f0 0f b1 17 75 01 c3 55 89 c6 48
> 89 e5 e8 d7 42 4b ff 66 90 5d c3 0f 1f 00 0f 1f 44 00 00 31 c0 ba 01
> 00 00 00 <f0> 0f b1 17 75 01 c3 55 89 c6 48 89 e5 e8 b2 42 4b ff 66 90
> 5d c3
> [171878.245830] RSP: 0018:ffffa33b84d47e10 EFLAGS: 00010246
> [171878.245831] RAX: 0000000000000000 RBX: ffff8d9a0994d510 RCX:
> 000000008020001e
> [171878.245832] RDX: 0000000000000001 RSI: 000000008020001e RDI:
> 0000000000000268
> [171878.245833] RBP: ffffa33b84d47e70 R08: 0000000000000001 R09:
> 0000000000000001
> [171878.245834] R10: ffffffffaec73500 R11: 0000000000000000 R12:
> 0000000000000000
> [171878.245835] R13: ffff8d9a0994d400 R14: ffff8d9f9e1f9c20 R15:
> ffff8d9a092b1800
> [171878.245838]  ? cifs_oplock_break+0x1e9/0x5d0 [cifs]
> [171878.245871]  process_one_work+0x220/0x3c0
> [171878.245874]  worker_thread+0x50/0x370
> [171878.245876]  kthread+0x12f/0x150
> [171878.245878]  ? process_one_work+0x3c0/0x3c0
> [171878.245880]  ? __kthread_bind_mask+0x70/0x70
> [171878.245882]  ret_from_fork+0x22/0x30
> [171878.245887] ---[ end trace fefd5c5bed217748 ]---
> [171878.245892] ------------[ cut here ]------------
>
> On Tue, Mar 9, 2021 at 3:11 AM Rohith Surabattula
> <rohiths.msft@gmail.com> wrote:
> >
> > Hi All,
> >
> > Please find the attached patch which will defer the close to server.
> > So, performance can be improved.
> >
> > i.e When file is open, write, close, open, read, close....
> > As close is deferred and oplock is held, cache will not be invalidated
> > and same handle can be used for second open.
> >
> > Please review the changes and let me know your thoughts.
> >
> > Regards,
> > Rohith
>
>
>
> --
> Thanks,
>
> Steve



-- 
Thanks,

Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-04-12 17:24   ` Steve French
@ 2021-04-12 19:35     ` Rohith Surabattula
  2021-04-19 23:03       ` Steve French
  0 siblings, 1 reply; 16+ messages in thread
From: Rohith Surabattula @ 2021-04-12 19:35 UTC (permalink / raw)
  To: Steve French
  Cc: linux-cifs, Pavel Shilovsky, Shyam Prasad N, sribhat.msa,
	ronnie sahlberg, Aurélien Aptel

Hi Steve,

I didn't observe when I ran locally against Azure.
Let me try to reproduce.

Regards,
Rohith

On Mon, Apr 12, 2021 at 10:54 PM Steve French <smfrench@gmail.com> wrote:
>
> I was running to Samba server as the target
>
> On Mon, Apr 12, 2021 at 12:23 PM Steve French <smfrench@gmail.com> wrote:
> >
> > I saw some problems in dmesg ( when running test generic/005) multiple
> > similar errors.  Anyone else see them?
> >
> > Rohith,
> > Can you repro that?
> >
> > [171877.491187] run fstests generic/005 at 2021-04-12 12:20:57
> > [171878.245576] ------------[ cut here ]------------
> > [171878.245578] WARNING: CPU: 5 PID: 546266 at
> > arch/x86/include/asm/kfence.h:44 kfence_protect_page+0x33/0xa0
> > [171878.245583] Modules linked in: cifs(OE) md4 rfcomm nls_utf8 libdes
> > ccm cachefiles fscache nf_tables nfnetlink cmac algif_hash
> > algif_skcipher af_alg bnep nls_iso8859_1 mei_hdcp intel_rapl_msr
> > x86_pkg_temp_thermal intel_powerclamp snd_sof_pci_intel_cnl
> > snd_sof_intel_hda_common coretemp snd_soc_hdac_hda soundwire_intel
> > soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda
> > kvm_intel snd_sof_pci nouveau snd_sof iwlmvm kvm snd_sof_xtensa_dsp
> > snd_hda_ext_core snd_soc_acpi_intel_match mac80211 snd_soc_acpi
> > crct10dif_pclmul soundwire_bus ghash_clmulni_intel aesni_intel
> > snd_hda_codec_realtek snd_soc_core mxm_wmi snd_hda_codec_generic
> > drm_ttm_helper crypto_simd ttm cryptd snd_compress snd_hda_codec_hdmi
> > libarc4 ac97_bus drm_kms_helper snd_pcm_dmaengine rapl btusb cec
> > intel_cstate snd_hda_intel uvcvideo btrtl rc_core btbcm
> > videobuf2_vmalloc btintel videobuf2_memops iwlwifi snd_intel_dspcfg
> > videobuf2_v4l2 fb_sys_fops snd_intel_sdw_acpi thinkpad_acpi serio_raw
> > videobuf2_common syscopyarea
> > [171878.245630]  bluetooth snd_hda_codec processor_thermal_device
> > efi_pstore videodev processor_thermal_rfim sysfillrect
> > processor_thermal_mbox snd_seq_midi processor_thermal_rapl sysimgblt
> > snd_hda_core ecdh_generic snd_seq_midi_event snd_hwdep nvidiafb
> > ucsi_acpi nvram intel_rapl_common input_leds mc ecc mei_me
> > platform_profile intel_wmi_thunderbolt vgastate typec_ucsi wmi_bmof
> > elan_i2c ee1004 ledtrig_audio cfg80211 8250_dw fb_ddc joydev snd_pcm
> > mei i2c_algo_bit intel_soc_dts_iosf intel_pch_thermal snd_rawmidi
> > typec snd_seq snd_seq_device snd_timer snd soundcore int3403_thermal
> > int340x_thermal_zone int3400_thermal acpi_thermal_rel acpi_pad mac_hid
> > sch_fq_codel parport_pc ppdev lp parport drm sunrpc ip_tables x_tables
> > autofs4 wacom hid_generic usbhid hid xfs btrfs blake2b_generic xor
> > raid6_pq libcrc32c rtsx_pci_sdmmc crc32_pclmul nvme psmouse e1000e
> > i2c_i801 intel_lpss_pci i2c_smbus rtsx_pci nvme_core intel_lpss
> > xhci_pci idma64 xhci_pci_renesas wmi video pinctrl_cannonlake
> > [171878.245679]  [last unloaded: cifs]
> > [171878.245680] CPU: 5 PID: 546266 Comm: kworker/5:1 Tainted: G
> > W  OE     5.12.0-051200rc6-generic #202104042231
> > [171878.245682] Hardware name: LENOVO 20MAS08500/20MAS08500, BIOS
> > N2CET54W (1.37 ) 06/20/2020
> > [171878.245684] Workqueue: cifsoplockd cifs_oplock_break [cifs]
> > [171878.245720] RIP: 0010:kfence_protect_page+0x33/0xa0
> > [171878.245724] Code: 53 89 f3 48 8d 75 e4 48 83 ec 10 65 48 8b 04 25
> > 28 00 00 00 48 89 45 e8 31 c0 e8 d8 f4 d9 ff 48 85 c0 74 06 83 7d e4
> > 01 74 06 <0f> 0b 31 c0 eb 39 48 8b 38 48 89 c2 84 db 75 47 48 89 f8 0f
> > 1f 40
> > [171878.245726] RSP: 0018:ffffa33b84d47c20 EFLAGS: 00010046
> > [171878.245727] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
> > ffffa33b84d47c24
> > [171878.245729] RDX: ffffa33b84d47c24 RSI: 0000000000000000 RDI:
> > 0000000000000000
> > [171878.245730] RBP: ffffa33b84d47c40 R08: 0000000000000000 R09:
> > 0000000000000000
> > [171878.245731] R10: 0000000000000000 R11: 0000000000000000 R12:
> > 0000000000000000
> > [171878.245732] R13: ffffa33b84d47d68 R14: ffffa33b84d47d68 R15:
> > 0000000000000000
> > [171878.245734] FS:  0000000000000000(0000) GS:ffff8da17bb40000(0000)
> > knlGS:0000000000000000
> > [171878.245736] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [171878.245737] CR2: 0000000000000268 CR3: 0000000484e10003 CR4:
> > 00000000003706e0
> > [171878.245739] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
> > 0000000000000000
> > [171878.245740] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
> > 0000000000000400
> > [171878.245741] Call Trace:
> > [171878.245744]  kfence_unprotect+0x17/0x30
> > [171878.245746]  kfence_handle_page_fault+0x97/0x250
> > [171878.245748]  page_fault_oops+0x88/0x130
> > [171878.245751]  do_user_addr_fault+0x323/0x670
> > [171878.245754]  ? cifsFileInfo_put_final+0x10a/0x120 [cifs]
> > [171878.245784]  exc_page_fault+0x6c/0x150
> > [171878.245808]  asm_exc_page_fault+0x1e/0x30
> > [171878.245811] RIP: 0010:_raw_spin_lock+0xc/0x30
> > [171878.245814] Code: ba 01 00 00 00 f0 0f b1 17 75 01 c3 55 89 c6 48
> > 89 e5 e8 d7 42 4b ff 66 90 5d c3 0f 1f 00 0f 1f 44 00 00 31 c0 ba 01
> > 00 00 00 <f0> 0f b1 17 75 01 c3 55 89 c6 48 89 e5 e8 b2 42 4b ff 66 90
> > 5d c3
> > [171878.245830] RSP: 0018:ffffa33b84d47e10 EFLAGS: 00010246
> > [171878.245831] RAX: 0000000000000000 RBX: ffff8d9a0994d510 RCX:
> > 000000008020001e
> > [171878.245832] RDX: 0000000000000001 RSI: 000000008020001e RDI:
> > 0000000000000268
> > [171878.245833] RBP: ffffa33b84d47e70 R08: 0000000000000001 R09:
> > 0000000000000001
> > [171878.245834] R10: ffffffffaec73500 R11: 0000000000000000 R12:
> > 0000000000000000
> > [171878.245835] R13: ffff8d9a0994d400 R14: ffff8d9f9e1f9c20 R15:
> > ffff8d9a092b1800
> > [171878.245838]  ? cifs_oplock_break+0x1e9/0x5d0 [cifs]
> > [171878.245871]  process_one_work+0x220/0x3c0
> > [171878.245874]  worker_thread+0x50/0x370
> > [171878.245876]  kthread+0x12f/0x150
> > [171878.245878]  ? process_one_work+0x3c0/0x3c0
> > [171878.245880]  ? __kthread_bind_mask+0x70/0x70
> > [171878.245882]  ret_from_fork+0x22/0x30
> > [171878.245887] ---[ end trace fefd5c5bed217748 ]---
> > [171878.245892] ------------[ cut here ]------------
> >
> > On Tue, Mar 9, 2021 at 3:11 AM Rohith Surabattula
> > <rohiths.msft@gmail.com> wrote:
> > >
> > > Hi All,
> > >
> > > Please find the attached patch which will defer the close to server.
> > > So, performance can be improved.
> > >
> > > i.e When file is open, write, close, open, read, close....
> > > As close is deferred and oplock is held, cache will not be invalidated
> > > and same handle can be used for second open.
> > >
> > > Please review the changes and let me know your thoughts.
> > >
> > > Regards,
> > > Rohith
> >
> >
> >
> > --
> > Thanks,
> >
> > Steve
>
>
>
> --
> Thanks,
>
> Steve

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-04-12 19:35     ` Rohith Surabattula
@ 2021-04-19 23:03       ` Steve French
  2021-04-28  3:30         ` Rohith Surabattula
  0 siblings, 1 reply; 16+ messages in thread
From: Steve French @ 2021-04-19 23:03 UTC (permalink / raw)
  To: Rohith Surabattula
  Cc: linux-cifs, Pavel Shilovsky, Shyam Prasad N, sribhat.msa,
	ronnie sahlberg, Aurélien Aptel

[-- Attachment #1: Type: text/plain, Size: 7264 bytes --]

Rohith's followon patch to fix the oops is attached.  I lightly
updated it to fix a checkpatch warning and a minor merge conflict.

Will try it out with xfstests later tonight.

On Mon, Apr 12, 2021 at 2:35 PM Rohith Surabattula
<rohiths.msft@gmail.com> wrote:
>
> Hi Steve,
>
> I didn't observe when I ran locally against Azure.
> Let me try to reproduce.
>
> Regards,
> Rohith
>
> On Mon, Apr 12, 2021 at 10:54 PM Steve French <smfrench@gmail.com> wrote:
> >
> > I was running to Samba server as the target
> >
> > On Mon, Apr 12, 2021 at 12:23 PM Steve French <smfrench@gmail.com> wrote:
> > >
> > > I saw some problems in dmesg ( when running test generic/005) multiple
> > > similar errors.  Anyone else see them?
> > >
> > > Rohith,
> > > Can you repro that?
> > >
> > > [171877.491187] run fstests generic/005 at 2021-04-12 12:20:57
> > > [171878.245576] ------------[ cut here ]------------
> > > [171878.245578] WARNING: CPU: 5 PID: 546266 at
> > > arch/x86/include/asm/kfence.h:44 kfence_protect_page+0x33/0xa0
> > > [171878.245583] Modules linked in: cifs(OE) md4 rfcomm nls_utf8 libdes
> > > ccm cachefiles fscache nf_tables nfnetlink cmac algif_hash
> > > algif_skcipher af_alg bnep nls_iso8859_1 mei_hdcp intel_rapl_msr
> > > x86_pkg_temp_thermal intel_powerclamp snd_sof_pci_intel_cnl
> > > snd_sof_intel_hda_common coretemp snd_soc_hdac_hda soundwire_intel
> > > soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda
> > > kvm_intel snd_sof_pci nouveau snd_sof iwlmvm kvm snd_sof_xtensa_dsp
> > > snd_hda_ext_core snd_soc_acpi_intel_match mac80211 snd_soc_acpi
> > > crct10dif_pclmul soundwire_bus ghash_clmulni_intel aesni_intel
> > > snd_hda_codec_realtek snd_soc_core mxm_wmi snd_hda_codec_generic
> > > drm_ttm_helper crypto_simd ttm cryptd snd_compress snd_hda_codec_hdmi
> > > libarc4 ac97_bus drm_kms_helper snd_pcm_dmaengine rapl btusb cec
> > > intel_cstate snd_hda_intel uvcvideo btrtl rc_core btbcm
> > > videobuf2_vmalloc btintel videobuf2_memops iwlwifi snd_intel_dspcfg
> > > videobuf2_v4l2 fb_sys_fops snd_intel_sdw_acpi thinkpad_acpi serio_raw
> > > videobuf2_common syscopyarea
> > > [171878.245630]  bluetooth snd_hda_codec processor_thermal_device
> > > efi_pstore videodev processor_thermal_rfim sysfillrect
> > > processor_thermal_mbox snd_seq_midi processor_thermal_rapl sysimgblt
> > > snd_hda_core ecdh_generic snd_seq_midi_event snd_hwdep nvidiafb
> > > ucsi_acpi nvram intel_rapl_common input_leds mc ecc mei_me
> > > platform_profile intel_wmi_thunderbolt vgastate typec_ucsi wmi_bmof
> > > elan_i2c ee1004 ledtrig_audio cfg80211 8250_dw fb_ddc joydev snd_pcm
> > > mei i2c_algo_bit intel_soc_dts_iosf intel_pch_thermal snd_rawmidi
> > > typec snd_seq snd_seq_device snd_timer snd soundcore int3403_thermal
> > > int340x_thermal_zone int3400_thermal acpi_thermal_rel acpi_pad mac_hid
> > > sch_fq_codel parport_pc ppdev lp parport drm sunrpc ip_tables x_tables
> > > autofs4 wacom hid_generic usbhid hid xfs btrfs blake2b_generic xor
> > > raid6_pq libcrc32c rtsx_pci_sdmmc crc32_pclmul nvme psmouse e1000e
> > > i2c_i801 intel_lpss_pci i2c_smbus rtsx_pci nvme_core intel_lpss
> > > xhci_pci idma64 xhci_pci_renesas wmi video pinctrl_cannonlake
> > > [171878.245679]  [last unloaded: cifs]
> > > [171878.245680] CPU: 5 PID: 546266 Comm: kworker/5:1 Tainted: G
> > > W  OE     5.12.0-051200rc6-generic #202104042231
> > > [171878.245682] Hardware name: LENOVO 20MAS08500/20MAS08500, BIOS
> > > N2CET54W (1.37 ) 06/20/2020
> > > [171878.245684] Workqueue: cifsoplockd cifs_oplock_break [cifs]
> > > [171878.245720] RIP: 0010:kfence_protect_page+0x33/0xa0
> > > [171878.245724] Code: 53 89 f3 48 8d 75 e4 48 83 ec 10 65 48 8b 04 25
> > > 28 00 00 00 48 89 45 e8 31 c0 e8 d8 f4 d9 ff 48 85 c0 74 06 83 7d e4
> > > 01 74 06 <0f> 0b 31 c0 eb 39 48 8b 38 48 89 c2 84 db 75 47 48 89 f8 0f
> > > 1f 40
> > > [171878.245726] RSP: 0018:ffffa33b84d47c20 EFLAGS: 00010046
> > > [171878.245727] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
> > > ffffa33b84d47c24
> > > [171878.245729] RDX: ffffa33b84d47c24 RSI: 0000000000000000 RDI:
> > > 0000000000000000
> > > [171878.245730] RBP: ffffa33b84d47c40 R08: 0000000000000000 R09:
> > > 0000000000000000
> > > [171878.245731] R10: 0000000000000000 R11: 0000000000000000 R12:
> > > 0000000000000000
> > > [171878.245732] R13: ffffa33b84d47d68 R14: ffffa33b84d47d68 R15:
> > > 0000000000000000
> > > [171878.245734] FS:  0000000000000000(0000) GS:ffff8da17bb40000(0000)
> > > knlGS:0000000000000000
> > > [171878.245736] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > [171878.245737] CR2: 0000000000000268 CR3: 0000000484e10003 CR4:
> > > 00000000003706e0
> > > [171878.245739] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
> > > 0000000000000000
> > > [171878.245740] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
> > > 0000000000000400
> > > [171878.245741] Call Trace:
> > > [171878.245744]  kfence_unprotect+0x17/0x30
> > > [171878.245746]  kfence_handle_page_fault+0x97/0x250
> > > [171878.245748]  page_fault_oops+0x88/0x130
> > > [171878.245751]  do_user_addr_fault+0x323/0x670
> > > [171878.245754]  ? cifsFileInfo_put_final+0x10a/0x120 [cifs]
> > > [171878.245784]  exc_page_fault+0x6c/0x150
> > > [171878.245808]  asm_exc_page_fault+0x1e/0x30
> > > [171878.245811] RIP: 0010:_raw_spin_lock+0xc/0x30
> > > [171878.245814] Code: ba 01 00 00 00 f0 0f b1 17 75 01 c3 55 89 c6 48
> > > 89 e5 e8 d7 42 4b ff 66 90 5d c3 0f 1f 00 0f 1f 44 00 00 31 c0 ba 01
> > > 00 00 00 <f0> 0f b1 17 75 01 c3 55 89 c6 48 89 e5 e8 b2 42 4b ff 66 90
> > > 5d c3
> > > [171878.245830] RSP: 0018:ffffa33b84d47e10 EFLAGS: 00010246
> > > [171878.245831] RAX: 0000000000000000 RBX: ffff8d9a0994d510 RCX:
> > > 000000008020001e
> > > [171878.245832] RDX: 0000000000000001 RSI: 000000008020001e RDI:
> > > 0000000000000268
> > > [171878.245833] RBP: ffffa33b84d47e70 R08: 0000000000000001 R09:
> > > 0000000000000001
> > > [171878.245834] R10: ffffffffaec73500 R11: 0000000000000000 R12:
> > > 0000000000000000
> > > [171878.245835] R13: ffff8d9a0994d400 R14: ffff8d9f9e1f9c20 R15:
> > > ffff8d9a092b1800
> > > [171878.245838]  ? cifs_oplock_break+0x1e9/0x5d0 [cifs]
> > > [171878.245871]  process_one_work+0x220/0x3c0
> > > [171878.245874]  worker_thread+0x50/0x370
> > > [171878.245876]  kthread+0x12f/0x150
> > > [171878.245878]  ? process_one_work+0x3c0/0x3c0
> > > [171878.245880]  ? __kthread_bind_mask+0x70/0x70
> > > [171878.245882]  ret_from_fork+0x22/0x30
> > > [171878.245887] ---[ end trace fefd5c5bed217748 ]---
> > > [171878.245892] ------------[ cut here ]------------
> > >
> > > On Tue, Mar 9, 2021 at 3:11 AM Rohith Surabattula
> > > <rohiths.msft@gmail.com> wrote:
> > > >
> > > > Hi All,
> > > >
> > > > Please find the attached patch which will defer the close to server.
> > > > So, performance can be improved.
> > > >
> > > > i.e When file is open, write, close, open, read, close....
> > > > As close is deferred and oplock is held, cache will not be invalidated
> > > > and same handle can be used for second open.
> > > >
> > > > Please review the changes and let me know your thoughts.
> > > >
> > > > Regards,
> > > > Rohith
> > >
> > >
> > >
> > > --
> > > Thanks,
> > >
> > > Steve
> >
> >
> >
> > --
> > Thanks,
> >
> > Steve



-- 
Thanks,

Steve

[-- Attachment #2: 0002-Cifs-Fix-kernel-oops-caused-by-deferred-close-for-fi.patch --]
[-- Type: text/x-patch, Size: 3463 bytes --]

From 81e4e8c7ad9b528502d1f8dc03b5f941f7a85112 Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Mon, 19 Apr 2021 19:02:03 +0000
Subject: [PATCH] Cifs: Fix kernel oops caused by deferred close for files.

Fix regression issue caused by deferred close for files.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
---
 fs/cifs/cifsproto.h |  2 ++
 fs/cifs/file.c      |  2 +-
 fs/cifs/inode.c     |  3 ++-
 fs/cifs/misc.c      | 18 ++++++++++++++++++
 4 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e2e56e1f66f5..00e49156910f 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -267,6 +267,8 @@ extern void cifs_del_deferred_close(struct cifsFileInfo *cfile);
 
 extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode);
 
+extern void cifs_close_all_deferred_files(struct cifs_tcon *cifs_tcon);
+
 extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
 				 int from_reconnect);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 744cce9357ba..461860279bf0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -4860,7 +4860,6 @@ void cifs_oplock_break(struct work_struct *work)
 							     cinode);
 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
 	}
-	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
 	/*
 	 * When oplock break is received and there are no active
 	 * file handles but cached, then set the flag oplock_break_received.
@@ -4873,6 +4872,7 @@ void cifs_oplock_break(struct work_struct *work)
 		mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
 	}
 	spin_unlock(&CIFS_I(inode)->deferred_lock);
+	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
 	cifs_done_oplock_break(cinode);
 }
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index ce7d7173550c..ab1541c5210f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1643,7 +1643,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 		goto unlink_out;
 	}
 
-	cifs_close_deferred_file(CIFS_I(inode));
+	cifs_close_all_deferred_files(tcon);
 	if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		rc = CIFSPOSIXDelFile(xid, tcon, full_path,
@@ -2110,6 +2110,7 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
 		goto cifs_rename_exit;
 	}
 
+	cifs_close_all_deferred_files(tcon);
 	rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
 			    to_name);
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 5892748b34e6..bf90dfa98d64 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -734,6 +734,24 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
 	}
 }
 
+void
+cifs_close_all_deferred_files(struct cifs_tcon *tcon)
+{
+	struct cifsFileInfo *cfile;
+	struct cifsInodeInfo *cinode;
+	struct list_head *tmp;
+	struct cifs_deferred_close *dclose;
+
+	list_for_each(tmp, &tcon->openFileList) {
+		cfile = list_entry(tmp, struct cifsFileInfo, tlist);
+		cinode = CIFS_I(d_inode(cfile->dentry));
+		spin_lock(&cinode->deferred_lock);
+		if (cifs_is_deferred_close(cfile, &dclose))
+			mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+		spin_unlock(&cinode->deferred_lock);
+	}
+}
+
 /* parses DFS refferal V3 structure
  * caller is responsible for freeing target_nodes
  * returns:
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: cifs: Deferred close for files
  2021-04-19 23:03       ` Steve French
@ 2021-04-28  3:30         ` Rohith Surabattula
  0 siblings, 0 replies; 16+ messages in thread
From: Rohith Surabattula @ 2021-04-28  3:30 UTC (permalink / raw)
  To: Steve French
  Cc: linux-cifs, Pavel Shilovsky, Shyam Prasad N, sribhat.msa,
	ronnie sahlberg, Aurélien Aptel

[-- Attachment #1: Type: text/plain, Size: 7754 bytes --]

Hi Steve,

I have updated the patch to fix the xfstest generic/422 failure.

Regards,
Rohith

On Tue, Apr 20, 2021 at 4:34 AM Steve French <smfrench@gmail.com> wrote:
>
> Rohith's follow-on patch to fix the oops is attached.  I lightly
> updated it to fix a checkpatch warning and a minor merge conflict.
>
> Will try it out with xfstests later tonight.
>
> On Mon, Apr 12, 2021 at 2:35 PM Rohith Surabattula
> <rohiths.msft@gmail.com> wrote:
> >
> > Hi Steve,
> >
> > I didn't observe when I ran locally against Azure.
> > Let me try to reproduce.
> >
> > Regards,
> > Rohith
> >
> > On Mon, Apr 12, 2021 at 10:54 PM Steve French <smfrench@gmail.com> wrote:
> > >
> > > I was running to Samba server as the target
> > >
> > > On Mon, Apr 12, 2021 at 12:23 PM Steve French <smfrench@gmail.com> wrote:
> > > >
> > > > I saw some problems in dmesg ( when running test generic/005) multiple
> > > > similar errors.  Anyone else see them?
> > > >
> > > > Rohith,
> > > > Can you repro that?
> > > >
> > > > [171877.491187] run fstests generic/005 at 2021-04-12 12:20:57
> > > > [171878.245576] ------------[ cut here ]------------
> > > > [171878.245578] WARNING: CPU: 5 PID: 546266 at
> > > > arch/x86/include/asm/kfence.h:44 kfence_protect_page+0x33/0xa0
> > > > [171878.245583] Modules linked in: cifs(OE) md4 rfcomm nls_utf8 libdes
> > > > ccm cachefiles fscache nf_tables nfnetlink cmac algif_hash
> > > > algif_skcipher af_alg bnep nls_iso8859_1 mei_hdcp intel_rapl_msr
> > > > x86_pkg_temp_thermal intel_powerclamp snd_sof_pci_intel_cnl
> > > > snd_sof_intel_hda_common coretemp snd_soc_hdac_hda soundwire_intel
> > > > soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda
> > > > kvm_intel snd_sof_pci nouveau snd_sof iwlmvm kvm snd_sof_xtensa_dsp
> > > > snd_hda_ext_core snd_soc_acpi_intel_match mac80211 snd_soc_acpi
> > > > crct10dif_pclmul soundwire_bus ghash_clmulni_intel aesni_intel
> > > > snd_hda_codec_realtek snd_soc_core mxm_wmi snd_hda_codec_generic
> > > > drm_ttm_helper crypto_simd ttm cryptd snd_compress snd_hda_codec_hdmi
> > > > libarc4 ac97_bus drm_kms_helper snd_pcm_dmaengine rapl btusb cec
> > > > intel_cstate snd_hda_intel uvcvideo btrtl rc_core btbcm
> > > > videobuf2_vmalloc btintel videobuf2_memops iwlwifi snd_intel_dspcfg
> > > > videobuf2_v4l2 fb_sys_fops snd_intel_sdw_acpi thinkpad_acpi serio_raw
> > > > videobuf2_common syscopyarea
> > > > [171878.245630]  bluetooth snd_hda_codec processor_thermal_device
> > > > efi_pstore videodev processor_thermal_rfim sysfillrect
> > > > processor_thermal_mbox snd_seq_midi processor_thermal_rapl sysimgblt
> > > > snd_hda_core ecdh_generic snd_seq_midi_event snd_hwdep nvidiafb
> > > > ucsi_acpi nvram intel_rapl_common input_leds mc ecc mei_me
> > > > platform_profile intel_wmi_thunderbolt vgastate typec_ucsi wmi_bmof
> > > > elan_i2c ee1004 ledtrig_audio cfg80211 8250_dw fb_ddc joydev snd_pcm
> > > > mei i2c_algo_bit intel_soc_dts_iosf intel_pch_thermal snd_rawmidi
> > > > typec snd_seq snd_seq_device snd_timer snd soundcore int3403_thermal
> > > > int340x_thermal_zone int3400_thermal acpi_thermal_rel acpi_pad mac_hid
> > > > sch_fq_codel parport_pc ppdev lp parport drm sunrpc ip_tables x_tables
> > > > autofs4 wacom hid_generic usbhid hid xfs btrfs blake2b_generic xor
> > > > raid6_pq libcrc32c rtsx_pci_sdmmc crc32_pclmul nvme psmouse e1000e
> > > > i2c_i801 intel_lpss_pci i2c_smbus rtsx_pci nvme_core intel_lpss
> > > > xhci_pci idma64 xhci_pci_renesas wmi video pinctrl_cannonlake
> > > > [171878.245679]  [last unloaded: cifs]
> > > > [171878.245680] CPU: 5 PID: 546266 Comm: kworker/5:1 Tainted: G
> > > > W  OE     5.12.0-051200rc6-generic #202104042231
> > > > [171878.245682] Hardware name: LENOVO 20MAS08500/20MAS08500, BIOS
> > > > N2CET54W (1.37 ) 06/20/2020
> > > > [171878.245684] Workqueue: cifsoplockd cifs_oplock_break [cifs]
> > > > [171878.245720] RIP: 0010:kfence_protect_page+0x33/0xa0
> > > > [171878.245724] Code: 53 89 f3 48 8d 75 e4 48 83 ec 10 65 48 8b 04 25
> > > > 28 00 00 00 48 89 45 e8 31 c0 e8 d8 f4 d9 ff 48 85 c0 74 06 83 7d e4
> > > > 01 74 06 <0f> 0b 31 c0 eb 39 48 8b 38 48 89 c2 84 db 75 47 48 89 f8 0f
> > > > 1f 40
> > > > [171878.245726] RSP: 0018:ffffa33b84d47c20 EFLAGS: 00010046
> > > > [171878.245727] RAX: 0000000000000000 RBX: 0000000000000000 RCX:
> > > > ffffa33b84d47c24
> > > > [171878.245729] RDX: ffffa33b84d47c24 RSI: 0000000000000000 RDI:
> > > > 0000000000000000
> > > > [171878.245730] RBP: ffffa33b84d47c40 R08: 0000000000000000 R09:
> > > > 0000000000000000
> > > > [171878.245731] R10: 0000000000000000 R11: 0000000000000000 R12:
> > > > 0000000000000000
> > > > [171878.245732] R13: ffffa33b84d47d68 R14: ffffa33b84d47d68 R15:
> > > > 0000000000000000
> > > > [171878.245734] FS:  0000000000000000(0000) GS:ffff8da17bb40000(0000)
> > > > knlGS:0000000000000000
> > > > [171878.245736] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > > [171878.245737] CR2: 0000000000000268 CR3: 0000000484e10003 CR4:
> > > > 00000000003706e0
> > > > [171878.245739] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
> > > > 0000000000000000
> > > > [171878.245740] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
> > > > 0000000000000400
> > > > [171878.245741] Call Trace:
> > > > [171878.245744]  kfence_unprotect+0x17/0x30
> > > > [171878.245746]  kfence_handle_page_fault+0x97/0x250
> > > > [171878.245748]  page_fault_oops+0x88/0x130
> > > > [171878.245751]  do_user_addr_fault+0x323/0x670
> > > > [171878.245754]  ? cifsFileInfo_put_final+0x10a/0x120 [cifs]
> > > > [171878.245784]  exc_page_fault+0x6c/0x150
> > > > [171878.245808]  asm_exc_page_fault+0x1e/0x30
> > > > [171878.245811] RIP: 0010:_raw_spin_lock+0xc/0x30
> > > > [171878.245814] Code: ba 01 00 00 00 f0 0f b1 17 75 01 c3 55 89 c6 48
> > > > 89 e5 e8 d7 42 4b ff 66 90 5d c3 0f 1f 00 0f 1f 44 00 00 31 c0 ba 01
> > > > 00 00 00 <f0> 0f b1 17 75 01 c3 55 89 c6 48 89 e5 e8 b2 42 4b ff 66 90
> > > > 5d c3
> > > > [171878.245830] RSP: 0018:ffffa33b84d47e10 EFLAGS: 00010246
> > > > [171878.245831] RAX: 0000000000000000 RBX: ffff8d9a0994d510 RCX:
> > > > 000000008020001e
> > > > [171878.245832] RDX: 0000000000000001 RSI: 000000008020001e RDI:
> > > > 0000000000000268
> > > > [171878.245833] RBP: ffffa33b84d47e70 R08: 0000000000000001 R09:
> > > > 0000000000000001
> > > > [171878.245834] R10: ffffffffaec73500 R11: 0000000000000000 R12:
> > > > 0000000000000000
> > > > [171878.245835] R13: ffff8d9a0994d400 R14: ffff8d9f9e1f9c20 R15:
> > > > ffff8d9a092b1800
> > > > [171878.245838]  ? cifs_oplock_break+0x1e9/0x5d0 [cifs]
> > > > [171878.245871]  process_one_work+0x220/0x3c0
> > > > [171878.245874]  worker_thread+0x50/0x370
> > > > [171878.245876]  kthread+0x12f/0x150
> > > > [171878.245878]  ? process_one_work+0x3c0/0x3c0
> > > > [171878.245880]  ? __kthread_bind_mask+0x70/0x70
> > > > [171878.245882]  ret_from_fork+0x22/0x30
> > > > [171878.245887] ---[ end trace fefd5c5bed217748 ]---
> > > > [171878.245892] ------------[ cut here ]------------
> > > >
> > > > On Tue, Mar 9, 2021 at 3:11 AM Rohith Surabattula
> > > > <rohiths.msft@gmail.com> wrote:
> > > > >
> > > > > Hi All,
> > > > >
> > > > > Please find the attached patch which will defer the close to server.
> > > > > So, performance can be improved.
> > > > >
> > > > > i.e., when a file goes through open, write, close, open, read, close, ...
> > > > > as the close is deferred and the oplock is held, the cache will not be
> > > > > invalidated and the same handle can be reused for the second open.
> > > > >
> > > > > Please review the changes and let me know your thoughts.
> > > > >
> > > > > Regards,
> > > > > Rohith
> > > >
> > > >
> > > >
> > > > --
> > > > Thanks,
> > > >
> > > > Steve
> > >
> > >
> > >
> > > --
> > > Thanks,
> > >
> > > Steve
>
>
>
> --
> Thanks,
>
> Steve

[-- Attachment #2: 0002-Cifs-Fix-kernel-oops-caused-by-deferred-close-for-fi (2).patch --]
[-- Type: application/octet-stream, Size: 4916 bytes --]

From 632499231ec80a1343069e346b7c80461c514610 Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Mon, 19 Apr 2021 19:02:03 +0000
Subject: [PATCH 39/39] Cifs: Fix kernel oops caused by deferred close for
 files.

Fix regression issue caused by deferred close for files.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsproto.h |  2 ++
 fs/cifs/file.c      | 16 ++++++++++++----
 fs/cifs/inode.c     |  3 ++-
 fs/cifs/misc.c      | 17 +++++++++++++++++
 4 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c6dacce87d3a..3c6b97ef39d3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -278,6 +278,8 @@ extern void cifs_del_deferred_close(struct cifsFileInfo *cfile);
 
 extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode);
 
+extern void cifs_close_all_deferred_files(struct cifs_tcon *cifs_tcon);
+
 extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
 				 int from_reconnect);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7e97aeabd616..5058252eeae6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -872,6 +872,10 @@ void smb2_deferred_work_close(struct work_struct *work)
 			struct cifsFileInfo, deferred.work);
 
 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+	if (!cfile->deferred_scheduled) {
+		spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+		return;
+	}
 	cifs_del_deferred_close(cfile);
 	cfile->deferred_scheduled = false;
 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
@@ -1981,8 +1985,10 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
 
 	if (total_written > 0) {
 		spin_lock(&d_inode(dentry)->i_lock);
-		if (*offset > d_inode(dentry)->i_size)
+		if (*offset > d_inode(dentry)->i_size) {
 			i_size_write(d_inode(dentry), *offset);
+			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
+		}
 		spin_unlock(&d_inode(dentry)->i_lock);
 	}
 	mark_inode_dirty_sync(d_inode(dentry));
@@ -2641,8 +2647,10 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
 
 	if (rc > 0) {
 		spin_lock(&inode->i_lock);
-		if (pos > inode->i_size)
+		if (pos > inode->i_size) {
 			i_size_write(inode, pos);
+			inode->i_blocks = (512 - 1 + pos) >> 9;
+		}
 		spin_unlock(&inode->i_lock);
 	}
 
@@ -4858,7 +4866,6 @@ void cifs_oplock_break(struct work_struct *work)
 							     cinode);
 		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
 	}
-	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
 	/*
 	 * When oplock break is received and there are no active
 	 * file handles but cached, then set the flag oplock_break_received.
@@ -4866,11 +4873,12 @@ void cifs_oplock_break(struct work_struct *work)
 	 */
 	spin_lock(&CIFS_I(inode)->deferred_lock);
 	is_deferred = cifs_is_deferred_close(cfile, &dclose);
-	if (is_deferred) {
+	if (is_deferred && cfile->deferred_scheduled) {
 		cfile->oplock_break_received = true;
 		mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
 	}
 	spin_unlock(&CIFS_I(inode)->deferred_lock);
+	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
 	cifs_done_oplock_break(cinode);
 }
 
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index b4326ffcefce..baa9ffb4c446 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1645,7 +1645,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
 		goto unlink_out;
 	}
 
-	cifs_close_deferred_file(CIFS_I(inode));
+	cifs_close_all_deferred_files(tcon);
 	if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		rc = CIFSPOSIXDelFile(xid, tcon, full_path,
@@ -2113,6 +2113,7 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
 		goto cifs_rename_exit;
 	}
 
+	cifs_close_all_deferred_files(tcon);
 	rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
 			    to_name);
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index e63fbd4a6bfe..524dbdfb7184 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -734,6 +734,23 @@ cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
 	}
 }
 
+void
+cifs_close_all_deferred_files(struct cifs_tcon *tcon)
+{
+	struct cifsFileInfo *cfile;
+	struct cifsInodeInfo *cinode;
+	struct list_head *tmp;
+
+	spin_lock(&tcon->open_file_lock);
+	list_for_each(tmp, &tcon->openFileList) {
+		cfile = list_entry(tmp, struct cifsFileInfo, tlist);
+		cinode = CIFS_I(d_inode(cfile->dentry));
+		if (delayed_work_pending(&cfile->deferred))
+			mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+	}
+	spin_unlock(&tcon->open_file_lock);
+}
+
 /* parses DFS refferal V3 structure
  * caller is responsible for freeing target_nodes
  * returns:
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2021-04-28  3:30 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-09  9:11 cifs: Deferred close for files Rohith Surabattula
2021-03-11 13:47 ` Shyam Prasad N
2021-03-11 17:55   ` Tom Talpey
2021-03-22 17:07     ` Rohith Surabattula
2021-03-24 14:20       ` Tom Talpey
2021-03-25  2:42         ` Rohith Surabattula
2021-04-07 14:57           ` Rohith Surabattula
2021-04-11 12:19             ` Rohith Surabattula
2021-04-11 18:49               ` Steve French
2021-04-12  3:43                 ` Rohith Surabattula
2021-04-12 16:57                   ` Aurélien Aptel
2021-04-12 17:23 ` Steve French
2021-04-12 17:24   ` Steve French
2021-04-12 19:35     ` Rohith Surabattula
2021-04-19 23:03       ` Steve French
2021-04-28  3:30         ` Rohith Surabattula

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).