All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/6] ceph: don't request caps for idle open files
@ 2020-02-28 11:55 Yan, Zheng
  2020-02-28 11:55 ` [PATCH v3 1/6] ceph: always renew caps if mds_wanted is insufficient Yan, Zheng
                   ` (7 more replies)
  0 siblings, 8 replies; 12+ messages in thread
From: Yan, Zheng @ 2020-02-28 11:55 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

This series make cephfs client not request caps for open files that
idle for a long time. For the case that one active client and multiple
standby clients open the same file, this increase the possibility that
mds issues exclusive caps to the active client.

Yan, Zheng (4):
  ceph: always renew caps if mds_wanted is insufficient
  ceph: consider inode's last read/write when calculating wanted caps
  ceph: simplify calling of ceph_get_fmode()
  ceph: remove delay check logic from ceph_check_caps()

 fs/ceph/caps.c               | 324 +++++++++++++++--------------------
 fs/ceph/file.c               |  39 ++---
 fs/ceph/inode.c              |  19 +-
 fs/ceph/ioctl.c              |   2 +
 fs/ceph/mds_client.c         |   5 -
 fs/ceph/super.h              |  35 ++--
 include/linux/ceph/ceph_fs.h |   1 +
 7 files changed, 188 insertions(+), 237 deletions(-)

changes since v2
 - make __ceph_caps_file_wanted more readable
 - add patch 5 and 6, which fix hung write during testing patch 1~4

-- 
2.21.1

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v3 1/6] ceph: always renew caps if mds_wanted is insufficient
  2020-02-28 11:55 [PATCH v3 0/6] ceph: don't request caps for idle open files Yan, Zheng
@ 2020-02-28 11:55 ` Yan, Zheng
  2020-02-28 11:55 ` [PATCH v3 2/6] ceph: consider inode's last read/write when calculating wanted caps Yan, Zheng
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2020-02-28 11:55 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

original code only renews caps for inodes with CEPH_I_CAP_DROPPED flags.
The flag indicates that mds closed session and caps were dropped. This
patch is preparation for not requesting caps for idle open files.

CEPH_I_CAP_DROPPED is no longer tested by anyone, so this patch also
remove it.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/caps.c       | 36 +++++++++++++++---------------------
 fs/ceph/mds_client.c |  5 -----
 fs/ceph/super.h      | 11 +++++------
 3 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d05717397c2a..293920d013ff 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2659,6 +2659,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 		}
 	} else {
 		int session_readonly = false;
+		int mds_wanted;
 		if (ci->i_auth_cap &&
 		    (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) {
 			struct ceph_mds_session *s = ci->i_auth_cap->session;
@@ -2667,32 +2668,27 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 			spin_unlock(&s->s_cap_lock);
 		}
 		if (session_readonly) {
-			dout("get_cap_refs %p needed %s but mds%d readonly\n",
+			dout("get_cap_refs %p need %s but mds%d readonly\n",
 			     inode, ceph_cap_string(need), ci->i_auth_cap->mds);
 			ret = -EROFS;
 			goto out_unlock;
 		}
 
-		if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
-			int mds_wanted;
-			if (READ_ONCE(mdsc->fsc->mount_state) ==
-			    CEPH_MOUNT_SHUTDOWN) {
-				dout("get_cap_refs %p forced umount\n", inode);
-				ret = -EIO;
-				goto out_unlock;
-			}
-			mds_wanted = __ceph_caps_mds_wanted(ci, false);
-			if (need & ~(mds_wanted & need)) {
-				dout("get_cap_refs %p caps were dropped"
-				     " (session killed?)\n", inode);
-				ret = -ESTALE;
-				goto out_unlock;
-			}
-			if (!(file_wanted & ~mds_wanted))
-				ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
+		if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+			dout("get_cap_refs %p forced umount\n", inode);
+			ret = -EIO;
+			goto out_unlock;
+		}
+		mds_wanted = __ceph_caps_mds_wanted(ci, false);
+		if (need & ~mds_wanted) {
+			dout("get_cap_refs %p need %s > mds_wanted %s\n",
+			     inode, ceph_cap_string(need),
+			     ceph_cap_string(mds_wanted));
+			ret = -ESTALE;
+			goto out_unlock;
 		}
 
-		dout("get_cap_refs %p have %s needed %s\n", inode,
+		dout("get_cap_refs %p have %s need %s\n", inode,
 		     ceph_cap_string(have), ceph_cap_string(need));
 	}
 out_unlock:
@@ -3646,8 +3642,6 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 		goto out_unlock;
 
 	if (target < 0) {
-		if (cap->mds_wanted | cap->issued)
-			ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 		__ceph_remove_cap(cap, false);
 		goto out_unlock;
 	}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2da98b6cc064..baf801ba34d9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1411,8 +1411,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
 	spin_lock(&ci->i_ceph_lock);
-	if (cap->mds_wanted | cap->issued)
-		ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 	__ceph_remove_cap(cap, false);
 	if (!ci->i_auth_cap) {
 		struct ceph_cap_flush *cf;
@@ -1578,9 +1576,6 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
 			/* mds did not re-issue stale cap */
 			spin_lock(&ci->i_ceph_lock);
 			cap->issued = cap->implemented = CEPH_CAP_PIN;
-			/* make sure mds knows what we want */
-			if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
-				ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 			spin_unlock(&ci->i_ceph_lock);
 		}
 	} else if (ev == FORCE_RO) {
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 37dc1ac8f6c3..48e84d7f48a0 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -517,12 +517,11 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */
 #define CEPH_I_POOL_WR		(1 << 5)  /* can write to pool */
 #define CEPH_I_SEC_INITED	(1 << 6)  /* security initialized */
-#define CEPH_I_CAP_DROPPED	(1 << 7)  /* caps were forcibly dropped */
-#define CEPH_I_KICK_FLUSH	(1 << 8)  /* kick flushing caps */
-#define CEPH_I_FLUSH_SNAPS	(1 << 9)  /* need flush snapss */
-#define CEPH_I_ERROR_WRITE	(1 << 10) /* have seen write errors */
-#define CEPH_I_ERROR_FILELOCK	(1 << 11) /* have seen file lock errors */
-#define CEPH_I_ODIRECT		(1 << 12) /* inode in direct I/O mode */
+#define CEPH_I_KICK_FLUSH	(1 << 7)  /* kick flushing caps */
+#define CEPH_I_FLUSH_SNAPS	(1 << 8)  /* need flush snapss */
+#define CEPH_I_ERROR_WRITE	(1 << 9)  /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK	(1 << 10) /* have seen file lock errors */
+#define CEPH_I_ODIRECT		(1 << 11) /* inode in direct I/O mode */
 
 /*
  * Masks of ceph inode work.
-- 
2.21.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 2/6] ceph: consider inode's last read/write when calculating wanted caps
  2020-02-28 11:55 [PATCH v3 0/6] ceph: don't request caps for idle open files Yan, Zheng
  2020-02-28 11:55 ` [PATCH v3 1/6] ceph: always renew caps if mds_wanted is insufficient Yan, Zheng
@ 2020-02-28 11:55 ` Yan, Zheng
  2020-02-28 11:55 ` [PATCH v3 3/6] ceph: simplify calling of ceph_get_fmode() Yan, Zheng
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2020-02-28 11:55 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

Add i_last_rd and i_last_wr to ceph_inode_info. These two fields are
used to track inode's last read/write, they are updated when getting
caps for read/write.

If there is no read/write on an inode for 'caps_wanted_delay_max'
seconds, __ceph_caps_file_wanted() does not request caps for read/write
even there are open files.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/caps.c               | 158 ++++++++++++++++++++++++-----------
 fs/ceph/file.c               |  21 +++--
 fs/ceph/inode.c              |  10 ++-
 fs/ceph/ioctl.c              |   2 +
 fs/ceph/super.h              |  13 ++-
 include/linux/ceph/ceph_fs.h |   1 +
 6 files changed, 144 insertions(+), 61 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 293920d013ff..1bd91ca8aeb4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -971,18 +971,52 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
 	return used;
 }
 
+#define FMODE_WAIT_BIAS 1000
+
 /*
  * wanted, by virtue of open file modes
  */
 int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
 {
-	int i, bits = 0;
-	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
-		if (ci->i_nr_by_mode[i])
-			bits |= 1 << i;
-	}
+	struct ceph_mount_options *opt =
+		ceph_inode_to_client(&ci->vfs_inode)->mount_options;
+	unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ;
+	unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ;
+	int bits = 0;
+	const int PIN_SHIFT = ffs(CEPH_FILE_MODE_PIN);
+	const int RD_SHIFT = ffs(CEPH_FILE_MODE_RD);
+	const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR);
+	const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY);
+
+	if (ci->i_nr_by_mode[PIN_SHIFT] > 0)
+		bits |= 1 << PIN_SHIFT;
+
+	if (ci->i_nr_by_mode[RD_SHIFT] > 0) {
+		if (ci->i_nr_by_mode[RD_SHIFT] >= FMODE_WAIT_BIAS ||
+		    time_after(ci->i_last_rd, used_cutoff))
+			bits |= 1 << RD_SHIFT;
+	} else if (time_after(ci->i_last_rd, idle_cutoff)) {
+		bits |= 1 << RD_SHIFT;
+	}
+
+	if (ci->i_nr_by_mode[WR_SHIFT] > 0) {
+		if (ci->i_nr_by_mode[WR_SHIFT] >= FMODE_WAIT_BIAS ||
+		    time_after(ci->i_last_wr, used_cutoff))
+			bits |= 1 << WR_SHIFT;
+	} else if (time_after(ci->i_last_wr, idle_cutoff)) {
+		bits |= 1 << WR_SHIFT;
+	}
+
+	/* check lazyio only when read/write is wanted */
+	if ((bits & (CEPH_FILE_MODE_RDWR << 1)) &&
+	    ci->i_nr_by_mode[LAZY_SHIFT] > 0)
+		bits |= 1 << LAZY_SHIFT;
+
 	if (bits == 0)
 		return 0;
+	if (bits == (1 << PIN_SHIFT) && !S_ISDIR(ci->vfs_inode.i_mode))
+		return 0;
+
 	return ceph_caps_for_mode(bits >> 1);
 }
 
@@ -1021,14 +1055,6 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
 	return mds_wanted;
 }
 
-/*
- * called under i_ceph_lock
- */
-static int __ceph_is_single_caps(struct ceph_inode_info *ci)
-{
-	return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
-}
-
 int ceph_is_any_caps(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1856,10 +1882,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	if (ci->i_ceph_flags & CEPH_I_FLUSH)
 		flags |= CHECK_CAPS_FLUSH;
 
-	if (!(flags & CHECK_CAPS_AUTHONLY) ||
-	    (ci->i_auth_cap && __ceph_is_single_caps(ci)))
-		__cap_delay_cancel(mdsc, ci);
-
 	goto retry_locked;
 retry:
 	spin_lock(&ci->i_ceph_lock);
@@ -2081,9 +2103,16 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		goto retry; /* retake i_ceph_lock and restart our cap scan. */
 	}
 
-	/* Reschedule delayed caps release if we delayed anything */
-	if (delayed)
-		__cap_delay_requeue(mdsc, ci, false);
+	if (list_empty(&ci->i_cap_delay_list)) {
+	    if (delayed) {
+		    /* Reschedule delayed caps release if we delayed anything */
+		    __cap_delay_requeue(mdsc, ci, false);
+	    } else if ((file_wanted & ~CEPH_CAP_PIN) &&
+			!(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+		    /* periodically re-calculate caps wanted by open files */
+		    __cap_delay_requeue(mdsc, ci, true);
+	    }
+	}
 
 	spin_unlock(&ci->i_ceph_lock);
 
@@ -2549,8 +2578,9 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got,
  * FIXME: how does a 0 return differ from -EAGAIN?
  */
 enum {
-	NON_BLOCKING	= 1,
-	CHECK_FILELOCK	= 2,
+	/* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
+	NON_BLOCKING	= (1 << 8),
+	CHECK_FILELOCK	= (1 << 9),
 };
 
 static int try_get_cap_refs(struct inode *inode, int need, int want,
@@ -2560,7 +2590,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	int ret = 0;
 	int have, implemented;
-	int file_wanted;
 	bool snap_rwsem_locked = false;
 
 	dout("get_cap_refs %p need %s want %s\n", inode,
@@ -2576,15 +2605,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 		goto out_unlock;
 	}
 
-	/* make sure file is actually open */
-	file_wanted = __ceph_caps_file_wanted(ci);
-	if ((file_wanted & need) != need) {
-		dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
-		     ceph_cap_string(need), ceph_cap_string(file_wanted));
-		ret = -EBADF;
-		goto out_unlock;
-	}
-
 	/* finish pending truncate */
 	while (ci->i_truncate_pending) {
 		spin_unlock(&ci->i_ceph_lock);
@@ -2692,6 +2712,9 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 		     ceph_cap_string(have), ceph_cap_string(need));
 	}
 out_unlock:
+
+	__ceph_touch_fmode(ci, mdsc, flags);
+
 	spin_unlock(&ci->i_ceph_lock);
 	if (snap_rwsem_locked)
 		up_read(&mdsc->snap_rwsem);
@@ -2729,10 +2752,20 @@ static void check_max_size(struct inode *inode, loff_t endoff)
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 }
 
+static inline int get_used_fmode(int caps)
+{
+	int fmode = 0;
+	if (caps & CEPH_CAP_FILE_RD)
+		fmode |= CEPH_FILE_MODE_RD;
+	if (caps & CEPH_CAP_FILE_WR)
+		fmode |= CEPH_FILE_MODE_WR;
+	return fmode;
+}
+
 int ceph_try_get_caps(struct inode *inode, int need, int want,
 		      bool nonblock, int *got)
 {
-	int ret;
+	int ret, flags;
 
 	BUG_ON(need & ~CEPH_CAP_FILE_RD);
 	BUG_ON(want & ~(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO|CEPH_CAP_FILE_SHARED));
@@ -2740,8 +2773,11 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
 	if (ret < 0)
 		return ret;
 
-	ret = try_get_cap_refs(inode, need, want, 0,
-			       (nonblock ? NON_BLOCKING : 0), got);
+	flags = get_used_fmode(need | want);
+	if (nonblock)
+		flags |= NON_BLOCKING;
+
+	ret = try_get_cap_refs(inode, need, want, 0, flags, got);
 	return ret == -EAGAIN ? 0 : ret;
 }
 
@@ -2767,11 +2803,15 @@ int ceph_get_caps(struct file *filp, int need, int want,
 	    fi->filp_gen != READ_ONCE(fsc->filp_gen))
 		return -EBADF;
 
+	flags = get_used_fmode(need | want);
+
 	while (true) {
 		if (endoff > 0)
 			check_max_size(inode, endoff);
 
-		flags = atomic_read(&fi->num_locks) ? CHECK_FILELOCK : 0;
+		flags &= CEPH_FILE_MODE_MASK;
+		if (atomic_read(&fi->num_locks))
+			flags |= CHECK_FILELOCK;
 		_got = 0;
 		ret = try_get_cap_refs(inode, need, want, endoff,
 				       flags, &_got);
@@ -2791,6 +2831,8 @@ int ceph_get_caps(struct file *filp, int need, int want,
 			list_add(&cw.list, &mdsc->cap_wait_list);
 			spin_unlock(&mdsc->caps_list_lock);
 
+			/* make sure used fmode not timeout */
+			ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
 			add_wait_queue(&ci->i_cap_wq, &wait);
 
 			flags |= NON_BLOCKING;
@@ -2804,6 +2846,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
 			}
 
 			remove_wait_queue(&ci->i_cap_wq, &wait);
+			ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);
 
 			spin_lock(&mdsc->caps_list_lock);
 			list_del(&cw.list);
@@ -2823,7 +2866,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
 		if (ret < 0) {
 			if (ret == -ESTALE) {
 				/* session was killed, try renew caps */
-				ret = ceph_renew_caps(inode);
+				ret = ceph_renew_caps(inode, flags);
 				if (ret == 0)
 					continue;
 			}
@@ -4121,6 +4164,31 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 	dout("flush_dirty_caps done\n");
 }
 
+void __ceph_touch_fmode(struct ceph_inode_info *ci,
+			struct ceph_mds_client *mdsc, int fmode)
+{
+	unsigned long now = jiffies;
+	if (fmode & CEPH_FILE_MODE_RD)
+		ci->i_last_rd = now;
+	if (fmode & CEPH_FILE_MODE_WR)
+		ci->i_last_wr = now;
+	/* queue periodic check */
+	if (fmode && list_empty(&ci->i_cap_delay_list))
+		__cap_delay_requeue(mdsc, ci, true);
+}
+
+void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
+{
+	int i;
+	int bits = (fmode << 1) | 1;
+	spin_lock(&ci->i_ceph_lock);
+	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
+		if (bits & (1 << i))
+			ci->i_nr_by_mode[i] += count;
+	}
+	spin_unlock(&ci->i_ceph_lock);
+}
+
 void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
 {
 	int i;
@@ -4136,26 +4204,18 @@ void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
  * we may need to release capabilities to the MDS (or schedule
  * their delayed release).
  */
-void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
+void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
 {
-	int i, last = 0;
+	int i;
 	int bits = (fmode << 1) | 1;
 	spin_lock(&ci->i_ceph_lock);
 	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
 		if (bits & (1 << i)) {
-			BUG_ON(ci->i_nr_by_mode[i] == 0);
-			if (--ci->i_nr_by_mode[i] == 0)
-				last++;
+			BUG_ON(ci->i_nr_by_mode[i] < count);
+			ci->i_nr_by_mode[i] -= count;
 		}
 	}
-	dout("put_fmode %p fmode %d {%d,%d,%d,%d}\n",
-	     &ci->vfs_inode, fmode,
-	     ci->i_nr_by_mode[0], ci->i_nr_by_mode[1],
-	     ci->i_nr_by_mode[2], ci->i_nr_by_mode[3]);
 	spin_unlock(&ci->i_ceph_lock);
-
-	if (last && ci->i_vino.snap == CEPH_NOSNAP)
-		ceph_check_caps(ci, 0, NULL);
 }
 
 /*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7db97304bf06..786b72e7696f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -213,7 +213,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 		struct ceph_dir_file_info *dfi =
 			kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
 		if (!dfi) {
-			ceph_put_fmode(ci, fmode); /* clean up */
+			ceph_put_fmode(ci, fmode, 1); /* clean up */
 			return -ENOMEM;
 		}
 
@@ -224,7 +224,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 	} else {
 		fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
 		if (!fi) {
-			ceph_put_fmode(ci, fmode); /* clean up */
+			ceph_put_fmode(ci, fmode, 1); /* clean up */
 			return -ENOMEM;
 		}
 
@@ -263,7 +263,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 	case S_IFLNK:
 		dout("init_file %p %p 0%o (symlink)\n", inode, file,
 		     inode->i_mode);
-		ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+		ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
 		break;
 
 	default:
@@ -273,7 +273,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 		 * we need to drop the open ref now, since we don't
 		 * have .release set to ceph_release.
 		 */
-		ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+		ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
 		BUG_ON(inode->i_fop->release == ceph_release);
 
 		/* call the proper open fop */
@@ -285,14 +285,15 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 /*
  * try renew caps after session gets killed.
  */
-int ceph_renew_caps(struct inode *inode)
+int ceph_renew_caps(struct inode *inode, int fmode)
 {
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_mds_request *req;
 	int err, flags, wanted;
 
 	spin_lock(&ci->i_ceph_lock);
+	__ceph_touch_fmode(ci, mdsc, fmode);
 	wanted = __ceph_caps_file_wanted(ci);
 	if (__ceph_is_any_real_caps(ci) &&
 	    (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
@@ -405,6 +406,7 @@ int ceph_open(struct inode *inode, struct file *file)
 	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
 		   (ci->i_snap_caps & wanted) == wanted) {
 		__ceph_get_fmode(ci, fmode);
+		__ceph_touch_fmode(ci, mdsc, fmode);
 		spin_unlock(&ci->i_ceph_lock);
 		return ceph_init_file(inode, file, fmode);
 	}
@@ -525,7 +527,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	}
 out_req:
 	if (!req->r_err && req->r_target_inode)
-		ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
+		ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode, 1);
 	ceph_mdsc_put_request(req);
 out_ctx:
 	ceph_release_acl_sec_ctx(&as_ctx);
@@ -542,7 +544,7 @@ int ceph_release(struct inode *inode, struct file *file)
 		dout("release inode %p dir file %p\n", inode, file);
 		WARN_ON(!list_empty(&dfi->file_info.rw_contexts));
 
-		ceph_put_fmode(ci, dfi->file_info.fmode);
+		ceph_put_fmode(ci, dfi->file_info.fmode, 1);
 
 		if (dfi->last_readdir)
 			ceph_mdsc_put_request(dfi->last_readdir);
@@ -554,7 +556,8 @@ int ceph_release(struct inode *inode, struct file *file)
 		dout("release inode %p regular file %p\n", inode, file);
 		WARN_ON(!list_empty(&fi->rw_contexts));
 
-		ceph_put_fmode(ci, fi->fmode);
+		ceph_put_fmode(ci, fi->fmode, 1);
+
 		kmem_cache_free(ceph_file_cachep, fi);
 	}
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 094b8fc37787..95e7440cf6f7 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -478,6 +478,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	ci->i_head_snapc = NULL;
 	ci->i_snap_caps = 0;
 
+	ci->i_last_rd = ci->i_last_wr = jiffies;
 	for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
 		ci->i_nr_by_mode[i] = 0;
 
@@ -637,7 +638,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
 			if ((issued & (CEPH_CAP_FILE_CACHE|
 				       CEPH_CAP_FILE_BUFFER)) ||
 			    mapping_mapped(inode->i_mapping) ||
-			    __ceph_caps_file_wanted(ci)) {
+			    __ceph_is_file_opened(ci)) {
 				ci->i_truncate_pending++;
 				queue_trunc = 1;
 			}
@@ -1010,6 +1011,13 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 			fill_inline = true;
 	}
 
+	if (cap_fmode >= 0) {
+		if (!info_caps)
+			pr_warn("mds issued no caps on %llx.%llx\n",
+				ceph_vinop(inode));
+		__ceph_touch_fmode(ci, mdsc, cap_fmode);
+	}
+
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (fill_inline)
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index c90f03beb15d..6e061bf62ad4 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -243,11 +243,13 @@ static long ceph_ioctl_lazyio(struct file *file)
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 
 	if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
 		spin_lock(&ci->i_ceph_lock);
 		fi->fmode |= CEPH_FILE_MODE_LAZY;
 		ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++;
+		__ceph_touch_fmode(ci, mdsc, fi->fmode);
 		spin_unlock(&ci->i_ceph_lock);
 		dout("ioctl_layzio: file %p marked lazy\n", file);
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 48e84d7f48a0..8ce210cc62c9 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -361,6 +361,8 @@ struct ceph_inode_info {
 						    dirty|flushing caps */
 	unsigned i_snap_caps;           /* cap bits for snapped files */
 
+	unsigned long i_last_rd;
+	unsigned long i_last_wr;
 	int i_nr_by_mode[CEPH_FILE_MODE_BITS];  /* open file counts */
 
 	struct mutex i_truncate_mutex;
@@ -673,6 +675,10 @@ extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
 extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
 extern int __ceph_caps_used(struct ceph_inode_info *ci);
 
+static inline bool __ceph_is_file_opened(struct ceph_inode_info *ci)
+{
+	return ci->i_nr_by_mode[0];
+}
 extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
 extern int __ceph_caps_wanted(struct ceph_inode_info *ci);
 
@@ -1074,7 +1080,10 @@ extern int ceph_try_get_caps(struct inode *inode,
 
 /* for counting open files by mode */
 extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode);
-extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
+extern void ceph_get_fmode(struct ceph_inode_info *ci, int mode, int count);
+extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode, int count);
+extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
+			       struct ceph_mds_client *mdsc, int fmode);
 
 /* addr.c */
 extern const struct address_space_operations ceph_aops;
@@ -1086,7 +1095,7 @@ extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
 /* file.c */
 extern const struct file_operations ceph_file_fops;
 
-extern int ceph_renew_caps(struct inode *inode);
+extern int ceph_renew_caps(struct inode *inode, int fmode);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 			    struct file *file, unsigned flags, umode_t mode);
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index cb21c5cf12c3..8017130a08a1 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -564,6 +564,7 @@ struct ceph_filelock {
 #define CEPH_FILE_MODE_RDWR       3  /* RD | WR */
 #define CEPH_FILE_MODE_LAZY       4  /* lazy io */
 #define CEPH_FILE_MODE_BITS       4
+#define CEPH_FILE_MODE_MASK       ((1 << CEPH_FILE_MODE_BITS) - 1)
 
 int ceph_flags_to_mode(int flags);
 
-- 
2.21.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 3/6] ceph: simplify calling of ceph_get_fmode()
  2020-02-28 11:55 [PATCH v3 0/6] ceph: don't request caps for idle open files Yan, Zheng
  2020-02-28 11:55 ` [PATCH v3 1/6] ceph: always renew caps if mds_wanted is insufficient Yan, Zheng
  2020-02-28 11:55 ` [PATCH v3 2/6] ceph: consider inode's last read/write when calculating wanted caps Yan, Zheng
@ 2020-02-28 11:55 ` Yan, Zheng
  2020-02-28 11:55 ` [PATCH v3 4/6] ceph: remove delay check logic from ceph_check_caps() Yan, Zheng
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2020-02-28 11:55 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

Originally, calling ceph_get_fmode() for open files is by thread that
handles request reply. The reason is that there is a small window
between updating caps and request initiator gets woken up. we need to
prevent ceph_check_caps() from releasing wanted caps in the window.

Previous patch make fill_inode() call __ceph_touch_fmode() for open file
request. This prevents ceph_check_caps() from releasing wanted caps for
'caps_wanted_delay_min' seconds, enough for request initiator to get
woken up and call ceph_get_fmode(). So we can call ceph_get_fmode() in
ceph_open() now.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/caps.c  | 26 +++-----------------------
 fs/ceph/file.c  | 21 +++++----------------
 fs/ceph/inode.c |  8 +-------
 fs/ceph/super.h |  3 +--
 4 files changed, 10 insertions(+), 48 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1bd91ca8aeb4..ccee4a0814d7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -606,7 +606,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
  */
 void ceph_add_cap(struct inode *inode,
 		  struct ceph_mds_session *session, u64 cap_id,
-		  int fmode, unsigned issued, unsigned wanted,
+		  unsigned issued, unsigned wanted,
 		  unsigned seq, unsigned mseq, u64 realmino, int flags,
 		  struct ceph_cap **new_cap)
 {
@@ -622,13 +622,6 @@ void ceph_add_cap(struct inode *inode,
 	dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
 	     session->s_mds, cap_id, ceph_cap_string(issued), seq);
 
-	/*
-	 * If we are opening the file, include file mode wanted bits
-	 * in wanted.
-	 */
-	if (fmode >= 0)
-		wanted |= ceph_caps_for_mode(fmode);
-
 	spin_lock(&session->s_gen_ttl_lock);
 	gen = session->s_cap_gen;
 	spin_unlock(&session->s_gen_ttl_lock);
@@ -753,9 +746,6 @@ void ceph_add_cap(struct inode *inode,
 	cap->issue_seq = seq;
 	cap->mseq = mseq;
 	cap->cap_gen = gen;
-
-	if (fmode >= 0)
-		__ceph_get_fmode(ci, fmode);
 }
 
 /*
@@ -3732,7 +3722,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 		/* add placeholder for the export tagert */
 		int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
 		tcap = new_cap;
-		ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
+		ceph_add_cap(inode, tsession, t_cap_id, issued, 0,
 			     t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
 
 		if (!list_empty(&ci->i_cap_flush_list) &&
@@ -3837,7 +3827,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 	__ceph_caps_issued(ci, &issued);
 	issued |= __ceph_caps_dirty(ci);
 
-	ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
+	ceph_add_cap(inode, session, cap_id, caps, wanted, seq, mseq,
 		     realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
 
 	ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
@@ -4189,16 +4179,6 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
 	spin_unlock(&ci->i_ceph_lock);
 }
 
-void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
-{
-	int i;
-	int bits = (fmode << 1) | 1;
-	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
-		if (bits & (1 << i))
-			ci->i_nr_by_mode[i]++;
-	}
-}
-
 /*
  * Drop open file reference.  If we were the last open file,
  * we may need to release capabilities to the MDS (or schedule
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 786b72e7696f..88da7f68e513 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -212,10 +212,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 	if (isdir) {
 		struct ceph_dir_file_info *dfi =
 			kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
-		if (!dfi) {
-			ceph_put_fmode(ci, fmode, 1); /* clean up */
+		if (!dfi)
 			return -ENOMEM;
-		}
 
 		file->private_data = dfi;
 		fi = &dfi->file_info;
@@ -223,15 +221,15 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 		dfi->readdir_cache_idx = -1;
 	} else {
 		fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
-		if (!fi) {
-			ceph_put_fmode(ci, fmode, 1); /* clean up */
+		if (!fi)
 			return -ENOMEM;
-		}
 
 		file->private_data = fi;
 	}
 
+	ceph_get_fmode(ci, fmode, 1);
 	fi->fmode = fmode;
+
 	spin_lock_init(&fi->rw_contexts_lock);
 	INIT_LIST_HEAD(&fi->rw_contexts);
 	fi->meta_err = errseq_sample(&ci->i_meta_err);
@@ -263,7 +261,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 	case S_IFLNK:
 		dout("init_file %p %p 0%o (symlink)\n", inode, file,
 		     inode->i_mode);
-		ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
 		break;
 
 	default:
@@ -273,7 +270,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 		 * we need to drop the open ref now, since we don't
 		 * have .release set to ceph_release.
 		 */
-		ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
 		BUG_ON(inode->i_fop->release == ceph_release);
 
 		/* call the proper open fop */
@@ -327,7 +323,6 @@ int ceph_renew_caps(struct inode *inode, int fmode)
 	req->r_inode = inode;
 	ihold(inode);
 	req->r_num_caps = 1;
-	req->r_fmode = -1;
 
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	ceph_mdsc_put_request(req);
@@ -373,9 +368,6 @@ int ceph_open(struct inode *inode, struct file *file)
 
 	/* trivially open snapdir */
 	if (ceph_snap(inode) == CEPH_SNAPDIR) {
-		spin_lock(&ci->i_ceph_lock);
-		__ceph_get_fmode(ci, fmode);
-		spin_unlock(&ci->i_ceph_lock);
 		return ceph_init_file(inode, file, fmode);
 	}
 
@@ -393,7 +385,7 @@ int ceph_open(struct inode *inode, struct file *file)
 		dout("open %p fmode %d want %s issued %s using existing\n",
 		     inode, fmode, ceph_cap_string(wanted),
 		     ceph_cap_string(issued));
-		__ceph_get_fmode(ci, fmode);
+		__ceph_touch_fmode(ci, mdsc, fmode);
 		spin_unlock(&ci->i_ceph_lock);
 
 		/* adjust wanted? */
@@ -405,7 +397,6 @@ int ceph_open(struct inode *inode, struct file *file)
 		return ceph_init_file(inode, file, fmode);
 	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
 		   (ci->i_snap_caps & wanted) == wanted) {
-		__ceph_get_fmode(ci, fmode);
 		__ceph_touch_fmode(ci, mdsc, fmode);
 		spin_unlock(&ci->i_ceph_lock);
 		return ceph_init_file(inode, file, fmode);
@@ -526,8 +517,6 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		err = finish_open(file, dentry, ceph_open);
 	}
 out_req:
-	if (!req->r_err && req->r_target_inode)
-		ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode, 1);
 	ceph_mdsc_put_request(req);
 out_ctx:
 	ceph_release_acl_sec_ctx(&as_ctx);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 95e7440cf6f7..0b0f503c84c3 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -968,7 +968,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 		if (ceph_snap(inode) == CEPH_NOSNAP) {
 			ceph_add_cap(inode, session,
 				     le64_to_cpu(info->cap.cap_id),
-				     cap_fmode, info_caps,
+				     info_caps,
 				     le32_to_cpu(info->cap.wanted),
 				     le32_to_cpu(info->cap.seq),
 				     le32_to_cpu(info->cap.mseq),
@@ -993,13 +993,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
 			dout(" %p got snap_caps %s\n", inode,
 			     ceph_cap_string(info_caps));
 			ci->i_snap_caps |= info_caps;
-			if (cap_fmode >= 0)
-				__ceph_get_fmode(ci, cap_fmode);
 		}
-	} else if (cap_fmode >= 0) {
-		pr_warn("mds issued no caps on %llx.%llx\n",
-			   ceph_vinop(inode));
-		__ceph_get_fmode(ci, cap_fmode);
 	}
 
 	if (iinfo->inline_version > 0 &&
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 8ce210cc62c9..d89478db8b24 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1037,7 +1037,7 @@ extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
 				     struct ceph_cap_reservation *ctx);
 extern void ceph_add_cap(struct inode *inode,
 			 struct ceph_mds_session *session, u64 cap_id,
-			 int fmode, unsigned issued, unsigned wanted,
+			 unsigned issued, unsigned wanted,
 			 unsigned cap, unsigned seq, u64 realmino, int flags,
 			 struct ceph_cap **new_cap);
 extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
@@ -1079,7 +1079,6 @@ extern int ceph_try_get_caps(struct inode *inode,
 			     int need, int want, bool nonblock, int *got);
 
 /* for counting open files by mode */
-extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode);
 extern void ceph_get_fmode(struct ceph_inode_info *ci, int mode, int count);
 extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode, int count);
 extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
-- 
2.21.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 4/6] ceph: remove delay check logic from ceph_check_caps()
  2020-02-28 11:55 [PATCH v3 0/6] ceph: don't request caps for idle open files Yan, Zheng
                   ` (2 preceding siblings ...)
  2020-02-28 11:55 ` [PATCH v3 3/6] ceph: simplify calling of ceph_get_fmode() Yan, Zheng
@ 2020-02-28 11:55 ` Yan, Zheng
  2020-02-28 11:55 ` [PATCH v3 5/6] ceph: update i_requested_max_size only when sending cap msg to auth mds Yan, Zheng
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2020-02-28 11:55 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

__ceph_caps_file_wanted() already checks 'caps_wanted_delay_min' and
'caps_wanted_delay_max'. There is no need to duplicte the logic in
ceph_check_caps() and __send_cap()

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/caps.c  | 146 ++++++++++++------------------------------------
 fs/ceph/file.c  |  13 ++---
 fs/ceph/inode.c |   1 -
 fs/ceph/super.h |   8 +--
 4 files changed, 43 insertions(+), 125 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ccee4a0814d7..29f39058aca7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -490,13 +490,10 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 			       struct ceph_inode_info *ci)
 {
 	struct ceph_mount_options *opt = mdsc->fsc->mount_options;
-
-	ci->i_hold_caps_min = round_jiffies(jiffies +
-					    opt->caps_wanted_delay_min * HZ);
 	ci->i_hold_caps_max = round_jiffies(jiffies +
 					    opt->caps_wanted_delay_max * HZ);
-	dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
-	     ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
+	dout("__cap_set_timeouts %p %lu\n", &ci->vfs_inode,
+	     ci->i_hold_caps_max - jiffies);
 }
 
 /*
@@ -508,8 +505,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
  *    -> we take mdsc->cap_delay_lock
  */
 static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
-				struct ceph_inode_info *ci,
-				bool set_timeout)
+				struct ceph_inode_info *ci)
 {
 	dout("__cap_delay_requeue %p flags %d at %lu\n", &ci->vfs_inode,
 	     ci->i_ceph_flags, ci->i_hold_caps_max);
@@ -520,8 +516,7 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
 				goto no_change;
 			list_del_init(&ci->i_cap_delay_list);
 		}
-		if (set_timeout)
-			__cap_set_timeouts(mdsc, ci);
+		__cap_set_timeouts(mdsc, ci);
 		list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
 no_change:
 		spin_unlock(&mdsc->cap_delay_lock);
@@ -719,7 +714,7 @@ void ceph_add_cap(struct inode *inode,
 		dout(" issued %s, mds wanted %s, actual %s, queueing\n",
 		     ceph_cap_string(issued), ceph_cap_string(wanted),
 		     ceph_cap_string(actual_wanted));
-		__cap_delay_requeue(mdsc, ci, true);
+		__cap_delay_requeue(mdsc, ci);
 	}
 
 	if (flags & CEPH_CAP_FLAG_AUTH) {
@@ -1307,7 +1302,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	struct cap_msg_args arg;
 	int held, revoking;
 	int wake = 0;
-	int delayed = 0;
 	int ret;
 
 	held = cap->issued | cap->implemented;
@@ -1320,28 +1314,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	     ceph_cap_string(revoking));
 	BUG_ON((retain & CEPH_CAP_PIN) == 0);
 
-	arg.session = cap->session;
-
-	/* don't release wanted unless we've waited a bit. */
-	if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
-	    time_before(jiffies, ci->i_hold_caps_min)) {
-		dout(" delaying issued %s -> %s, wanted %s -> %s on send\n",
-		     ceph_cap_string(cap->issued),
-		     ceph_cap_string(cap->issued & retain),
-		     ceph_cap_string(cap->mds_wanted),
-		     ceph_cap_string(want));
-		want |= cap->mds_wanted;
-		retain |= cap->issued;
-		delayed = 1;
-	}
-	ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
-	if (want & ~cap->mds_wanted) {
-		/* user space may open/close single file frequently.
-		 * This avoids droping mds_wanted immediately after
-		 * requesting new mds_wanted.
-		 */
-		__cap_set_timeouts(mdsc, ci);
-	}
+	ci->i_ceph_flags &= ~CEPH_I_FLUSH;
 
 	cap->issued &= retain;  /* drop bits we don't want */
 	if (cap->implemented & ~cap->issued) {
@@ -1356,6 +1329,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	cap->implemented &= cap->issued | used;
 	cap->mds_wanted = want;
 
+	arg.session = cap->session;
 	arg.ino = ceph_vino(inode).ino;
 	arg.cid = cap->cap_id;
 	arg.follows = flushing ? ci->i_head_snapc->seq : 0;
@@ -1416,14 +1390,19 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 
 	ret = send_cap_msg(&arg);
 	if (ret < 0) {
-		dout("error sending cap msg, must requeue %p\n", inode);
-		delayed = 1;
+		pr_err("error sending cap msg, ino (%llx.%llx) "
+		       "flushing %s tid %llu, requeue\n",
+		       ceph_vinop(inode), ceph_cap_string(flushing),
+		       flush_tid);
+		spin_lock(&ci->i_ceph_lock);
+		__cap_delay_requeue(mdsc, ci);
+		spin_unlock(&ci->i_ceph_lock);
 	}
 
 	if (wake)
 		wake_up_all(&ci->i_cap_wq);
 
-	return delayed;
+	return ret;
 }
 
 static inline int __send_flush_snap(struct inode *inode,
@@ -1687,7 +1666,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
 	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
 	    (mask & CEPH_CAP_FILE_BUFFER))
 		dirty |= I_DIRTY_DATASYNC;
-	__cap_delay_requeue(mdsc, ci, true);
+	__cap_delay_requeue(mdsc, ci);
 	return dirty;
 }
 
@@ -1838,8 +1817,6 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
  * versus held caps.  Release, flush, ack revoked caps to mds as
  * appropriate.
  *
- *  CHECK_CAPS_NODELAY - caller is delayed work and we should not delay
- *    cap release further.
  *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
  *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
  *    further delay.
@@ -1858,17 +1835,10 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	int mds = -1;   /* keep track of how far we've gone through i_caps list
 			   to avoid an infinite loop on retry */
 	struct rb_node *p;
-	int delayed = 0, sent = 0;
-	bool no_delay = flags & CHECK_CAPS_NODELAY;
 	bool queue_invalidate = false;
 	bool tried_invalidate = false;
 
-	/* if we are unmounting, flush any unused caps immediately. */
-	if (mdsc->stopping)
-		no_delay = true;
-
 	spin_lock(&ci->i_ceph_lock);
-
 	if (ci->i_ceph_flags & CEPH_I_FLUSH)
 		flags |= CHECK_CAPS_FLUSH;
 
@@ -1914,14 +1884,13 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	}
 
 	dout("check_caps %p file_want %s used %s dirty %s flushing %s"
-	     " issued %s revoking %s retain %s %s%s%s\n", inode,
+	     " issued %s revoking %s retain %s %s%s\n", inode,
 	     ceph_cap_string(file_wanted),
 	     ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
 	     ceph_cap_string(ci->i_flushing_caps),
 	     ceph_cap_string(issued), ceph_cap_string(revoking),
 	     ceph_cap_string(retain),
 	     (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
-	     (flags & CHECK_CAPS_NODELAY) ? " NODELAY" : "",
 	     (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "");
 
 	/*
@@ -1929,7 +1898,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	 * have cached pages, but don't want them, then try to invalidate.
 	 * If we fail, it's because pages are locked.... try again later.
 	 */
-	if ((!no_delay || mdsc->stopping) &&
+	if ((!(flags & CHECK_CAPS_NOINVAL) || mdsc->stopping) &&
 	    S_ISREG(inode->i_mode) &&
 	    !(ci->i_wb_ref || ci->i_wrbuffer_ref) &&   /* no dirty pages... */
 	    inode->i_data.nrpages &&		/* have cached pages */
@@ -2009,21 +1978,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		if ((cap->issued & ~retain) == 0)
 			continue;     /* nope, all good */
 
-		if (no_delay)
-			goto ack;
-
-		/* delay? */
-		if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
-		    time_before(jiffies, ci->i_hold_caps_max)) {
-			dout(" delaying issued %s -> %s, wanted %s -> %s\n",
-			     ceph_cap_string(cap->issued),
-			     ceph_cap_string(cap->issued & retain),
-			     ceph_cap_string(cap->mds_wanted),
-			     ceph_cap_string(want));
-			delayed++;
-			continue;
-		}
-
 ack:
 		if (session && session != cap->session) {
 			dout("oops, wrong session %p mutex\n", session);
@@ -2084,24 +2038,18 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		}
 
 		mds = cap->mds;  /* remember mds, so we don't repeat */
-		sent++;
 
 		/* __send_cap drops i_ceph_lock */
-		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, 0,
-				cap_used, want, retain, flushing,
-				flush_tid, oldest_flush_tid);
+		__send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, 0, cap_used, want,
+			   retain, flushing, flush_tid, oldest_flush_tid);
 		goto retry; /* retake i_ceph_lock and restart our cap scan. */
 	}
 
-	if (list_empty(&ci->i_cap_delay_list)) {
-	    if (delayed) {
-		    /* Reschedule delayed caps release if we delayed anything */
-		    __cap_delay_requeue(mdsc, ci, false);
-	    } else if ((file_wanted & ~CEPH_CAP_PIN) &&
-			!(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
-		    /* periodically re-calculate caps wanted by open files */
-		    __cap_delay_requeue(mdsc, ci, true);
-	    }
+	/* periodically re-calculate caps wanted by open files */
+	if (list_empty(&ci->i_cap_delay_list) &&
+	    (file_wanted & ~CEPH_CAP_PIN) &&
+	    !(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+		__cap_delay_requeue(mdsc, ci);
 	}
 
 	spin_unlock(&ci->i_ceph_lock);
@@ -2131,7 +2079,6 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
 retry_locked:
 	if (ci->i_dirty_caps && ci->i_auth_cap) {
 		struct ceph_cap *cap = ci->i_auth_cap;
-		int delayed;
 
 		if (session != cap->session) {
 			spin_unlock(&ci->i_ceph_lock);
@@ -2160,18 +2107,10 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
 						 &oldest_flush_tid);
 
 		/* __send_cap drops i_ceph_lock */
-		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
-				     CEPH_CLIENT_CAPS_SYNC,
-				     __ceph_caps_used(ci),
-				     __ceph_caps_wanted(ci),
-				     (cap->issued | cap->implemented),
-				     flushing, flush_tid, oldest_flush_tid);
-
-		if (delayed) {
-			spin_lock(&ci->i_ceph_lock);
-			__cap_delay_requeue(mdsc, ci, true);
-			spin_unlock(&ci->i_ceph_lock);
-		}
+		__send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC,
+			   __ceph_caps_used(ci), __ceph_caps_wanted(ci),
+			   (cap->issued | cap->implemented),
+			   flushing, flush_tid, oldest_flush_tid);
 	} else {
 		if (!list_empty(&ci->i_cap_flush_list)) {
 			struct ceph_cap_flush *cf =
@@ -2371,22 +2310,13 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
 		if (cf->caps) {
 			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
 			     inode, cap, cf->tid, ceph_cap_string(cf->caps));
-			ci->i_ceph_flags |= CEPH_I_NODELAY;
-
-			ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+			__send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
 					 (cf->tid < last_snap_flush ?
 					  CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0),
 					  __ceph_caps_used(ci),
 					  __ceph_caps_wanted(ci),
 					  (cap->issued | cap->implemented),
 					  cf->caps, cf->tid, oldest_flush_tid);
-			if (ret) {
-				pr_err("kick_flushing_caps: error sending "
-					"cap flush, ino (%llx.%llx) "
-					"tid %llu flushing %s\n",
-					ceph_vinop(inode), cf->tid,
-					ceph_cap_string(cf->caps));
-			}
 		} else {
 			struct ceph_cap_snap *capsnap =
 					container_of(cf, struct ceph_cap_snap,
@@ -3005,7 +2935,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
 	     last ? " last" : "", put ? " put" : "");
 
-	if (last && !flushsnaps)
+	if (last)
 		ceph_check_caps(ci, 0, NULL);
 	else if (flushsnaps)
 		ceph_flush_snaps(ci, NULL);
@@ -3423,10 +3353,10 @@ static void handle_cap_grant(struct inode *inode,
 		wake_up_all(&ci->i_cap_wq);
 
 	if (check_caps == 1)
-		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
+		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL,
 				session);
 	else if (check_caps == 2)
-		ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
+		ceph_check_caps(ci, CHECK_CAPS_NOINVAL, session);
 	else
 		mutex_unlock(&session->s_mutex);
 }
@@ -4101,7 +4031,6 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 {
 	struct inode *inode;
 	struct ceph_inode_info *ci;
-	int flags = CHECK_CAPS_NODELAY;
 
 	dout("check_delayed_caps\n");
 	while (1) {
@@ -4121,7 +4050,7 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 
 		if (inode) {
 			dout("check_delayed_caps on %p\n", inode);
-			ceph_check_caps(ci, flags, NULL);
+			ceph_check_caps(ci, 0, NULL);
 			/* avoid calling iput_final() in tick thread */
 			ceph_async_iput(inode);
 		}
@@ -4146,7 +4075,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 		ihold(inode);
 		dout("flush_dirty_caps %p\n", inode);
 		spin_unlock(&mdsc->cap_dirty_lock);
-		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
+		ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
 		iput(inode);
 		spin_lock(&mdsc->cap_dirty_lock);
 	}
@@ -4164,7 +4093,7 @@ void __ceph_touch_fmode(struct ceph_inode_info *ci,
 		ci->i_last_wr = now;
 	/* queue periodic check */
 	if (fmode && list_empty(&ci->i_cap_delay_list))
-		__cap_delay_requeue(mdsc, ci, true);
+		__cap_delay_requeue(mdsc, ci);
 }
 
 void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
@@ -4213,7 +4142,6 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
 	if (inode->i_nlink == 1) {
 		drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
 
-		ci->i_ceph_flags |= CEPH_I_NODELAY;
 		if (__ceph_caps_dirty(ci)) {
 			struct ceph_mds_client *mdsc =
 				ceph_inode_to_client(inode)->mdsc;
@@ -4269,8 +4197,6 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 		if (force || (cap->issued & drop)) {
 			if (cap->issued & drop) {
 				int wanted = __ceph_caps_wanted(ci);
-				if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0)
-					wanted |= cap->mds_wanted;
 				dout("encode_inode_release %p cap %p "
 				     "%s -> %s, wanted %s -> %s\n", inode, cap,
 				     ceph_cap_string(cap->issued),
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 88da7f68e513..5d300a41ea08 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1552,7 +1552,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		if (dirty)
 			__mark_inode_dirty(inode, dirty);
 		if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
-			ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
+			ceph_check_caps(ci, 0, NULL);
 	}
 
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
@@ -2152,15 +2152,10 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
 	inode_inc_iversion_raw(dst_inode);
 
 	if (dst_off > size) {
-		int caps_flags = 0;
-
 		/* Let the MDS know about dst file size change */
-		if (ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
-			caps_flags |= CHECK_CAPS_NODELAY;
-		if (ceph_inode_set_size(dst_inode, dst_off))
-			caps_flags |= CHECK_CAPS_AUTHONLY;
-		if (caps_flags)
-			ceph_check_caps(dst_ci, caps_flags, NULL);
+		if (ceph_inode_set_size(dst_inode, dst_off) ||
+		    ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
+			ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY, NULL);
 	}
 	/* Mark Fw dirty */
 	spin_lock(&dst_ci->i_ceph_lock);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0b0f503c84c3..5a8fa8a2d3cf 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -471,7 +471,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	ci->i_prealloc_cap_flush = NULL;
 	INIT_LIST_HEAD(&ci->i_cap_flush_list);
 	init_waitqueue_head(&ci->i_cap_wq);
-	ci->i_hold_caps_min = 0;
 	ci->i_hold_caps_max = 0;
 	INIT_LIST_HEAD(&ci->i_cap_delay_list);
 	INIT_LIST_HEAD(&ci->i_cap_snaps);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index d89478db8b24..e586cff3dfd5 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -170,9 +170,9 @@ struct ceph_cap {
 	struct list_head caps_item;
 };
 
-#define CHECK_CAPS_NODELAY    1  /* do not delay any further */
-#define CHECK_CAPS_AUTHONLY   2  /* only check auth cap */
-#define CHECK_CAPS_FLUSH      4  /* flush any dirty caps */
+#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
+#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
+#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */
 
 struct ceph_cap_flush {
 	u64 tid;
@@ -352,7 +352,6 @@ struct ceph_inode_info {
 	struct ceph_cap_flush *i_prealloc_cap_flush;
 	struct list_head i_cap_flush_list;
 	wait_queue_head_t i_cap_wq;      /* threads waiting on a capability */
-	unsigned long i_hold_caps_min; /* jiffies */
 	unsigned long i_hold_caps_max; /* jiffies */
 	struct list_head i_cap_delay_list;  /* for delayed cap release to mds */
 	struct ceph_cap_reservation i_cap_migration_resv;
@@ -513,7 +512,6 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
  * Ceph inode.
  */
 #define CEPH_I_DIR_ORDERED	(1 << 0)  /* dentries in dir are ordered */
-#define CEPH_I_NODELAY		(1 << 1)  /* do not delay cap release */
 #define CEPH_I_FLUSH		(1 << 2)  /* do not delay flush of dirty metadata */
 #define CEPH_I_POOL_PERM	(1 << 3)  /* pool rd/wr bits are valid */
 #define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */
-- 
2.21.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 5/6] ceph: update i_requested_max_size only when sending cap msg to auth mds
  2020-02-28 11:55 [PATCH v3 0/6] ceph: don't request caps for idle open files Yan, Zheng
                   ` (3 preceding siblings ...)
  2020-02-28 11:55 ` [PATCH v3 4/6] ceph: remove delay check logic from ceph_check_caps() Yan, Zheng
@ 2020-02-28 11:55 ` Yan, Zheng
  2020-02-28 11:55 ` [PATCH v3 6/6] ceph: check all mds' caps after page writeback Yan, Zheng
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2020-02-28 11:55 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

because non-auth mds can't do anything to 'update max' cap message.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/caps.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 29f39058aca7..49f773247044 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1339,7 +1339,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	arg.size = inode->i_size;
 	ci->i_reported_size = arg.size;
 	arg.max_size = ci->i_wanted_max_size;
-	ci->i_requested_max_size = arg.max_size;
+	if (cap == ci->i_auth_cap)
+		ci->i_requested_max_size = arg.max_size;
 
 	if (flushing & CEPH_CAP_XATTR_EXCL) {
 		old_blob = __ceph_build_xattrs_blob(ci);
-- 
2.21.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH v3 6/6] ceph: check all mds' caps after page writeback
  2020-02-28 11:55 [PATCH v3 0/6] ceph: don't request caps for idle open files Yan, Zheng
                   ` (4 preceding siblings ...)
  2020-02-28 11:55 ` [PATCH v3 5/6] ceph: update i_requested_max_size only when sending cap msg to auth mds Yan, Zheng
@ 2020-02-28 11:55 ` Yan, Zheng
  2020-02-28 13:01 ` [PATCH v3 0/6] ceph: don't request caps for idle open files Jeff Layton
  2020-03-02 19:53 ` Jeff Layton
  7 siblings, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2020-02-28 11:55 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

If an inode has caps from multiple mds, following case can happen.

- non-auth mds revokes Fsc. Fcb is used, so page writeback is queued.
- when writeback finishes, ceph_check_caps() is called with auth only
  flag. ceph_check_caps() invalidates pagecache, but skip checking any
  non-auth caps.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/caps.c  | 2 +-
 fs/ceph/inode.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 49f773247044..9b3d5816c109 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3018,7 +3018,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (last) {
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+		ceph_check_caps(ci, 0, NULL);
 	} else if (flush_snaps) {
 		ceph_flush_snaps(ci, NULL);
 	}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5a8fa8a2d3cf..896d30820035 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1974,7 +1974,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
 	mutex_unlock(&ci->i_truncate_mutex);
 
 	if (wrbuffer_refs == 0)
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+		ceph_check_caps(ci, 0, NULL);
 
 	wake_up_all(&ci->i_cap_wq);
 }
-- 
2.21.1

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 0/6] ceph: don't request caps for idle open files
  2020-02-28 11:55 [PATCH v3 0/6] ceph: don't request caps for idle open files Yan, Zheng
                   ` (5 preceding siblings ...)
  2020-02-28 11:55 ` [PATCH v3 6/6] ceph: check all mds' caps after page writeback Yan, Zheng
@ 2020-02-28 13:01 ` Jeff Layton
  2020-03-02 19:53 ` Jeff Layton
  7 siblings, 0 replies; 12+ messages in thread
From: Jeff Layton @ 2020-02-28 13:01 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel

On Fri, 2020-02-28 at 19:55 +0800, Yan, Zheng wrote:
> This series make cephfs client not request caps for open files that
> idle for a long time. For the case that one active client and multiple
> standby clients open the same file, this increase the possibility that
> mds issues exclusive caps to the active client.
> 
> Yan, Zheng (4):
>   ceph: always renew caps if mds_wanted is insufficient
>   ceph: consider inode's last read/write when calculating wanted caps
>   ceph: simplify calling of ceph_get_fmode()
>   ceph: remove delay check logic from ceph_check_caps()
> 
>  fs/ceph/caps.c               | 324 +++++++++++++++--------------------
>  fs/ceph/file.c               |  39 ++---
>  fs/ceph/inode.c              |  19 +-
>  fs/ceph/ioctl.c              |   2 +
>  fs/ceph/mds_client.c         |   5 -
>  fs/ceph/super.h              |  35 ++--
>  include/linux/ceph/ceph_fs.h |   1 +
>  7 files changed, 188 insertions(+), 237 deletions(-)
> 
> changes since v2
>  - make __ceph_caps_file_wanted more readable
>  - add patch 5 and 6, which fix hung write during testing patch 1~4
> 

Thanks Zheng. This looks good to me -- merged into testing branch with
some small revisions to the changelogs. Let me know if I made any
mistakes there.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 0/6] ceph: don't request caps for idle open files
  2020-02-28 11:55 [PATCH v3 0/6] ceph: don't request caps for idle open files Yan, Zheng
                   ` (6 preceding siblings ...)
  2020-02-28 13:01 ` [PATCH v3 0/6] ceph: don't request caps for idle open files Jeff Layton
@ 2020-03-02 19:53 ` Jeff Layton
  2020-03-03 16:23   ` Yan, Zheng
  7 siblings, 1 reply; 12+ messages in thread
From: Jeff Layton @ 2020-03-02 19:53 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel; +Cc: Ilya Dryomov, Patrick Donnelly

On Fri, 2020-02-28 at 19:55 +0800, Yan, Zheng wrote:
> This series make cephfs client not request caps for open files that
> idle for a long time. For the case that one active client and multiple
> standby clients open the same file, this increase the possibility that
> mds issues exclusive caps to the active client.
> 
> Yan, Zheng (4):
>   ceph: always renew caps if mds_wanted is insufficient
>   ceph: consider inode's last read/write when calculating wanted caps
>   ceph: simplify calling of ceph_get_fmode()
>   ceph: remove delay check logic from ceph_check_caps()
> 
>  fs/ceph/caps.c               | 324 +++++++++++++++--------------------
>  fs/ceph/file.c               |  39 ++---
>  fs/ceph/inode.c              |  19 +-
>  fs/ceph/ioctl.c              |   2 +
>  fs/ceph/mds_client.c         |   5 -
>  fs/ceph/super.h              |  35 ++--
>  include/linux/ceph/ceph_fs.h |   1 +
>  7 files changed, 188 insertions(+), 237 deletions(-)
> 
> changes since v2
>  - make __ceph_caps_file_wanted more readable
>  - add patch 5 and 6, which fix hung write during testing patch 1~4
> 

This patch series causes some serious slowdown in the async dirops
patches that I've not yet fully tracked down, and I suspect that they
may also be the culprit in these bugs:

    https://tracker.ceph.com/issues/44381
    https://tracker.ceph.com/issues/44382

I'm going to drop this series from the testing branch for now, until we
can track down the issue.

Thanks,
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 0/6] ceph: don't request caps for idle open files
  2020-03-02 19:53 ` Jeff Layton
@ 2020-03-03 16:23   ` Yan, Zheng
  2020-03-03 20:17     ` Jeff Layton
  0 siblings, 1 reply; 12+ messages in thread
From: Yan, Zheng @ 2020-03-03 16:23 UTC (permalink / raw)
  To: Jeff Layton, ceph-devel; +Cc: Ilya Dryomov, Patrick Donnelly

On 3/3/20 3:53 AM, Jeff Layton wrote:
> On Fri, 2020-02-28 at 19:55 +0800, Yan, Zheng wrote:
>> This series make cephfs client not request caps for open files that
>> idle for a long time. For the case that one active client and multiple
>> standby clients open the same file, this increase the possibility that
>> mds issues exclusive caps to the active client.
>>
>> Yan, Zheng (4):
>>    ceph: always renew caps if mds_wanted is insufficient
>>    ceph: consider inode's last read/write when calculating wanted caps
>>    ceph: simplify calling of ceph_get_fmode()
>>    ceph: remove delay check logic from ceph_check_caps()
>>
>>   fs/ceph/caps.c               | 324 +++++++++++++++--------------------
>>   fs/ceph/file.c               |  39 ++---
>>   fs/ceph/inode.c              |  19 +-
>>   fs/ceph/ioctl.c              |   2 +
>>   fs/ceph/mds_client.c         |   5 -
>>   fs/ceph/super.h              |  35 ++--
>>   include/linux/ceph/ceph_fs.h |   1 +
>>   7 files changed, 188 insertions(+), 237 deletions(-)
>>
>> changes since v2
>>   - make __ceph_caps_file_wanted more readable
>>   - add patch 5 and 6, which fix hung write during testing patch 1~4
>>
> 
> This patch series causes some serious slowdown in the async dirops
> patches that I've not yet fully tracked down, and I suspect that they
> may also be the culprit in these bugs:
> 

slow down which tests?

>      https://tracker.ceph.com/issues/44381

this is because I forgot to check if inode is snap when queue delayed 
check. But it can't explain slow down.

>      https://tracker.ceph.com/issues/44382
> 
> I'm going to drop this series from the testing branch for now, until we
> can track down the issue.
> 
> Thanks,
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 0/6] ceph: don't request caps for idle open files
  2020-03-03 16:23   ` Yan, Zheng
@ 2020-03-03 20:17     ` Jeff Layton
  2020-03-04 13:41       ` Yan, Zheng
  0 siblings, 1 reply; 12+ messages in thread
From: Jeff Layton @ 2020-03-03 20:17 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel; +Cc: Ilya Dryomov, Patrick Donnelly

On Wed, 2020-03-04 at 00:23 +0800, Yan, Zheng wrote:
> On 3/3/20 3:53 AM, Jeff Layton wrote:
> > On Fri, 2020-02-28 at 19:55 +0800, Yan, Zheng wrote:
> > > This series make cephfs client not request caps for open files that
> > > idle for a long time. For the case that one active client and multiple
> > > standby clients open the same file, this increase the possibility that
> > > mds issues exclusive caps to the active client.
> > > 
> > > Yan, Zheng (4):
> > >    ceph: always renew caps if mds_wanted is insufficient
> > >    ceph: consider inode's last read/write when calculating wanted caps
> > >    ceph: simplify calling of ceph_get_fmode()
> > >    ceph: remove delay check logic from ceph_check_caps()
> > > 
> > >   fs/ceph/caps.c               | 324 +++++++++++++++--------------------
> > >   fs/ceph/file.c               |  39 ++---
> > >   fs/ceph/inode.c              |  19 +-
> > >   fs/ceph/ioctl.c              |   2 +
> > >   fs/ceph/mds_client.c         |   5 -
> > >   fs/ceph/super.h              |  35 ++--
> > >   include/linux/ceph/ceph_fs.h |   1 +
> > >   7 files changed, 188 insertions(+), 237 deletions(-)
> > > 
> > > changes since v2
> > >   - make __ceph_caps_file_wanted more readable
> > >   - add patch 5 and 6, which fix hung write during testing patch 1~4
> > > 
> > 
> > This patch series causes some serious slowdown in the async dirops
> > patches that I've not yet fully tracked down, and I suspect that they
> > may also be the culprit in these bugs:
> > 
> 
> slow down which tests?
> 

Most of the simple tests I was doing to sanity check async dirops.
Basically, this script was not seeing speed gain with async dirops
enabled:

-----------------8<-------------------
#!/bin/sh

MOUNTPOINT=/mnt/cephfs
TESTDIR=$MOUNTPOINT/test-dirops.$$

mkdir $TESTDIR
stat $TESTDIR
echo "Creating files in $TESTDIR"
time for i in `seq 1 10000`; do
    echo "foobarbaz" > $TESTDIR/$i
done
echo; echo "sync"
time sync
echo "Starting rm"
time rm -f $TESTDIR/*
echo; echo "rmdir"
time rmdir $TESTDIR
echo; echo "sync"
time sync
-----------------8<-------------------

It mostly seemed like it was just not getting caps in some cases.
Cranking up dynamic_debug seemed to make the problem go away, which led
me to believe there was probably a race condition in there.

At this point, I've gone ahead and merged the async dirops patches into
testing, so if you could rebase this on top of the current testing
branch and repost, I'll test them out again.

> >      https://tracker.ceph.com/issues/44381
> 
> this is because I forgot to check if inode is snap when queue delayed 
> check. But it can't explain slow down.
>

Ok, good to know.

> >      https://tracker.ceph.com/issues/44382
> > 
> > I'm going to drop this series from the testing branch for now, until we
> > can track down the issue.
> > 
> > 

Thanks,
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v3 0/6] ceph: don't request caps for idle open files
  2020-03-03 20:17     ` Jeff Layton
@ 2020-03-04 13:41       ` Yan, Zheng
  0 siblings, 0 replies; 12+ messages in thread
From: Yan, Zheng @ 2020-03-04 13:41 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Yan, Zheng, ceph-devel, Ilya Dryomov, Patrick Donnelly

On Wed, Mar 4, 2020 at 5:48 AM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Wed, 2020-03-04 at 00:23 +0800, Yan, Zheng wrote:
> > On 3/3/20 3:53 AM, Jeff Layton wrote:
> > > On Fri, 2020-02-28 at 19:55 +0800, Yan, Zheng wrote:
> > > > This series make cephfs client not request caps for open files that
> > > > idle for a long time. For the case that one active client and multiple
> > > > standby clients open the same file, this increase the possibility that
> > > > mds issues exclusive caps to the active client.
> > > >
> > > > Yan, Zheng (4):
> > > >    ceph: always renew caps if mds_wanted is insufficient
> > > >    ceph: consider inode's last read/write when calculating wanted caps
> > > >    ceph: simplify calling of ceph_get_fmode()
> > > >    ceph: remove delay check logic from ceph_check_caps()
> > > >
> > > >   fs/ceph/caps.c               | 324 +++++++++++++++--------------------
> > > >   fs/ceph/file.c               |  39 ++---
> > > >   fs/ceph/inode.c              |  19 +-
> > > >   fs/ceph/ioctl.c              |   2 +
> > > >   fs/ceph/mds_client.c         |   5 -
> > > >   fs/ceph/super.h              |  35 ++--
> > > >   include/linux/ceph/ceph_fs.h |   1 +
> > > >   7 files changed, 188 insertions(+), 237 deletions(-)
> > > >
> > > > changes since v2
> > > >   - make __ceph_caps_file_wanted more readable
> > > >   - add patch 5 and 6, which fix hung write during testing patch 1~4
> > > >
> > >
> > > This patch series causes some serious slowdown in the async dirops
> > > patches that I've not yet fully tracked down, and I suspect that they
> > > may also be the culprit in these bugs:
> > >
> >
> > slow down which tests?
> >
>
> Most of the simple tests I was doing to sanity check async dirops.
> Basically, this script was not seeing speed gain with async dirops
> enabled:
>
> -----------------8<-------------------
> #!/bin/sh
>
> MOUNTPOINT=/mnt/cephfs
> TESTDIR=$MOUNTPOINT/test-dirops.$$
>
> mkdir $TESTDIR
> stat $TESTDIR
> echo "Creating files in $TESTDIR"
> time for i in `seq 1 10000`; do
>     echo "foobarbaz" > $TESTDIR/$i
> done
> echo; echo "sync"
> time sync
> echo "Starting rm"
> time rm -f $TESTDIR/*
> echo; echo "rmdir"
> time rmdir $TESTDIR
> echo; echo "sync"
> time sync
> -----------------8<-------------------
>
> It mostly seemed like it was just not getting caps in some cases.
> Cranking up dynamic_debug seemed to make the problem go away, which led
> me to believe there was probably a race condition in there.
>

should be fixed by patch "mds: update dentry lease for async create".
I saw dput() from ceph_mdsc_release_request() pruned dentry and
cleared dir's complete flags.  The issue happens if client gets safe
replies quickly.

I also found that mds  revoke Fsx when fragmenting dirfrags. Fixed by
following PR
https://github.com/ceph/ceph/pull/33719

> At this point, I've gone ahead and merged the async dirops patches into
> testing, so if you could rebase this on top of the current testing
> branch and repost, I'll test them out again.
>
> > >      https://tracker.ceph.com/issues/44381
> >
> > this is because I forgot to check if inode is snap when queue delayed
> > check. But it can't explain slow down.
> >
>
> Ok, good to know.
>
> > >      https://tracker.ceph.com/issues/44382
> > >
> > > I'm going to drop this series from the testing branch for now, until we
> > > can track down the issue.
> > >
> > >
>
> Thanks,
> --
> Jeff Layton <jlayton@kernel.org>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-03-04 13:42 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-28 11:55 [PATCH v3 0/6] ceph: don't request caps for idle open files Yan, Zheng
2020-02-28 11:55 ` [PATCH v3 1/6] ceph: always renew caps if mds_wanted is insufficient Yan, Zheng
2020-02-28 11:55 ` [PATCH v3 2/6] ceph: consider inode's last read/write when calculating wanted caps Yan, Zheng
2020-02-28 11:55 ` [PATCH v3 3/6] ceph: simplify calling of ceph_get_fmode() Yan, Zheng
2020-02-28 11:55 ` [PATCH v3 4/6] ceph: remove delay check logic from ceph_check_caps() Yan, Zheng
2020-02-28 11:55 ` [PATCH v3 5/6] ceph: update i_requested_max_size only when sending cap msg to auth mds Yan, Zheng
2020-02-28 11:55 ` [PATCH v3 6/6] ceph: check all mds' caps after page writeback Yan, Zheng
2020-02-28 13:01 ` [PATCH v3 0/6] ceph: don't request caps for idle open files Jeff Layton
2020-03-02 19:53 ` Jeff Layton
2020-03-03 16:23   ` Yan, Zheng
2020-03-03 20:17     ` Jeff Layton
2020-03-04 13:41       ` Yan, Zheng

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.