* [PATCH v4 0/6] ceph: don't request caps for idle open files
@ 2020-03-04 17:32 Yan, Zheng
  2020-03-04 17:32 ` [PATCH v4 1/6] ceph: always renew caps if mds_wanted is insufficient Yan, Zheng
                   ` (6 more replies)
  0 siblings, 7 replies; 11+ messages in thread
From: Yan, Zheng @ 2020-03-04 17:32 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

This series makes the cephfs client stop requesting caps for open files
that have been idle for a long time. For the case where one active
client and multiple standby clients open the same file, this increases
the likelihood that the mds issues exclusive caps to the active client.
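
As a simplified sketch, the read side of the new check in
__ceph_caps_file_wanted() (patch 2) behaves roughly as below. The helper
name is for illustration only; the real code also handles the write
side, the FMODE_WAIT_BIAS waiter bias, lazyio and directories:

/* want Fr/Fc if the file was read recently, or for a short grace
 * period after the last read even if no fd is open for read */
static bool sketch_wants_rd_caps(struct ceph_inode_info *ci,
				 struct ceph_mount_options *opt)
{
	unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ;
	unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ;

	if (ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_RD)] > 0)
		return time_after(ci->i_last_rd, used_cutoff);
	return time_after(ci->i_last_rd, idle_cutoff);
}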

Yan, Zheng (6):
  ceph: always renew caps if mds_wanted is insufficient
  ceph: consider inode's last read/write when calculating wanted caps
  ceph: remove delay check logic from ceph_check_caps()
  ceph: simplify calling of ceph_get_fmode()
  ceph: update i_requested_max_size only when sending cap msg to auth mds
  ceph: check all mds' caps after page writeback

 fs/ceph/caps.c               | 338 ++++++++++++++++-------------------
 fs/ceph/file.c               |  45 ++---
 fs/ceph/inode.c              |  21 +--
 fs/ceph/ioctl.c              |   2 +
 fs/ceph/mds_client.c         |   5 -
 fs/ceph/super.h              |  37 ++--
 include/linux/ceph/ceph_fs.h |   1 +
 7 files changed, 202 insertions(+), 247 deletions(-)

changes since v2
 - make __ceph_caps_file_wanted() more readable
 - add patches 5 and 6, which fix a write hang found while testing patches 1-4

changes since v3
 - don't queue delayed cap check for snap inode
 - initialize ci->{last_rd,last_wr} to jiffies - 3600 * HZ
 - make __ceph_caps_file_wanted() check inode type

-- 
2.21.1


* [PATCH v4 1/6] ceph: always renew caps if mds_wanted is insufficient
  2020-03-04 17:32 [PATCH v4 0/6] ceph: don't request caps for idle open files Yan, Zheng
@ 2020-03-04 17:32 ` Yan, Zheng
  2020-03-04 17:32 ` [PATCH v4 2/6] ceph: consider inode's last read/write when calculating wanted caps Yan, Zheng
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2020-03-04 17:32 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

The original code only renews caps for inodes that have the
CEPH_I_CAP_DROPPED flag set, which indicates that the mds has closed
the session and the caps were dropped. Remove this flag in preparation
for not requesting caps for idle open files.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/caps.c       | 36 +++++++++++++++---------------------
 fs/ceph/mds_client.c |  5 -----
 fs/ceph/super.h      | 13 ++++++-------
 3 files changed, 21 insertions(+), 33 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index be85225b962c..a75e5eb3740e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2686,6 +2686,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 		}
 	} else {
 		int session_readonly = false;
+		int mds_wanted;
 		if (ci->i_auth_cap &&
 		    (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) {
 			struct ceph_mds_session *s = ci->i_auth_cap->session;
@@ -2694,32 +2695,27 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 			spin_unlock(&s->s_cap_lock);
 		}
 		if (session_readonly) {
-			dout("get_cap_refs %p needed %s but mds%d readonly\n",
+			dout("get_cap_refs %p need %s but mds%d readonly\n",
 			     inode, ceph_cap_string(need), ci->i_auth_cap->mds);
 			ret = -EROFS;
 			goto out_unlock;
 		}
 
-		if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
-			int mds_wanted;
-			if (READ_ONCE(mdsc->fsc->mount_state) ==
-			    CEPH_MOUNT_SHUTDOWN) {
-				dout("get_cap_refs %p forced umount\n", inode);
-				ret = -EIO;
-				goto out_unlock;
-			}
-			mds_wanted = __ceph_caps_mds_wanted(ci, false);
-			if (need & ~(mds_wanted & need)) {
-				dout("get_cap_refs %p caps were dropped"
-				     " (session killed?)\n", inode);
-				ret = -ESTALE;
-				goto out_unlock;
-			}
-			if (!(file_wanted & ~mds_wanted))
-				ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
+		if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+			dout("get_cap_refs %p forced umount\n", inode);
+			ret = -EIO;
+			goto out_unlock;
+		}
+		mds_wanted = __ceph_caps_mds_wanted(ci, false);
+		if (need & ~mds_wanted) {
+			dout("get_cap_refs %p need %s > mds_wanted %s\n",
+			     inode, ceph_cap_string(need),
+			     ceph_cap_string(mds_wanted));
+			ret = -ESTALE;
+			goto out_unlock;
 		}
 
-		dout("get_cap_refs %p have %s needed %s\n", inode,
+		dout("get_cap_refs %p have %s need %s\n", inode,
 		     ceph_cap_string(have), ceph_cap_string(need));
 	}
 out_unlock:
@@ -3678,8 +3674,6 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 		goto out_unlock;
 
 	if (target < 0) {
-		if (cap->mds_wanted | cap->issued)
-			ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 		__ceph_remove_cap(cap, false);
 		goto out_unlock;
 	}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 177acb9f456b..68b8afded466 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1514,8 +1514,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
 	spin_lock(&ci->i_ceph_lock);
-	if (cap->mds_wanted | cap->issued)
-		ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 	__ceph_remove_cap(cap, false);
 	if (!ci->i_auth_cap) {
 		struct ceph_cap_flush *cf;
@@ -1681,9 +1679,6 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
 			/* mds did not re-issue stale cap */
 			spin_lock(&ci->i_ceph_lock);
 			cap->issued = cap->implemented = CEPH_CAP_PIN;
-			/* make sure mds knows what we want */
-			if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
-				ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 			spin_unlock(&ci->i_ceph_lock);
 		}
 	} else if (ev == FORCE_RO) {
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 22b89eaeaad4..356ad7b46b85 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -522,13 +522,12 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */
 #define CEPH_I_POOL_WR		(1 << 5)  /* can write to pool */
 #define CEPH_I_SEC_INITED	(1 << 6)  /* security initialized */
-#define CEPH_I_CAP_DROPPED	(1 << 7)  /* caps were forcibly dropped */
-#define CEPH_I_KICK_FLUSH	(1 << 8)  /* kick flushing caps */
-#define CEPH_I_FLUSH_SNAPS	(1 << 9)  /* need flush snapss */
-#define CEPH_I_ERROR_WRITE	(1 << 10) /* have seen write errors */
-#define CEPH_I_ERROR_FILELOCK	(1 << 11) /* have seen file lock errors */
-#define CEPH_I_ODIRECT		(1 << 12) /* inode in direct I/O mode */
-#define CEPH_ASYNC_CREATE_BIT	(13)	  /* async create in flight for this */
+#define CEPH_I_KICK_FLUSH	(1 << 7)  /* kick flushing caps */
+#define CEPH_I_FLUSH_SNAPS	(1 << 8)  /* need flush snapss */
+#define CEPH_I_ERROR_WRITE	(1 << 9) /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK	(1 << 10) /* have seen file lock errors */
+#define CEPH_I_ODIRECT		(1 << 11) /* inode in direct I/O mode */
+#define CEPH_ASYNC_CREATE_BIT	(12)	  /* async create in flight for this */
 #define CEPH_I_ASYNC_CREATE	(1 << CEPH_ASYNC_CREATE_BIT)
 
 /*
-- 
2.21.1


* [PATCH v4 2/6] ceph: consider inode's last read/write when calculating wanted caps
  2020-03-04 17:32 [PATCH v4 0/6] ceph: don't request caps for idle open files Yan, Zheng
  2020-03-04 17:32 ` [PATCH v4 1/6] ceph: always renew caps if mds_wanted is insufficient Yan, Zheng
@ 2020-03-04 17:32 ` Yan, Zheng
  2020-03-04 17:32 ` [PATCH v4 3/6] ceph: remove delay check logic from ceph_check_caps() Yan, Zheng
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2020-03-04 17:32 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

Add i_last_rd and i_last_wr to ceph_inode_info. These fields track the
last time the client acquired read/write caps for the inode.

If there has been no read/write on an inode for 'caps_wanted_delay_max'
seconds, __ceph_caps_file_wanted() does not request read/write caps
even if there are open files.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/caps.c               | 161 ++++++++++++++++++++++++-----------
 fs/ceph/file.c               |  21 +++--
 fs/ceph/inode.c              |  10 ++-
 fs/ceph/ioctl.c              |   2 +
 fs/ceph/super.h              |  13 ++-
 include/linux/ceph/ceph_fs.h |   1 +
 6 files changed, 148 insertions(+), 60 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a75e5eb3740e..6a3b75b8311e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -978,16 +978,52 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
 	return used;
 }
 
+#define FMODE_WAIT_BIAS 1000
+
 /*
  * wanted, by virtue of open file modes
  */
 int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
 {
-	int i, bits = 0;
-	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
-		if (ci->i_nr_by_mode[i])
-			bits |= 1 << i;
+	int bits = 0;
+
+	if (S_ISDIR(ci->vfs_inode.i_mode)) {
+		const int PIN_SHIFT = ffs(CEPH_FILE_MODE_PIN);
+		if (ci->i_nr_by_mode[PIN_SHIFT] > 0)
+			bits |= 1 << PIN_SHIFT;
+	} else {
+		const int RD_SHIFT = ffs(CEPH_FILE_MODE_RD);
+		const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR);
+		const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY);
+		struct ceph_mount_options *opt =
+			ceph_inode_to_client(&ci->vfs_inode)->mount_options;
+		unsigned long used_cutoff =
+			jiffies - opt->caps_wanted_delay_max * HZ;
+		unsigned long idle_cutoff =
+			jiffies - opt->caps_wanted_delay_min * HZ;
+
+		if (ci->i_nr_by_mode[RD_SHIFT] > 0) {
+			if (ci->i_nr_by_mode[RD_SHIFT] >= FMODE_WAIT_BIAS ||
+			    time_after(ci->i_last_rd, used_cutoff))
+				bits |= 1 << RD_SHIFT;
+		} else if (time_after(ci->i_last_rd, idle_cutoff)) {
+			bits |= 1 << RD_SHIFT;
+		}
+
+		if (ci->i_nr_by_mode[WR_SHIFT] > 0) {
+			if (ci->i_nr_by_mode[WR_SHIFT] >= FMODE_WAIT_BIAS ||
+			    time_after(ci->i_last_wr, used_cutoff))
+				bits |= 1 << WR_SHIFT;
+		} else if (time_after(ci->i_last_wr, idle_cutoff)) {
+			bits |= 1 << WR_SHIFT;
+		}
+
+		/* check lazyio only when read/write is wanted */
+		if ((bits & (CEPH_FILE_MODE_RDWR << 1)) &&
+		    ci->i_nr_by_mode[LAZY_SHIFT] > 0)
+			bits |= 1 << LAZY_SHIFT;
 	}
+
 	if (bits == 0)
 		return 0;
 	return ceph_caps_for_mode(bits >> 1);
@@ -1032,14 +1068,6 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
 	return mds_wanted;
 }
 
-/*
- * called under i_ceph_lock
- */
-static int __ceph_is_single_caps(struct ceph_inode_info *ci)
-{
-	return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
-}
-
 int ceph_is_any_caps(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1877,10 +1905,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	if (ci->i_ceph_flags & CEPH_I_FLUSH)
 		flags |= CHECK_CAPS_FLUSH;
 
-	if (!(flags & CHECK_CAPS_AUTHONLY) ||
-	    (ci->i_auth_cap && __ceph_is_single_caps(ci)))
-		__cap_delay_cancel(mdsc, ci);
-
 	goto retry_locked;
 retry:
 	spin_lock(&ci->i_ceph_lock);
@@ -2105,9 +2129,17 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		goto retry; /* retake i_ceph_lock and restart our cap scan. */
 	}
 
-	/* Reschedule delayed caps release if we delayed anything */
-	if (delayed)
-		__cap_delay_requeue(mdsc, ci, false);
+	if (list_empty(&ci->i_cap_delay_list)) {
+	    if (delayed) {
+		    /* Reschedule delayed caps release if we delayed anything */
+		    __cap_delay_requeue(mdsc, ci, false);
+	    } else if (__ceph_is_any_real_caps(ci) &&
+			(file_wanted & ~CEPH_CAP_PIN) &&
+			!(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+		    /* periodically re-calculate caps wanted by open files */
+		    __cap_delay_requeue(mdsc, ci, true);
+	    }
+	}
 
 	spin_unlock(&ci->i_ceph_lock);
 
@@ -2573,8 +2605,9 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
  * FIXME: how does a 0 return differ from -EAGAIN?
  */
 enum {
-	NON_BLOCKING	= 1,
-	CHECK_FILELOCK	= 2,
+	/* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
+	NON_BLOCKING	= (1 << 8),
+	CHECK_FILELOCK	= (1 << 9),
 };
 
 static int try_get_cap_refs(struct inode *inode, int need, int want,
@@ -2584,7 +2617,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	int ret = 0;
 	int have, implemented;
-	int file_wanted;
 	bool snap_rwsem_locked = false;
 
 	dout("get_cap_refs %p need %s want %s\n", inode,
@@ -2600,15 +2632,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 		goto out_unlock;
 	}
 
-	/* make sure file is actually open */
-	file_wanted = __ceph_caps_file_wanted(ci);
-	if ((file_wanted & need) != need) {
-		dout("try_get_cap_refs need %s file_wanted %s, EBADF\n",
-		     ceph_cap_string(need), ceph_cap_string(file_wanted));
-		ret = -EBADF;
-		goto out_unlock;
-	}
-
 	/* finish pending truncate */
 	while (ci->i_truncate_pending) {
 		spin_unlock(&ci->i_ceph_lock);
@@ -2719,6 +2742,9 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
 		     ceph_cap_string(have), ceph_cap_string(need));
 	}
 out_unlock:
+
+	__ceph_touch_fmode(ci, mdsc, flags);
+
 	spin_unlock(&ci->i_ceph_lock);
 	if (snap_rwsem_locked)
 		up_read(&mdsc->snap_rwsem);
@@ -2756,10 +2782,20 @@ static void check_max_size(struct inode *inode, loff_t endoff)
 		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
 }
 
+static inline int get_used_fmode(int caps)
+{
+	int fmode = 0;
+	if (caps & CEPH_CAP_FILE_RD)
+		fmode |= CEPH_FILE_MODE_RD;
+	if (caps & CEPH_CAP_FILE_WR)
+		fmode |= CEPH_FILE_MODE_WR;
+	return fmode;
+}
+
 int ceph_try_get_caps(struct inode *inode, int need, int want,
 		      bool nonblock, int *got)
 {
-	int ret;
+	int ret, flags;
 
 	BUG_ON(need & ~CEPH_CAP_FILE_RD);
 	BUG_ON(want & ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO |
@@ -2771,8 +2807,11 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
 			return ret;
 	}
 
-	ret = try_get_cap_refs(inode, need, want, 0,
-			       (nonblock ? NON_BLOCKING : 0), got);
+	flags = get_used_fmode(need | want);
+	if (nonblock)
+		flags |= NON_BLOCKING;
+
+	ret = try_get_cap_refs(inode, need, want, 0, flags, got);
 	return ret == -EAGAIN ? 0 : ret;
 }
 
@@ -2798,11 +2837,15 @@ int ceph_get_caps(struct file *filp, int need, int want,
 	    fi->filp_gen != READ_ONCE(fsc->filp_gen))
 		return -EBADF;
 
+	flags = get_used_fmode(need | want);
+
 	while (true) {
 		if (endoff > 0)
 			check_max_size(inode, endoff);
 
-		flags = atomic_read(&fi->num_locks) ? CHECK_FILELOCK : 0;
+		flags &= CEPH_FILE_MODE_MASK;
+		if (atomic_read(&fi->num_locks))
+			flags |= CHECK_FILELOCK;
 		_got = 0;
 		ret = try_get_cap_refs(inode, need, want, endoff,
 				       flags, &_got);
@@ -2822,6 +2865,8 @@ int ceph_get_caps(struct file *filp, int need, int want,
 			list_add(&cw.list, &mdsc->cap_wait_list);
 			spin_unlock(&mdsc->caps_list_lock);
 
+			/* make sure used fmode not timeout */
+			ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
 			add_wait_queue(&ci->i_cap_wq, &wait);
 
 			flags |= NON_BLOCKING;
@@ -2835,6 +2880,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
 			}
 
 			remove_wait_queue(&ci->i_cap_wq, &wait);
+			ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);
 
 			spin_lock(&mdsc->caps_list_lock);
 			list_del(&cw.list);
@@ -2854,7 +2900,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
 		if (ret < 0) {
 			if (ret == -ESTALE) {
 				/* session was killed, try renew caps */
-				ret = ceph_renew_caps(inode);
+				ret = ceph_renew_caps(inode, flags);
 				if (ret == 0)
 					continue;
 			}
@@ -4153,6 +4199,33 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 	dout("flush_dirty_caps done\n");
 }
 
+void __ceph_touch_fmode(struct ceph_inode_info *ci,
+			struct ceph_mds_client *mdsc, int fmode)
+{
+	unsigned long now = jiffies;
+	if (fmode & CEPH_FILE_MODE_RD)
+		ci->i_last_rd = now;
+	if (fmode & CEPH_FILE_MODE_WR)
+		ci->i_last_wr = now;
+	/* queue periodic check */
+	if (fmode &&
+	    __ceph_is_any_real_caps(ci) &&
+	    list_empty(&ci->i_cap_delay_list))
+		__cap_delay_requeue(mdsc, ci, true);
+}
+
+void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
+{
+	int i;
+	int bits = (fmode << 1) | 1;
+	spin_lock(&ci->i_ceph_lock);
+	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
+		if (bits & (1 << i))
+			ci->i_nr_by_mode[i] += count;
+	}
+	spin_unlock(&ci->i_ceph_lock);
+}
+
 void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
 {
 	int i;
@@ -4168,26 +4241,18 @@ void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
  * we may need to release capabilities to the MDS (or schedule
  * their delayed release).
  */
-void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
+void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
 {
-	int i, last = 0;
+	int i;
 	int bits = (fmode << 1) | 1;
 	spin_lock(&ci->i_ceph_lock);
 	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
 		if (bits & (1 << i)) {
-			BUG_ON(ci->i_nr_by_mode[i] == 0);
-			if (--ci->i_nr_by_mode[i] == 0)
-				last++;
+			BUG_ON(ci->i_nr_by_mode[i] < count);
+			ci->i_nr_by_mode[i] -= count;
 		}
 	}
-	dout("put_fmode %p fmode %d {%d,%d,%d,%d}\n",
-	     &ci->vfs_inode, fmode,
-	     ci->i_nr_by_mode[0], ci->i_nr_by_mode[1],
-	     ci->i_nr_by_mode[2], ci->i_nr_by_mode[3]);
 	spin_unlock(&ci->i_ceph_lock);
-
-	if (last && ci->i_vino.snap == CEPH_NOSNAP)
-		ceph_check_caps(ci, 0, NULL);
 }
 
 /*
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 671b141aedfe..c39951e7e289 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -213,7 +213,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 		struct ceph_dir_file_info *dfi =
 			kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
 		if (!dfi) {
-			ceph_put_fmode(ci, fmode); /* clean up */
+			ceph_put_fmode(ci, fmode, 1); /* clean up */
 			return -ENOMEM;
 		}
 
@@ -224,7 +224,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 	} else {
 		fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
 		if (!fi) {
-			ceph_put_fmode(ci, fmode); /* clean up */
+			ceph_put_fmode(ci, fmode, 1); /* clean up */
 			return -ENOMEM;
 		}
 
@@ -263,7 +263,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 	case S_IFLNK:
 		dout("init_file %p %p 0%o (symlink)\n", inode, file,
 		     inode->i_mode);
-		ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+		ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
 		break;
 
 	default:
@@ -273,7 +273,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 		 * we need to drop the open ref now, since we don't
 		 * have .release set to ceph_release.
 		 */
-		ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
+		ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
 		BUG_ON(inode->i_fop->release == ceph_release);
 
 		/* call the proper open fop */
@@ -285,14 +285,15 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 /*
  * try renew caps after session gets killed.
  */
-int ceph_renew_caps(struct inode *inode)
+int ceph_renew_caps(struct inode *inode, int fmode)
 {
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_mds_request *req;
 	int err, flags, wanted;
 
 	spin_lock(&ci->i_ceph_lock);
+	__ceph_touch_fmode(ci, mdsc, fmode);
 	wanted = __ceph_caps_file_wanted(ci);
 	if (__ceph_is_any_real_caps(ci) &&
 	    (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
@@ -405,6 +406,7 @@ int ceph_open(struct inode *inode, struct file *file)
 	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
 		   (ci->i_snap_caps & wanted) == wanted) {
 		__ceph_get_fmode(ci, fmode);
+		__ceph_touch_fmode(ci, mdsc, fmode);
 		spin_unlock(&ci->i_ceph_lock);
 		return ceph_init_file(inode, file, fmode);
 	}
@@ -781,7 +783,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	}
 out_req:
 	if (!req->r_err && req->r_target_inode)
-		ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
+		ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode, 1);
 	ceph_mdsc_put_request(req);
 out_ctx:
 	ceph_release_acl_sec_ctx(&as_ctx);
@@ -798,7 +800,7 @@ int ceph_release(struct inode *inode, struct file *file)
 		dout("release inode %p dir file %p\n", inode, file);
 		WARN_ON(!list_empty(&dfi->file_info.rw_contexts));
 
-		ceph_put_fmode(ci, dfi->file_info.fmode);
+		ceph_put_fmode(ci, dfi->file_info.fmode, 1);
 
 		if (dfi->last_readdir)
 			ceph_mdsc_put_request(dfi->last_readdir);
@@ -810,7 +812,8 @@ int ceph_release(struct inode *inode, struct file *file)
 		dout("release inode %p regular file %p\n", inode, file);
 		WARN_ON(!list_empty(&fi->rw_contexts));
 
-		ceph_put_fmode(ci, fi->fmode);
+		ceph_put_fmode(ci, fi->fmode, 1);
+
 		kmem_cache_free(ceph_file_cachep, fi);
 	}
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 73f986efb1fd..a618e8ae9d0f 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -479,6 +479,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	ci->i_head_snapc = NULL;
 	ci->i_snap_caps = 0;
 
+	ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ;
 	for (i = 0; i < CEPH_FILE_MODE_BITS; i++)
 		ci->i_nr_by_mode[i] = 0;
 
@@ -639,7 +640,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
 			if ((issued & (CEPH_CAP_FILE_CACHE|
 				       CEPH_CAP_FILE_BUFFER)) ||
 			    mapping_mapped(inode->i_mapping) ||
-			    __ceph_caps_file_wanted(ci)) {
+			    __ceph_is_file_opened(ci)) {
 				ci->i_truncate_pending++;
 				queue_trunc = 1;
 			}
@@ -1013,6 +1014,13 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
 			fill_inline = true;
 	}
 
+	if (cap_fmode >= 0) {
+		if (!info_caps)
+			pr_warn("mds issued no caps on %llx.%llx\n",
+				ceph_vinop(inode));
+		__ceph_touch_fmode(ci, mdsc, cap_fmode);
+	}
+
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (fill_inline)
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index c90f03beb15d..6e061bf62ad4 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -243,11 +243,13 @@ static long ceph_ioctl_lazyio(struct file *file)
 	struct ceph_file_info *fi = file->private_data;
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 
 	if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
 		spin_lock(&ci->i_ceph_lock);
 		fi->fmode |= CEPH_FILE_MODE_LAZY;
 		ci->i_nr_by_mode[ffs(CEPH_FILE_MODE_LAZY)]++;
+		__ceph_touch_fmode(ci, mdsc, fi->fmode);
 		spin_unlock(&ci->i_ceph_lock);
 		dout("ioctl_layzio: file %p marked lazy\n", file);
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 356ad7b46b85..63dd8e61ba88 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -366,6 +366,8 @@ struct ceph_inode_info {
 						    dirty|flushing caps */
 	unsigned i_snap_caps;           /* cap bits for snapped files */
 
+	unsigned long i_last_rd;
+	unsigned long i_last_wr;
 	int i_nr_by_mode[CEPH_FILE_MODE_BITS];  /* open file counts */
 
 	struct mutex i_truncate_mutex;
@@ -680,6 +682,10 @@ extern int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
 extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
 extern int __ceph_caps_used(struct ceph_inode_info *ci);
 
+static inline bool __ceph_is_file_opened(struct ceph_inode_info *ci)
+{
+	return ci->i_nr_by_mode[0];
+}
 extern int __ceph_caps_file_wanted(struct ceph_inode_info *ci);
 extern int __ceph_caps_wanted(struct ceph_inode_info *ci);
 
@@ -1093,7 +1099,10 @@ extern int ceph_try_get_caps(struct inode *inode,
 
 /* for counting open files by mode */
 extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode);
-extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode);
+extern void ceph_get_fmode(struct ceph_inode_info *ci, int mode, int count);
+extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode, int count);
+extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
+			       struct ceph_mds_client *mdsc, int fmode);
 
 /* addr.c */
 extern const struct address_space_operations ceph_aops;
@@ -1105,7 +1114,7 @@ extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
 /* file.c */
 extern const struct file_operations ceph_file_fops;
 
-extern int ceph_renew_caps(struct inode *inode);
+extern int ceph_renew_caps(struct inode *inode, int fmode);
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 			    struct file *file, unsigned flags, umode_t mode);
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index e035c5194005..ebf5ba62b772 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -568,6 +568,7 @@ struct ceph_filelock {
 #define CEPH_FILE_MODE_RDWR       3  /* RD | WR */
 #define CEPH_FILE_MODE_LAZY       4  /* lazy io */
 #define CEPH_FILE_MODE_BITS       4
+#define CEPH_FILE_MODE_MASK       ((1 << CEPH_FILE_MODE_BITS) - 1)
 
 int ceph_flags_to_mode(int flags);
 
-- 
2.21.1


* [PATCH v4 3/6] ceph: remove delay check logic from ceph_check_caps()
  2020-03-04 17:32 [PATCH v4 0/6] ceph: don't request caps for idle open files Yan, Zheng
  2020-03-04 17:32 ` [PATCH v4 1/6] ceph: always renew caps if mds_wanted is insufficient Yan, Zheng
  2020-03-04 17:32 ` [PATCH v4 2/6] ceph: consider inode's last read/write when calculating wanted caps Yan, Zheng
@ 2020-03-04 17:32 ` Yan, Zheng
  2020-03-04 17:32 ` [PATCH v4 4/6] ceph: simplify calling of ceph_get_fmode() Yan, Zheng
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2020-03-04 17:32 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

__ceph_caps_file_wanted() already checks 'caps_wanted_delay_min' and
'caps_wanted_delay_max'. There is no need to duplicate that logic in
ceph_check_caps() and __send_cap().

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/caps.c  | 148 ++++++++++++------------------------------------
 fs/ceph/file.c  |  13 ++---
 fs/ceph/inode.c |   1 -
 fs/ceph/super.h |   8 +--
 4 files changed, 44 insertions(+), 126 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6a3b75b8311e..2210c313b860 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -490,13 +490,10 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 			       struct ceph_inode_info *ci)
 {
 	struct ceph_mount_options *opt = mdsc->fsc->mount_options;
-
-	ci->i_hold_caps_min = round_jiffies(jiffies +
-					    opt->caps_wanted_delay_min * HZ);
 	ci->i_hold_caps_max = round_jiffies(jiffies +
 					    opt->caps_wanted_delay_max * HZ);
-	dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
-	     ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
+	dout("__cap_set_timeouts %p %lu\n", &ci->vfs_inode,
+	     ci->i_hold_caps_max - jiffies);
 }
 
 /*
@@ -508,8 +505,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
  *    -> we take mdsc->cap_delay_lock
  */
 static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
-				struct ceph_inode_info *ci,
-				bool set_timeout)
+				struct ceph_inode_info *ci)
 {
 	dout("__cap_delay_requeue %p flags 0x%lx at %lu\n", &ci->vfs_inode,
 	     ci->i_ceph_flags, ci->i_hold_caps_max);
@@ -520,8 +516,7 @@ static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
 				goto no_change;
 			list_del_init(&ci->i_cap_delay_list);
 		}
-		if (set_timeout)
-			__cap_set_timeouts(mdsc, ci);
+		__cap_set_timeouts(mdsc, ci);
 		list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
 no_change:
 		spin_unlock(&mdsc->cap_delay_lock);
@@ -733,7 +728,7 @@ void ceph_add_cap(struct inode *inode,
 		dout(" issued %s, mds wanted %s, actual %s, queueing\n",
 		     ceph_cap_string(issued), ceph_cap_string(wanted),
 		     ceph_cap_string(actual_wanted));
-		__cap_delay_requeue(mdsc, ci, true);
+		__cap_delay_requeue(mdsc, ci);
 	}
 
 	if (flags & CEPH_CAP_FLAG_AUTH) {
@@ -1330,7 +1325,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	struct cap_msg_args arg;
 	int held, revoking;
 	int wake = 0;
-	int delayed = 0;
 	int ret;
 
 	/* Don't send anything if it's still being created. Return delayed */
@@ -1350,28 +1344,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	     ceph_cap_string(revoking));
 	BUG_ON((retain & CEPH_CAP_PIN) == 0);
 
-	arg.session = cap->session;
-
-	/* don't release wanted unless we've waited a bit. */
-	if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
-	    time_before(jiffies, ci->i_hold_caps_min)) {
-		dout(" delaying issued %s -> %s, wanted %s -> %s on send\n",
-		     ceph_cap_string(cap->issued),
-		     ceph_cap_string(cap->issued & retain),
-		     ceph_cap_string(cap->mds_wanted),
-		     ceph_cap_string(want));
-		want |= cap->mds_wanted;
-		retain |= cap->issued;
-		delayed = 1;
-	}
-	ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
-	if (want & ~cap->mds_wanted) {
-		/* user space may open/close single file frequently.
-		 * This avoids droping mds_wanted immediately after
-		 * requesting new mds_wanted.
-		 */
-		__cap_set_timeouts(mdsc, ci);
-	}
+	ci->i_ceph_flags &= ~CEPH_I_FLUSH;
 
 	cap->issued &= retain;  /* drop bits we don't want */
 	if (cap->implemented & ~cap->issued) {
@@ -1386,6 +1359,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	cap->implemented &= cap->issued | used;
 	cap->mds_wanted = want;
 
+	arg.session = cap->session;
 	arg.ino = ceph_vino(inode).ino;
 	arg.cid = cap->cap_id;
 	arg.follows = flushing ? ci->i_head_snapc->seq : 0;
@@ -1446,14 +1420,19 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 
 	ret = send_cap_msg(&arg);
 	if (ret < 0) {
-		dout("error sending cap msg, must requeue %p\n", inode);
-		delayed = 1;
+		pr_err("error sending cap msg, ino (%llx.%llx) "
+		       "flushing %s tid %llu, requeue\n",
+		       ceph_vinop(inode), ceph_cap_string(flushing),
+		       flush_tid);
+		spin_lock(&ci->i_ceph_lock);
+		__cap_delay_requeue(mdsc, ci);
+		spin_unlock(&ci->i_ceph_lock);
 	}
 
 	if (wake)
 		wake_up_all(&ci->i_cap_wq);
 
-	return delayed;
+	return ret;
 }
 
 static inline int __send_flush_snap(struct inode *inode,
@@ -1719,7 +1698,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
 	if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
 	    (mask & CEPH_CAP_FILE_BUFFER))
 		dirty |= I_DIRTY_DATASYNC;
-	__cap_delay_requeue(mdsc, ci, true);
+	__cap_delay_requeue(mdsc, ci);
 	return dirty;
 }
 
@@ -1871,8 +1850,6 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
  * versus held caps.  Release, flush, ack revoked caps to mds as
  * appropriate.
  *
- *  CHECK_CAPS_NODELAY - caller is delayed work and we should not delay
- *    cap release further.
  *  CHECK_CAPS_AUTHONLY - we should only check the auth cap
  *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
  *    further delay.
@@ -1891,17 +1868,10 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	int mds = -1;   /* keep track of how far we've gone through i_caps list
 			   to avoid an infinite loop on retry */
 	struct rb_node *p;
-	int delayed = 0, sent = 0;
-	bool no_delay = flags & CHECK_CAPS_NODELAY;
 	bool queue_invalidate = false;
 	bool tried_invalidate = false;
 
-	/* if we are unmounting, flush any unused caps immediately. */
-	if (mdsc->stopping)
-		no_delay = true;
-
 	spin_lock(&ci->i_ceph_lock);
-
 	if (ci->i_ceph_flags & CEPH_I_FLUSH)
 		flags |= CHECK_CAPS_FLUSH;
 
@@ -1950,14 +1920,13 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	}
 
 	dout("check_caps %p file_want %s used %s dirty %s flushing %s"
-	     " issued %s revoking %s retain %s %s%s%s\n", inode,
+	     " issued %s revoking %s retain %s %s%s\n", inode,
 	     ceph_cap_string(file_wanted),
 	     ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
 	     ceph_cap_string(ci->i_flushing_caps),
 	     ceph_cap_string(issued), ceph_cap_string(revoking),
 	     ceph_cap_string(retain),
 	     (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
-	     (flags & CHECK_CAPS_NODELAY) ? " NODELAY" : "",
 	     (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "");
 
 	/*
@@ -1965,7 +1934,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	 * have cached pages, but don't want them, then try to invalidate.
 	 * If we fail, it's because pages are locked.... try again later.
 	 */
-	if ((!no_delay || mdsc->stopping) &&
+	if ((!(flags & CHECK_CAPS_NOINVAL) || mdsc->stopping) &&
 	    S_ISREG(inode->i_mode) &&
 	    !(ci->i_wb_ref || ci->i_wrbuffer_ref) &&   /* no dirty pages... */
 	    inode->i_data.nrpages &&		/* have cached pages */
@@ -2045,21 +2014,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		if ((cap->issued & ~retain) == 0)
 			continue;     /* nope, all good */
 
-		if (no_delay)
-			goto ack;
-
-		/* delay? */
-		if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0 &&
-		    time_before(jiffies, ci->i_hold_caps_max)) {
-			dout(" delaying issued %s -> %s, wanted %s -> %s\n",
-			     ceph_cap_string(cap->issued),
-			     ceph_cap_string(cap->issued & retain),
-			     ceph_cap_string(cap->mds_wanted),
-			     ceph_cap_string(want));
-			delayed++;
-			continue;
-		}
-
 ack:
 		if (session && session != cap->session) {
 			dout("oops, wrong session %p mutex\n", session);
@@ -2120,25 +2074,19 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		}
 
 		mds = cap->mds;  /* remember mds, so we don't repeat */
-		sent++;
 
 		/* __send_cap drops i_ceph_lock */
-		delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, 0,
-				cap_used, want, retain, flushing,
-				flush_tid, oldest_flush_tid);
+		__send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, 0, cap_used, want,
+			   retain, flushing, flush_tid, oldest_flush_tid);
 		goto retry; /* retake i_ceph_lock and restart our cap scan. */
 	}
 
-	if (list_empty(&ci->i_cap_delay_list)) {
-	    if (delayed) {
-		    /* Reschedule delayed caps release if we delayed anything */
-		    __cap_delay_requeue(mdsc, ci, false);
-	    } else if (__ceph_is_any_real_caps(ci) &&
-			(file_wanted & ~CEPH_CAP_PIN) &&
-			!(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
-		    /* periodically re-calculate caps wanted by open files */
-		    __cap_delay_requeue(mdsc, ci, true);
-	    }
+	/* periodically re-calculate caps wanted by open files */
+	if (__ceph_is_any_real_caps(ci) &&
+	    list_empty(&ci->i_cap_delay_list) &&
+	    (file_wanted & ~CEPH_CAP_PIN) &&
+	    !(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+		__cap_delay_requeue(mdsc, ci);
 	}
 
 	spin_unlock(&ci->i_ceph_lock);
@@ -2168,7 +2116,6 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
 retry_locked:
 	if (ci->i_dirty_caps && ci->i_auth_cap) {
 		struct ceph_cap *cap = ci->i_auth_cap;
-		int delayed;
 
 		if (session != cap->session) {
 			spin_unlock(&ci->i_ceph_lock);
@@ -2197,18 +2144,10 @@ static int try_flush_caps(struct inode *inode, u64 *ptid)
 						 &oldest_flush_tid);
 
 		/* __send_cap drops i_ceph_lock */
-		delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
-				     CEPH_CLIENT_CAPS_SYNC,
-				     __ceph_caps_used(ci),
-				     __ceph_caps_wanted(ci),
-				     (cap->issued | cap->implemented),
-				     flushing, flush_tid, oldest_flush_tid);
-
-		if (delayed) {
-			spin_lock(&ci->i_ceph_lock);
-			__cap_delay_requeue(mdsc, ci, true);
-			spin_unlock(&ci->i_ceph_lock);
-		}
+		__send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC,
+			   __ceph_caps_used(ci), __ceph_caps_wanted(ci),
+			   (cap->issued | cap->implemented),
+			   flushing, flush_tid, oldest_flush_tid);
 	} else {
 		if (!list_empty(&ci->i_cap_flush_list)) {
 			struct ceph_cap_flush *cf =
@@ -2412,22 +2351,13 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
 		if (cf->caps) {
 			dout("kick_flushing_caps %p cap %p tid %llu %s\n",
 			     inode, cap, cf->tid, ceph_cap_string(cf->caps));
-			ci->i_ceph_flags |= CEPH_I_NODELAY;
-
-			ret = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+			__send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
 					 (cf->tid < last_snap_flush ?
 					  CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0),
 					  __ceph_caps_used(ci),
 					  __ceph_caps_wanted(ci),
 					  (cap->issued | cap->implemented),
 					  cf->caps, cf->tid, oldest_flush_tid);
-			if (ret) {
-				pr_err("kick_flushing_caps: error sending "
-					"cap flush, ino (%llx.%llx) "
-					"tid %llu flushing %s\n",
-					ceph_vinop(inode), cf->tid,
-					ceph_cap_string(cf->caps));
-			}
 		} else {
 			struct ceph_cap_snap *capsnap =
 					container_of(cf, struct ceph_cap_snap,
@@ -3049,7 +2979,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
 	     last ? " last" : "", put ? " put" : "");
 
-	if (last && !flushsnaps)
+	if (last)
 		ceph_check_caps(ci, 0, NULL);
 	else if (flushsnaps)
 		ceph_flush_snaps(ci, NULL);
@@ -3468,10 +3398,10 @@ static void handle_cap_grant(struct inode *inode,
 		wake_up_all(&ci->i_cap_wq);
 
 	if (check_caps == 1)
-		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_AUTHONLY,
+		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL,
 				session);
 	else if (check_caps == 2)
-		ceph_check_caps(ci, CHECK_CAPS_NODELAY, session);
+		ceph_check_caps(ci, CHECK_CAPS_NOINVAL, session);
 	else
 		mutex_unlock(&session->s_mutex);
 }
@@ -4146,7 +4076,6 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 {
 	struct inode *inode;
 	struct ceph_inode_info *ci;
-	int flags = CHECK_CAPS_NODELAY;
 
 	dout("check_delayed_caps\n");
 	while (1) {
@@ -4166,7 +4095,7 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 
 		if (inode) {
 			dout("check_delayed_caps on %p\n", inode);
-			ceph_check_caps(ci, flags, NULL);
+			ceph_check_caps(ci, 0, NULL);
 			/* avoid calling iput_final() in tick thread */
 			ceph_async_iput(inode);
 		}
@@ -4191,7 +4120,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 		ihold(inode);
 		dout("flush_dirty_caps %p\n", inode);
 		spin_unlock(&mdsc->cap_dirty_lock);
-		ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, NULL);
+		ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
 		iput(inode);
 		spin_lock(&mdsc->cap_dirty_lock);
 	}
@@ -4211,7 +4140,7 @@ void __ceph_touch_fmode(struct ceph_inode_info *ci,
 	if (fmode &&
 	    __ceph_is_any_real_caps(ci) &&
 	    list_empty(&ci->i_cap_delay_list))
-		__cap_delay_requeue(mdsc, ci, true);
+		__cap_delay_requeue(mdsc, ci);
 }
 
 void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
@@ -4270,7 +4199,6 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
 	if (inode->i_nlink == 1) {
 		drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
 
-		ci->i_ceph_flags |= CEPH_I_NODELAY;
 		if (__ceph_caps_dirty(ci)) {
 			struct ceph_mds_client *mdsc =
 				ceph_inode_to_client(inode)->mdsc;
@@ -4326,8 +4254,6 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 		if (force || (cap->issued & drop)) {
 			if (cap->issued & drop) {
 				int wanted = __ceph_caps_wanted(ci);
-				if ((ci->i_ceph_flags & CEPH_I_NODELAY) == 0)
-					wanted |= cap->mds_wanted;
 				dout("encode_inode_release %p cap %p "
 				     "%s -> %s, wanted %s -> %s\n", inode, cap,
 				     ceph_cap_string(cap->issued),
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index c39951e7e289..ea1a1b13247f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1819,7 +1819,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		if (dirty)
 			__mark_inode_dirty(inode, dirty);
 		if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
-			ceph_check_caps(ci, CHECK_CAPS_NODELAY, NULL);
+			ceph_check_caps(ci, 0, NULL);
 	}
 
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
@@ -2419,15 +2419,10 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
 	inode_inc_iversion_raw(dst_inode);
 
 	if (dst_off > size) {
-		int caps_flags = 0;
-
 		/* Let the MDS know about dst file size change */
-		if (ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
-			caps_flags |= CHECK_CAPS_NODELAY;
-		if (ceph_inode_set_size(dst_inode, dst_off))
-			caps_flags |= CHECK_CAPS_AUTHONLY;
-		if (caps_flags)
-			ceph_check_caps(dst_ci, caps_flags, NULL);
+		if (ceph_inode_set_size(dst_inode, dst_off) ||
+		    ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
+			ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY, NULL);
 	}
 	/* Mark Fw dirty */
 	spin_lock(&dst_ci->i_ceph_lock);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a618e8ae9d0f..c4dd534aaec3 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -472,7 +472,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 	ci->i_prealloc_cap_flush = NULL;
 	INIT_LIST_HEAD(&ci->i_cap_flush_list);
 	init_waitqueue_head(&ci->i_cap_wq);
-	ci->i_hold_caps_min = 0;
 	ci->i_hold_caps_max = 0;
 	INIT_LIST_HEAD(&ci->i_cap_delay_list);
 	INIT_LIST_HEAD(&ci->i_cap_snaps);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 63dd8e61ba88..edceedfee00a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -173,9 +173,9 @@ struct ceph_cap {
 	struct list_head caps_item;
 };
 
-#define CHECK_CAPS_NODELAY    1  /* do not delay any further */
-#define CHECK_CAPS_AUTHONLY   2  /* only check auth cap */
-#define CHECK_CAPS_FLUSH      4  /* flush any dirty caps */
+#define CHECK_CAPS_AUTHONLY   1  /* only check auth cap */
+#define CHECK_CAPS_FLUSH      2  /* flush any dirty caps */
+#define CHECK_CAPS_NOINVAL    4  /* don't invalidate pagecache */
 
 struct ceph_cap_flush {
 	u64 tid;
@@ -357,7 +357,6 @@ struct ceph_inode_info {
 	struct ceph_cap_flush *i_prealloc_cap_flush;
 	struct list_head i_cap_flush_list;
 	wait_queue_head_t i_cap_wq;      /* threads waiting on a capability */
-	unsigned long i_hold_caps_min; /* jiffies */
 	unsigned long i_hold_caps_max; /* jiffies */
 	struct list_head i_cap_delay_list;  /* for delayed cap release to mds */
 	struct ceph_cap_reservation i_cap_migration_resv;
@@ -518,7 +517,6 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
  * Ceph inode.
  */
 #define CEPH_I_DIR_ORDERED	(1 << 0)  /* dentries in dir are ordered */
-#define CEPH_I_NODELAY		(1 << 1)  /* do not delay cap release */
 #define CEPH_I_FLUSH		(1 << 2)  /* do not delay flush of dirty metadata */
 #define CEPH_I_POOL_PERM	(1 << 3)  /* pool rd/wr bits are valid */
 #define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */
-- 
2.21.1


* [PATCH v4 4/6] ceph: simplify calling of ceph_get_fmode()
  2020-03-04 17:32 [PATCH v4 0/6] ceph: don't request caps for idle open files Yan, Zheng
                   ` (2 preceding siblings ...)
  2020-03-04 17:32 ` [PATCH v4 3/6] ceph: remove delay check logic from ceph_check_caps() Yan, Zheng
@ 2020-03-04 17:32 ` Yan, Zheng
  2020-03-04 17:32 ` [PATCH v4 5/6] ceph: update i_requested_max_size only when sending cap msg to auth mds Yan, Zheng
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2020-03-04 17:32 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

Originally, ceph_get_fmode() for open files was called by the thread
that handles the request reply. There is a small window between
updating the caps and waking the request initiator, and we need to
prevent ceph_check_caps() from releasing wanted caps in that window.

The previous patches made fill_inode() call __ceph_touch_fmode() for
open-file requests. This prevents ceph_check_caps() from releasing
wanted caps for 'caps_wanted_delay_min' seconds, which is enough time
for the request initiator to get woken up and call ceph_get_fmode().

This allows us to call ceph_get_fmode() in ceph_open() instead.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/caps.c  | 26 +++-----------------------
 fs/ceph/file.c  | 21 +++++----------------
 fs/ceph/inode.c |  8 +-------
 fs/ceph/super.h |  3 +--
 4 files changed, 10 insertions(+), 48 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2210c313b860..1b6e6fd98169 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -608,7 +608,7 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
  */
 void ceph_add_cap(struct inode *inode,
 		  struct ceph_mds_session *session, u64 cap_id,
-		  int fmode, unsigned issued, unsigned wanted,
+		  unsigned issued, unsigned wanted,
 		  unsigned seq, unsigned mseq, u64 realmino, int flags,
 		  struct ceph_cap **new_cap)
 {
@@ -624,13 +624,6 @@ void ceph_add_cap(struct inode *inode,
 	dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
 	     session->s_mds, cap_id, ceph_cap_string(issued), seq);
 
-	/*
-	 * If we are opening the file, include file mode wanted bits
-	 * in wanted.
-	 */
-	if (fmode >= 0)
-		wanted |= ceph_caps_for_mode(fmode);
-
 	spin_lock(&session->s_gen_ttl_lock);
 	gen = session->s_cap_gen;
 	spin_unlock(&session->s_gen_ttl_lock);
@@ -755,9 +748,6 @@ void ceph_add_cap(struct inode *inode,
 	cap->issue_seq = seq;
 	cap->mseq = mseq;
 	cap->cap_gen = gen;
-
-	if (fmode >= 0)
-		__ceph_get_fmode(ci, fmode);
 }
 
 /*
@@ -3697,7 +3687,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 		/* add placeholder for the export tagert */
 		int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
 		tcap = new_cap;
-		ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
+		ceph_add_cap(inode, tsession, t_cap_id, issued, 0,
 			     t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
 
 		if (!list_empty(&ci->i_cap_flush_list) &&
@@ -3802,7 +3792,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 	__ceph_caps_issued(ci, &issued);
 	issued |= __ceph_caps_dirty(ci);
 
-	ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq,
+	ceph_add_cap(inode, session, cap_id, caps, wanted, seq, mseq,
 		     realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
 
 	ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
@@ -4155,16 +4145,6 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
 	spin_unlock(&ci->i_ceph_lock);
 }
 
-void __ceph_get_fmode(struct ceph_inode_info *ci, int fmode)
-{
-	int i;
-	int bits = (fmode << 1) | 1;
-	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
-		if (bits & (1 << i))
-			ci->i_nr_by_mode[i]++;
-	}
-}
-
 /*
  * Drop open file reference.  If we were the last open file,
  * we may need to release capabilities to the MDS (or schedule
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ea1a1b13247f..ba46ba740628 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -212,10 +212,8 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 	if (isdir) {
 		struct ceph_dir_file_info *dfi =
 			kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
-		if (!dfi) {
-			ceph_put_fmode(ci, fmode, 1); /* clean up */
+		if (!dfi)
 			return -ENOMEM;
-		}
 
 		file->private_data = dfi;
 		fi = &dfi->file_info;
@@ -223,15 +221,15 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 		dfi->readdir_cache_idx = -1;
 	} else {
 		fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
-		if (!fi) {
-			ceph_put_fmode(ci, fmode, 1); /* clean up */
+		if (!fi)
 			return -ENOMEM;
-		}
 
 		file->private_data = fi;
 	}
 
+	ceph_get_fmode(ci, fmode, 1);
 	fi->fmode = fmode;
+
 	spin_lock_init(&fi->rw_contexts_lock);
 	INIT_LIST_HEAD(&fi->rw_contexts);
 	fi->meta_err = errseq_sample(&ci->i_meta_err);
@@ -263,7 +261,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 	case S_IFLNK:
 		dout("init_file %p %p 0%o (symlink)\n", inode, file,
 		     inode->i_mode);
-		ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
 		break;
 
 	default:
@@ -273,7 +270,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
 		 * we need to drop the open ref now, since we don't
 		 * have .release set to ceph_release.
 		 */
-		ceph_put_fmode(ceph_inode(inode), fmode, 1); /* clean up */
 		BUG_ON(inode->i_fop->release == ceph_release);
 
 		/* call the proper open fop */
@@ -327,7 +323,6 @@ int ceph_renew_caps(struct inode *inode, int fmode)
 	req->r_inode = inode;
 	ihold(inode);
 	req->r_num_caps = 1;
-	req->r_fmode = -1;
 
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
 	ceph_mdsc_put_request(req);
@@ -373,9 +368,6 @@ int ceph_open(struct inode *inode, struct file *file)
 
 	/* trivially open snapdir */
 	if (ceph_snap(inode) == CEPH_SNAPDIR) {
-		spin_lock(&ci->i_ceph_lock);
-		__ceph_get_fmode(ci, fmode);
-		spin_unlock(&ci->i_ceph_lock);
 		return ceph_init_file(inode, file, fmode);
 	}
 
@@ -393,7 +385,7 @@ int ceph_open(struct inode *inode, struct file *file)
 		dout("open %p fmode %d want %s issued %s using existing\n",
 		     inode, fmode, ceph_cap_string(wanted),
 		     ceph_cap_string(issued));
-		__ceph_get_fmode(ci, fmode);
+		__ceph_touch_fmode(ci, mdsc, fmode);
 		spin_unlock(&ci->i_ceph_lock);
 
 		/* adjust wanted? */
@@ -405,7 +397,6 @@ int ceph_open(struct inode *inode, struct file *file)
 		return ceph_init_file(inode, file, fmode);
 	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
 		   (ci->i_snap_caps & wanted) == wanted) {
-		__ceph_get_fmode(ci, fmode);
 		__ceph_touch_fmode(ci, mdsc, fmode);
 		spin_unlock(&ci->i_ceph_lock);
 		return ceph_init_file(inode, file, fmode);
@@ -782,8 +773,6 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 		err = finish_open(file, dentry, ceph_open);
 	}
 out_req:
-	if (!req->r_err && req->r_target_inode)
-		ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode, 1);
 	ceph_mdsc_put_request(req);
 out_ctx:
 	ceph_release_acl_sec_ctx(&as_ctx);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index c4dd534aaec3..c7ff9f7067f6 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -970,7 +970,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
 		if (ceph_snap(inode) == CEPH_NOSNAP) {
 			ceph_add_cap(inode, session,
 				     le64_to_cpu(info->cap.cap_id),
-				     cap_fmode, info_caps,
+				     info_caps,
 				     le32_to_cpu(info->cap.wanted),
 				     le32_to_cpu(info->cap.seq),
 				     le32_to_cpu(info->cap.mseq),
@@ -995,13 +995,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
 			dout(" %p got snap_caps %s\n", inode,
 			     ceph_cap_string(info_caps));
 			ci->i_snap_caps |= info_caps;
-			if (cap_fmode >= 0)
-				__ceph_get_fmode(ci, cap_fmode);
 		}
-	} else if (cap_fmode >= 0) {
-		pr_warn("mds issued no caps on %llx.%llx\n",
-			   ceph_vinop(inode));
-		__ceph_get_fmode(ci, cap_fmode);
 	}
 
 	if (iinfo->inline_version > 0 &&
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index edceedfee00a..60aac3aee055 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1050,7 +1050,7 @@ extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
 				     struct ceph_cap_reservation *ctx);
 extern void ceph_add_cap(struct inode *inode,
 			 struct ceph_mds_session *session, u64 cap_id,
-			 int fmode, unsigned issued, unsigned wanted,
+			 unsigned issued, unsigned wanted,
 			 unsigned cap, unsigned seq, u64 realmino, int flags,
 			 struct ceph_cap **new_cap);
 extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release);
@@ -1096,7 +1096,6 @@ extern int ceph_try_get_caps(struct inode *inode,
 			     int need, int want, bool nonblock, int *got);
 
 /* for counting open files by mode */
-extern void __ceph_get_fmode(struct ceph_inode_info *ci, int mode);
 extern void ceph_get_fmode(struct ceph_inode_info *ci, int mode, int count);
 extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode, int count);
 extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
-- 
2.21.1


* [PATCH v4 5/6] ceph: update i_requested_max_size only when sending cap msg to auth mds
  2020-03-04 17:32 [PATCH v4 0/6] ceph: don't request caps for idle open files Yan, Zheng
                   ` (3 preceding siblings ...)
  2020-03-04 17:32 ` [PATCH v4 4/6] ceph: simplify calling of ceph_get_fmode() Yan, Zheng
@ 2020-03-04 17:32 ` Yan, Zheng
  2020-03-04 17:32 ` [PATCH v4 6/6] ceph: check all mds' caps after page writeback Yan, Zheng
  2020-03-04 17:48 ` [PATCH v4 0/6] ceph: don't request caps for idle open files Jeff Layton
  6 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2020-03-04 17:32 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

A non-auth mds can't do anything with an 'update max size' cap message,
so only update i_requested_max_size when the cap message is sent to the
auth mds.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/caps.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1b6e6fd98169..0408749ced2d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1359,7 +1359,8 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
 	arg.size = inode->i_size;
 	ci->i_reported_size = arg.size;
 	arg.max_size = ci->i_wanted_max_size;
-	ci->i_requested_max_size = arg.max_size;
+	if (cap == ci->i_auth_cap)
+		ci->i_requested_max_size = arg.max_size;
 
 	if (flushing & CEPH_CAP_XATTR_EXCL) {
 		old_blob = __ceph_build_xattrs_blob(ci);
-- 
2.21.1


* [PATCH v4 6/6] ceph: check all mds' caps after page writeback
  2020-03-04 17:32 [PATCH v4 0/6] ceph: don't request caps for idle open files Yan, Zheng
                   ` (4 preceding siblings ...)
  2020-03-04 17:32 ` [PATCH v4 5/6] ceph: update i_requested_max_size only when sending cap msg to auth mds Yan, Zheng
@ 2020-03-04 17:32 ` Yan, Zheng
  2020-03-04 17:48 ` [PATCH v4 0/6] ceph: don't request caps for idle open files Jeff Layton
  6 siblings, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2020-03-04 17:32 UTC (permalink / raw)
  To: ceph-devel; +Cc: jlayton, Yan, Zheng

If an inode has caps from multiple mds's, the following can happen:

- the non-auth mds revokes Fsc. Fcb is in use, so page writeback is queued.
- when writeback finishes, ceph_check_caps() is called with the auth-only
  flag. ceph_check_caps() invalidates the pagecache, but skips checking
  any non-auth caps.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/caps.c  | 2 +-
 fs/ceph/inode.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0408749ced2d..5e9aecfa2f52 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3052,7 +3052,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (last) {
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+		ceph_check_caps(ci, 0, NULL);
 	} else if (flush_snaps) {
 		ceph_flush_snaps(ci, NULL);
 	}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index c7ff9f7067f6..ee40ba7e0e77 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1984,7 +1984,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
 	mutex_unlock(&ci->i_truncate_mutex);
 
 	if (wrbuffer_refs == 0)
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+		ceph_check_caps(ci, 0, NULL);
 
 	wake_up_all(&ci->i_cap_wq);
 }
-- 
2.21.1


* Re: [PATCH v4 0/6] ceph: don't request caps for idle open files
  2020-03-04 17:32 [PATCH v4 0/6] ceph: don't request caps for idle open files Yan, Zheng
                   ` (5 preceding siblings ...)
  2020-03-04 17:32 ` [PATCH v4 6/6] ceph: check all mds' caps after page writeback Yan, Zheng
@ 2020-03-04 17:48 ` Jeff Layton
  2020-03-04 19:24   ` Jeff Layton
  2020-03-05 12:23   ` Yan, Zheng
  6 siblings, 2 replies; 11+ messages in thread
From: Jeff Layton @ 2020-03-04 17:48 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel

On Thu, 2020-03-05 at 01:32 +0800, Yan, Zheng wrote:
> This series make cephfs client not request caps for open files that
> idle for a long time. For the case that one active client and multiple
> standby clients open the same file, this increase the possibility that
> mds issues exclusive caps to the active client.
> 
> Yan, Zheng (6):
>   ceph: always renew caps if mds_wanted is insufficient
>   ceph: consider inode's last read/write when calculating wanted caps
>   ceph: remove delay check logic from ceph_check_caps()
>   ceph: simplify calling of ceph_get_fmode()
>   ceph: update i_requested_max_size only when sending cap msg to auth mds
>   ceph: check all mds' caps after page writeback
> 
>  fs/ceph/caps.c               | 338 ++++++++++++++++-------------------
>  fs/ceph/file.c               |  45 ++---
>  fs/ceph/inode.c              |  21 +--
>  fs/ceph/ioctl.c              |   2 +
>  fs/ceph/mds_client.c         |   5 -
>  fs/ceph/super.h              |  37 ++--
>  include/linux/ceph/ceph_fs.h |   1 +
>  7 files changed, 202 insertions(+), 247 deletions(-)
> 
> changes since v2
>  - make __ceph_caps_file_wanted() more readable
>  - add patch 5 and 6, which fix hung write during testing patch 1~4
> 
> changes since v3
>  - don't queue delayed cap check for snap inode
>  - initialize ci->{last_rd,last_wr} to jiffies - 3600 * HZ
>  - make __ceph_caps_file_wanted() check inode type
> 

I just tried this out, and it still seems to kill unlink performance
with -o nowsync. From the script I posted to the other thread:

--------8<--------
$ ./test-async-dirops.sh 
  File: /mnt/cephfs/test-dirops.1401
  Size: 0         	Blocks: 0          IO Block: 65536  directory
Device: 26h/38d	Inode: 1099511627778  Links: 2
Access: (0775/drwxrwxr-x)  Uid: ( 4447/ jlayton)   Gid: ( 4447/ jlayton)
Context: system_u:object_r:cephfs_t:s0
Access: 2020-03-04 12:42:03.914006772 -0500
Modify: 2020-03-04 12:42:03.914006772 -0500
Change: 2020-03-04 12:42:03.914006772 -0500
 Birth: 2020-03-04 12:42:03.914006772 -0500
Creating files in /mnt/cephfs/test-dirops.1401

real	0m6.269s
user	0m0.123s
sys	0m0.454s

sync

real	0m5.358s
user	0m0.003s
sys	0m0.011s
Starting rm

real	0m18.932s
user	0m0.169s
sys	0m0.713s

rmdir

real	0m0.135s
user	0m0.000s
sys	0m0.002s

sync

real	0m1.725s
user	0m0.000s
sys	0m0.002s
--------8<--------

Create and sync parts look reasonable. Usually, the rm part finishes in
less than a second as we end up doing most of the removes
asynchronously, but here it didn't. I suspect that means that unlink
didn't get caps for some reason, but I'd need to do some debugging to
figure out what's actually happening.

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v4 0/6] ceph: don't request caps for idle open files
  2020-03-04 17:48 ` [PATCH v4 0/6] ceph: don't request caps for idle open files Jeff Layton
@ 2020-03-04 19:24   ` Jeff Layton
  2020-03-04 20:23     ` Jeff Layton
  2020-03-05 12:23   ` Yan, Zheng
  1 sibling, 1 reply; 11+ messages in thread
From: Jeff Layton @ 2020-03-04 19:24 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel

On Wed, 2020-03-04 at 12:48 -0500, Jeff Layton wrote:
> On Thu, 2020-03-05 at 01:32 +0800, Yan, Zheng wrote:
> > This series make cephfs client not request caps for open files that
> > idle for a long time. For the case that one active client and multiple
> > standby clients open the same file, this increase the possibility that
> > mds issues exclusive caps to the active client.
> > 
> > Yan, Zheng (6):
> >   ceph: always renew caps if mds_wanted is insufficient
> >   ceph: consider inode's last read/write when calculating wanted caps
> >   ceph: remove delay check logic from ceph_check_caps()
> >   ceph: simplify calling of ceph_get_fmode()
> >   ceph: update i_requested_max_size only when sending cap msg to auth mds
> >   ceph: check all mds' caps after page writeback
> > 
> >  fs/ceph/caps.c               | 338 ++++++++++++++++-------------------
> >  fs/ceph/file.c               |  45 ++---
> >  fs/ceph/inode.c              |  21 +--
> >  fs/ceph/ioctl.c              |   2 +
> >  fs/ceph/mds_client.c         |   5 -
> >  fs/ceph/super.h              |  37 ++--
> >  include/linux/ceph/ceph_fs.h |   1 +
> >  7 files changed, 202 insertions(+), 247 deletions(-)
> > 
> > changes since v2
> >  - make __ceph_caps_file_wanted() more readable
> >  - add patch 5 and 6, which fix hung write during testing patch 1~4
> > 
> > changes since v3
> >  - don't queue delayed cap check for snap inode
> >  - initialize ci->{last_rd,last_wr} to jiffies - 3600 * HZ
> >  - make __ceph_caps_file_wanted() check inode type
> > 
> 
> I just tried this out, and it still seems to kill unlink performance
> with -o nowsync. From the script I posted to the other thread:
> 
> --------8<--------
> $ ./test-async-dirops.sh 
>   File: /mnt/cephfs/test-dirops.1401
>   Size: 0         	Blocks: 0          IO Block: 65536  directory
> Device: 26h/38d	Inode: 1099511627778  Links: 2
> Access: (0775/drwxrwxr-x)  Uid: ( 4447/ jlayton)   Gid: ( 4447/ jlayton)
> Context: system_u:object_r:cephfs_t:s0
> Access: 2020-03-04 12:42:03.914006772 -0500
> Modify: 2020-03-04 12:42:03.914006772 -0500
> Change: 2020-03-04 12:42:03.914006772 -0500
>  Birth: 2020-03-04 12:42:03.914006772 -0500
> Creating files in /mnt/cephfs/test-dirops.1401
> 
> real	0m6.269s
> user	0m0.123s
> sys	0m0.454s
> 
> sync
> 
> real	0m5.358s
> user	0m0.003s
> sys	0m0.011s
> Starting rm
> 
> real	0m18.932s
> user	0m0.169s
> sys	0m0.713s
> 
> rmdir
> 
> real	0m0.135s
> user	0m0.000s
> sys	0m0.002s
> 
> sync
> 
> real	0m1.725s
> user	0m0.000s
> sys	0m0.002s
> --------8<--------
> 
> Create and sync parts look reasonable. Usually, the rm part finishes in
> less than a second as we end up doing most of the removes
> asynchronously, but here it didn't. I suspect that means that unlink
> didn't get caps for some reason, but I'd need to do some debugging to
> figure out what's actually happening.
> 

A bit more investigation. I piled on some tracepoints that I had plumbed
in a while back:

It starts by making a directory (NOSNAP:0x1000000c6c0) and then doing the
first sync create:

     kworker/7:1-2021  [007] ....  5316.161752: ceph_add_cap: ino=NOSNAP:0x1 mds=0 issued=pAsLsXs implemented=pAsLsXs mds_wanted=p
     kworker/7:1-2021  [007] ....  5316.163241: ceph_add_cap: ino=NOSNAP:0x1 mds=0 issued=pAsLsXs implemented=pAsLsXs mds_wanted=p
     kworker/7:1-2021  [007] ....  5316.163250: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsxLsXsxFsx implemented=pAsxLsXsxFsx mds_wanted=-
     kworker/7:1-2021  [007] ....  5316.168011: ceph_handle_cap_grant: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsxFsx mds_wanted=p
     kworker/7:1-2021  [007] ....  5316.168830: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=pAsLsXsFsxcral
     kworker/7:1-2021  [007] ....  5316.168839: ceph_add_cap: ino=NOSNAP:0x1000000c6c1 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb
 test-async-diro-2257  [005] ....  5316.169004: ceph_sync_create: name=NOSNAP:0x1000000c6c0/1(0x1000000c6c1)

We get a cap grant on the directory in the sync create reply and that sets mds_wanted:

     kworker/7:1-2021  [007] ....  5316.169075: ceph_handle_cap_grant: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsxc implemented=pAsLsXsFsxc mds_wanted=pAsLsXsFsxcral
     kworker/7:1-2021  [007] ....  5316.170209: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsxc implemented=pAsLsXsFsxc mds_wanted=pAsLsXsFsxcral

...eventually we start doing async creates and continue that to the end
of the creates. (Side note: the second create was also synchronous --
not sure why):

     kworker/7:1-2021  [007] ....  5316.170216: ceph_add_cap: ino=NOSNAP:0x1000000c6c2 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb
 test-async-diro-2257  [005] ....  5316.170251: ceph_sync_create: name=NOSNAP:0x1000000c6c0/2(0x1000000c6c2)
 test-async-diro-2257  [005] ....  5316.170412: ceph_add_cap: ino=NOSNAP:0x1000000c554 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxLsXsxFsxcrwb
 test-async-diro-2257  [005] ....  5316.170415: ceph_async_create: name=NOSNAP:0x1000000c6c0/3(0x1000000c554)
 test-async-diro-2257  [005] ....  5316.170571: ceph_add_cap: ino=NOSNAP:0x1000000c555 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxLsXsxFsxcrwb

Eventually we finish creating and soon after, get a cap revoke and that
resets mds_wanted back to only "p":

     kworker/7:1-2021  [007] ....  5316.461890: ceph_handle_cap_grant: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFs implemented=pAsLsXsFsc mds_wanted=p

...and it never changes and we're never granted Du caps (aka Fr):

              rm-2262  [006] ....  5316.466221: ceph_sync_unlink: name=NOSNAP:0x1000000c6c0/1(0x1000000c6c1)
     kworker/7:1-2021  [007] ....  5316.467108: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
     kworker/7:1-2021  [007] ....  5316.467676: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
     kworker/7:1-2021  [007] ....  5316.467679: ceph_add_cap: ino=NOSNAP:0x1000000c55b mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb
              rm-2262  [006] ....  5316.467714: ceph_sync_unlink: name=NOSNAP:0x1000000c6c0/10(0x1000000c55b)
     kworker/7:1-2021  [007] ....  5316.468424: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
     kworker/7:1-2021  [007] ....  5316.468956: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
     kworker/7:1-2021  [007] ....  5316.468960: ceph_add_cap: ino=NOSNAP:0x1000000c5b5 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb
              rm-2262  [006] ....  5316.468994: ceph_sync_unlink: name=NOSNAP:0x1000000c6c0/100(0x1000000c5b5)
     kworker/7:1-2021  [007] ....  5316.469728: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
     kworker/7:1-2021  [007] ....  5316.470237: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
     kworker/7:1-2021  [007] ....  5316.470242: ceph_add_cap: ino=NOSNAP:0x1000000c55c mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb

Given that we've overloaded Fr caps for Du, I wonder if the new timeout
logic is creeping in here. In particular, we don't want those caps to
ever time out.

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 11+ messages in thread
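
For context on why mds_wanted falls back to plain "p" in the trace above:
the core of this series is that __ceph_caps_file_wanted() now derives wanted
file caps from how recently the inode was last read or written, so an open
file that has been idle for a while stops asking the mds for read/write
caps. A conceptual sketch of that idea -- the helper signature, field names
and the idle period below are illustrative, not the actual patch:

	/*
	 * Conceptual sketch only; the real logic lives in
	 * __ceph_caps_file_wanted() and uses the inode's bookkeeping.
	 */
	static int file_wanted_sketch(bool open_for_read, bool open_for_write,
				      unsigned long last_rd, unsigned long last_wr)
	{
		unsigned long cutoff = jiffies - 60 * HZ; /* made-up idle period */
		int wanted = 0;

		if (open_for_read && time_after(last_rd, cutoff))
			wanted |= CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE;
		if (open_for_write && time_after(last_wr, cutoff))
			wanted |= CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER;

		return wanted;
	}

Since async dirops reuse the file cap bits for directory operations (Du is
the same bit as Fr, as noted above), a directory whose last recorded
read/write activity has aged out stops wanting exactly the caps that
asynchronous unlink needs, which would match the behaviour seen in the rm
phase.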

* Re: [PATCH v4 0/6] ceph: don't request caps for idle open files
  2020-03-04 19:24   ` Jeff Layton
@ 2020-03-04 20:23     ` Jeff Layton
  0 siblings, 0 replies; 11+ messages in thread
From: Jeff Layton @ 2020-03-04 20:23 UTC (permalink / raw)
  To: Yan, Zheng, ceph-devel

On Wed, 2020-03-04 at 14:24 -0500, Jeff Layton wrote:
> On Wed, 2020-03-04 at 12:48 -0500, Jeff Layton wrote:
> > On Thu, 2020-03-05 at 01:32 +0800, Yan, Zheng wrote:
> > > This series make cephfs client not request caps for open files that
> > > idle for a long time. For the case that one active client and multiple
> > > standby clients open the same file, this increase the possibility that
> > > mds issues exclusive caps to the active client.
> > > 
> > > Yan, Zheng (6):
> > >   ceph: always renew caps if mds_wanted is insufficient
> > >   ceph: consider inode's last read/write when calculating wanted caps
> > >   ceph: remove delay check logic from ceph_check_caps()
> > >   ceph: simplify calling of ceph_get_fmode()
> > >   ceph: update i_requested_max_size only when sending cap msg to auth mds
> > >   ceph: check all mds' caps after page writeback
> > > 
> > >  fs/ceph/caps.c               | 338 ++++++++++++++++-------------------
> > >  fs/ceph/file.c               |  45 ++---
> > >  fs/ceph/inode.c              |  21 +--
> > >  fs/ceph/ioctl.c              |   2 +
> > >  fs/ceph/mds_client.c         |   5 -
> > >  fs/ceph/super.h              |  37 ++--
> > >  include/linux/ceph/ceph_fs.h |   1 +
> > >  7 files changed, 202 insertions(+), 247 deletions(-)
> > > 
> > > changes since v2
> > >  - make __ceph_caps_file_wanted() more readable
> > >  - add patch 5 and 6, which fix hung write during testing patch 1~4
> > > 
> > > changes since v3
> > >  - don't queue delayed cap check for snap inode
> > >  - initialize ci->{last_rd,last_wr} to jiffies - 3600 * HZ
> > >  - make __ceph_caps_file_wanted() check inode type
> > > 
> > 
> > I just tried this out, and it still seems to kill unlink performance
> > with -o nowsync. From the script I posted to the other thread:
> > 
> > --------8<--------
> > $ ./test-async-dirops.sh 
> >   File: /mnt/cephfs/test-dirops.1401
> >   Size: 0         	Blocks: 0          IO Block: 65536  directory
> > Device: 26h/38d	Inode: 1099511627778  Links: 2
> > Access: (0775/drwxrwxr-x)  Uid: ( 4447/ jlayton)   Gid: ( 4447/ jlayton)
> > Context: system_u:object_r:cephfs_t:s0
> > Access: 2020-03-04 12:42:03.914006772 -0500
> > Modify: 2020-03-04 12:42:03.914006772 -0500
> > Change: 2020-03-04 12:42:03.914006772 -0500
> >  Birth: 2020-03-04 12:42:03.914006772 -0500
> > Creating files in /mnt/cephfs/test-dirops.1401
> > 
> > real	0m6.269s
> > user	0m0.123s
> > sys	0m0.454s
> > 
> > sync
> > 
> > real	0m5.358s
> > user	0m0.003s
> > sys	0m0.011s
> > Starting rm
> > 
> > real	0m18.932s
> > user	0m0.169s
> > sys	0m0.713s
> > 
> > rmdir
> > 
> > real	0m0.135s
> > user	0m0.000s
> > sys	0m0.002s
> > 
> > sync
> > 
> > real	0m1.725s
> > user	0m0.000s
> > sys	0m0.002s
> > --------8<--------
> > 
> > Create and sync parts look reasonable. Usually, the rm part finishes in
> > less than a second as we end up doing most of the removes
> > asynchronously, but here it didn't. I suspect that means that unlink
> > didn't get caps for some reason, but I'd need to do some debugging to
> > figure out what's actually happening.
> > 
> 
> A bit more investigation. I piled on some tracepoints that I had plumbed
> in a while back:
> 
> It starts by making a directory (NOSNAP:0x1000000c6c0) and then doing the
> first sync create:
> 
>      kworker/7:1-2021  [007] ....  5316.161752: ceph_add_cap: ino=NOSNAP:0x1 mds=0 issued=pAsLsXs implemented=pAsLsXs mds_wanted=p
>      kworker/7:1-2021  [007] ....  5316.163241: ceph_add_cap: ino=NOSNAP:0x1 mds=0 issued=pAsLsXs implemented=pAsLsXs mds_wanted=p
>      kworker/7:1-2021  [007] ....  5316.163250: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsxLsXsxFsx implemented=pAsxLsXsxFsx mds_wanted=-
>      kworker/7:1-2021  [007] ....  5316.168011: ceph_handle_cap_grant: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsxFsx mds_wanted=p
>      kworker/7:1-2021  [007] ....  5316.168830: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=pAsLsXsFsxcral
>      kworker/7:1-2021  [007] ....  5316.168839: ceph_add_cap: ino=NOSNAP:0x1000000c6c1 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb
>  test-async-diro-2257  [005] ....  5316.169004: ceph_sync_create: name=NOSNAP:0x1000000c6c0/1(0x1000000c6c1)
> 
> We get a cap grant on the directory in the sync create reply and that sets mds_wanted:
> 
>      kworker/7:1-2021  [007] ....  5316.169075: ceph_handle_cap_grant: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsxc implemented=pAsLsXsFsxc mds_wanted=pAsLsXsFsxcral
>      kworker/7:1-2021  [007] ....  5316.170209: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsxc implemented=pAsLsXsFsxc mds_wanted=pAsLsXsFsxcral
> 
> ...eventually we start doing async creates and continue that to the end
> of the creates. (Side note: the second create was also synchronous --
> not sure why):
> 
>      kworker/7:1-2021  [007] ....  5316.170216: ceph_add_cap: ino=NOSNAP:0x1000000c6c2 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb
>  test-async-diro-2257  [005] ....  5316.170251: ceph_sync_create: name=NOSNAP:0x1000000c6c0/2(0x1000000c6c2)
>  test-async-diro-2257  [005] ....  5316.170412: ceph_add_cap: ino=NOSNAP:0x1000000c554 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxLsXsxFsxcrwb
>  test-async-diro-2257  [005] ....  5316.170415: ceph_async_create: name=NOSNAP:0x1000000c6c0/3(0x1000000c554)
>  test-async-diro-2257  [005] ....  5316.170571: ceph_add_cap: ino=NOSNAP:0x1000000c555 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxLsXsxFsxcrwb
> 
> Eventually we finish creating and soon after, get a cap revoke and that
> resets mds_wanted back to only "p":
> 
>      kworker/7:1-2021  [007] ....  5316.461890: ceph_handle_cap_grant: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFs implemented=pAsLsXsFsc mds_wanted=p
> 
> ...and it never changes and we're never granted Du caps (aka Fr):
> 
>               rm-2262  [006] ....  5316.466221: ceph_sync_unlink: name=NOSNAP:0x1000000c6c0/1(0x1000000c6c1)
>      kworker/7:1-2021  [007] ....  5316.467108: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
>      kworker/7:1-2021  [007] ....  5316.467676: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
>      kworker/7:1-2021  [007] ....  5316.467679: ceph_add_cap: ino=NOSNAP:0x1000000c55b mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb
>               rm-2262  [006] ....  5316.467714: ceph_sync_unlink: name=NOSNAP:0x1000000c6c0/10(0x1000000c55b)
>      kworker/7:1-2021  [007] ....  5316.468424: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
>      kworker/7:1-2021  [007] ....  5316.468956: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
>      kworker/7:1-2021  [007] ....  5316.468960: ceph_add_cap: ino=NOSNAP:0x1000000c5b5 mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb
>               rm-2262  [006] ....  5316.468994: ceph_sync_unlink: name=NOSNAP:0x1000000c6c0/100(0x1000000c5b5)
>      kworker/7:1-2021  [007] ....  5316.469728: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
>      kworker/7:1-2021  [007] ....  5316.470237: ceph_add_cap: ino=NOSNAP:0x1000000c6c0 mds=0 issued=pAsLsXsFsx implemented=pAsLsXsFsx mds_wanted=p
>      kworker/7:1-2021  [007] ....  5316.470242: ceph_add_cap: ino=NOSNAP:0x1000000c55c mds=0 issued=pAsxLsXsxFsxcrwb implemented=pAsxLsXsxFsxcrwb mds_wanted=pAsxXsxFxwb
> 
> Given that we've overloaded Fr caps for Du, I wonder if the new timeout
> logic is creeping in here. In particular, we don't want those caps to
> ever time out.
> 

I did a bisect and the first two patches in the series seem to be fine.
This patch causes the problem:

     [PATCH v4 3/6] ceph: remove delay check logic from ceph_check_caps()

...though I've not yet spotted the cause.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 11+ messages in thread
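
What patch 3/6 removes, roughly: the old ceph_check_caps() did not always
act on caps immediately. Unless the caller passed CHECK_CAPS_NODELAY, caps
the client no longer wanted could be held for a while and the inode parked
on a delay list for a later re-check, rather than being released right away.
A rough sketch with illustrative helper names, not the upstream code:

	/*
	 * Rough sketch of the removed "delay check" behaviour; helper
	 * names below are illustrative only.
	 */
	static void check_caps_delay_sketch(struct ceph_inode_info *ci,
					    int flags, bool caps_unwanted)
	{
		if (caps_unwanted && !(flags & CHECK_CAPS_NODELAY)) {
			/* defer: queue a later re-check instead of acting now */
			queue_delayed_cap_check(ci);	/* illustrative name */
			return;
		}

		/* otherwise talk to the mds now: ack revokes, drop unwanted caps */
		send_cap_msgs(ci);			/* illustrative name */
	}

If removing that deferral changes when the client drops or requests the
directory caps, it could plausibly explain the rm regression above, though
that is only a guess pending further debugging.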

* Re: [PATCH v4 0/6] ceph: don't request caps for idle open files
  2020-03-04 17:48 ` [PATCH v4 0/6] ceph: don't request caps for idle open files Jeff Layton
  2020-03-04 19:24   ` Jeff Layton
@ 2020-03-05 12:23   ` Yan, Zheng
  1 sibling, 0 replies; 11+ messages in thread
From: Yan, Zheng @ 2020-03-05 12:23 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Yan, Zheng, ceph-devel

On Thu, Mar 5, 2020 at 1:50 AM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Thu, 2020-03-05 at 01:32 +0800, Yan, Zheng wrote:
> > This series make cephfs client not request caps for open files that
> > idle for a long time. For the case that one active client and multiple
> > standby clients open the same file, this increase the possibility that
> > mds issues exclusive caps to the active client.
> >
> > Yan, Zheng (6):
> >   ceph: always renew caps if mds_wanted is insufficient
> >   ceph: consider inode's last read/write when calculating wanted caps
> >   ceph: remove delay check logic from ceph_check_caps()
> >   ceph: simplify calling of ceph_get_fmode()
> >   ceph: update i_requested_max_size only when sending cap msg to auth mds
> >   ceph: check all mds' caps after page writeback
> >
> >  fs/ceph/caps.c               | 338 ++++++++++++++++-------------------
> >  fs/ceph/file.c               |  45 ++---
> >  fs/ceph/inode.c              |  21 +--
> >  fs/ceph/ioctl.c              |   2 +
> >  fs/ceph/mds_client.c         |   5 -
> >  fs/ceph/super.h              |  37 ++--
> >  include/linux/ceph/ceph_fs.h |   1 +
> >  7 files changed, 202 insertions(+), 247 deletions(-)
> >
> > changes since v2
> >  - make __ceph_caps_file_wanted() more readable
> >  - add patch 5 and 6, which fix hung write during testing patch 1~4
> >
> > changes since v3
> >  - don't queue delayed cap check for snap inode
> >  - initialize ci->{last_rd,last_wr} to jiffies - 3600 * HZ
> >  - make __ceph_caps_file_wanted() check inode type
> >
>
> I just tried this out, and it still seems to kill unlink performance
> with -o nowsync. From the script I posted to the other thread:
>
> --------8<--------
> $ ./test-async-dirops.sh
>   File: /mnt/cephfs/test-dirops.1401
>   Size: 0               Blocks: 0          IO Block: 65536  directory
> Device: 26h/38d Inode: 1099511627778  Links: 2
> Access: (0775/drwxrwxr-x)  Uid: ( 4447/ jlayton)   Gid: ( 4447/ jlayton)
> Context: system_u:object_r:cephfs_t:s0
> Access: 2020-03-04 12:42:03.914006772 -0500
> Modify: 2020-03-04 12:42:03.914006772 -0500
> Change: 2020-03-04 12:42:03.914006772 -0500
>  Birth: 2020-03-04 12:42:03.914006772 -0500
> Creating files in /mnt/cephfs/test-dirops.1401
>
> real    0m6.269s
> user    0m0.123s
> sys     0m0.454s
>
> sync
>
> real    0m5.358s
> user    0m0.003s
> sys     0m0.011s
> Starting rm
>
> real    0m18.932s
> user    0m0.169s
> sys     0m0.713s
>
> rmdir
>
> real    0m0.135s
> user    0m0.000s
> sys     0m0.002s
>
> sync
>
> real    0m1.725s
> user    0m0.000s
> sys     0m0.002s
> --------8<--------
>
> Create and sync parts look reasonable. Usually, the rm part finishes in
> less than a second as we end up doing most of the removes
> asynchronously, but here it didn't. I suspect that means that unlink
> didn't get caps for some reason, but I'd need to do some debugging to
> figure out what's actually happening.
>

Patch 7 of version 5 should fix the rm issue.
> --
> Jeff Layton <jlayton@kernel.org>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread
