ceph-devel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
@ 2020-09-03 13:01 xiubli
  2020-09-03 13:01 ` [PATCH v5 1/2] ceph: add ceph_sb_to_mdsc helper support to parse the mdsc xiubli
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: xiubli @ 2020-09-03 13:01 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, zyan, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Changed in V5:
- Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
- Remove the is_opened member.

Changed in V4:
- A small fix about the total_inodes.

Changed in V3:
- Resend for V2 just forgot one patch, which is adding some helpers
support to simplify the code.

Changed in V2:
- Add number of inodes that have opened files.
- Remove the dir metrics and fold into files.



Xiubo Li (2):
  ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
  ceph: metrics for opened files, pinned caps and opened inodes

 fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
 fs/ceph/debugfs.c | 11 +++++++++++
 fs/ceph/dir.c     | 20 +++++++-------------
 fs/ceph/file.c    | 13 ++++++-------
 fs/ceph/inode.c   | 11 ++++++++---
 fs/ceph/locks.c   |  2 +-
 fs/ceph/metric.c  | 14 ++++++++++++++
 fs/ceph/metric.h  |  7 +++++++
 fs/ceph/quota.c   | 10 +++++-----
 fs/ceph/snap.c    |  2 +-
 fs/ceph/super.h   |  6 ++++++
 11 files changed, 103 insertions(+), 34 deletions(-)

-- 
2.18.4


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v5 1/2] ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
  2020-09-03 13:01 [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes xiubli
@ 2020-09-03 13:01 ` xiubli
  2020-09-03 13:01 ` [PATCH v5 2/2] ceph: metrics for opened files, pinned caps and opened inodes xiubli
  2020-09-03 14:18 ` [PATCH v5 0/2] " Jeff Layton
  2 siblings, 0 replies; 19+ messages in thread
From: xiubli @ 2020-09-03 13:01 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, zyan, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

This will help simplify the code.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c  |  3 +--
 fs/ceph/dir.c   | 20 +++++++-------------
 fs/ceph/file.c  |  8 +++-----
 fs/ceph/inode.c |  5 ++---
 fs/ceph/locks.c |  2 +-
 fs/ceph/quota.c | 10 +++++-----
 fs/ceph/snap.c  |  2 +-
 fs/ceph/super.h |  6 ++++++
 8 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 55ccccf77cea..0120fcb3503e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1906,9 +1906,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
 void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		     struct ceph_mds_session *session)
 {
-	struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct inode *inode = &ci->vfs_inode;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 	struct ceph_cap *cap;
 	u64 flush_tid, oldest_flush_tid;
 	int file_wanted, used, cap_used;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 060bdcc5ce32..f7c0de3f41b5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -38,8 +38,7 @@ static int __dir_lease_try_check(const struct dentry *dentry);
 static int ceph_d_init(struct dentry *dentry)
 {
 	struct ceph_dentry_info *di;
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dentry->d_sb);
 
 	di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL);
 	if (!di)
@@ -743,7 +742,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 				  unsigned int flags)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
 	struct ceph_mds_request *req;
 	int op;
 	int mask;
@@ -832,8 +831,7 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
 static int ceph_mknod(struct inode *dir, struct dentry *dentry,
 		      umode_t mode, dev_t rdev)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
 	struct ceph_mds_request *req;
 	struct ceph_acl_sec_ctx as_ctx = {};
 	int err;
@@ -894,8 +892,7 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 			    const char *dest)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
 	struct ceph_mds_request *req;
 	struct ceph_acl_sec_ctx as_ctx = {};
 	int err;
@@ -947,8 +944,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
 
 static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
 	struct ceph_mds_request *req;
 	struct ceph_acl_sec_ctx as_ctx = {};
 	int err = -EROFS;
@@ -1015,8 +1011,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 static int ceph_link(struct dentry *old_dentry, struct inode *dir,
 		     struct dentry *dentry)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
 	struct ceph_mds_request *req;
 	int err;
 
@@ -1197,8 +1192,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 		       struct inode *new_dir, struct dentry *new_dentry,
 		       unsigned int flags)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb);
 	struct ceph_mds_request *req;
 	int op = CEPH_MDS_OP_RENAME;
 	int err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 04ab99c0223a..29ee5f2e394a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -182,8 +182,7 @@ static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty)
 static struct ceph_mds_request *
 prepare_open_request(struct super_block *sb, int flags, int create_mode)
 {
-	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
-	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
 	struct ceph_mds_request *req;
 	int want_auth = USE_ANY_MDS;
 	int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
@@ -283,7 +282,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
  */
 int ceph_renew_caps(struct inode *inode, int fmode)
 {
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_mds_request *req;
 	int err, flags, wanted;
@@ -1050,8 +1049,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
 	struct inode *inode = req->r_inode;
 	struct ceph_aio_request *aio_req = req->r_priv;
 	struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
-	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-	struct ceph_client_metric *metric = &fsc->mdsc->metric;
+	struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric;
 
 	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
 	BUG_ON(!osd_data->num_bvecs);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 357c937699d5..9210cd1e859d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -559,8 +559,7 @@ void ceph_evict_inode(struct inode *inode)
 	 * caps in i_snap_caps.
 	 */
 	if (ci->i_snap_realm) {
-		struct ceph_mds_client *mdsc =
-					ceph_inode_to_client(inode)->mdsc;
+		struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 		if (ceph_snap(inode) == CEPH_NOSNAP) {
 			struct ceph_snap_realm *realm = ci->i_snap_realm;
 			dout(" dropping residual ref to snap realm %p\n",
@@ -740,7 +739,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
 		    struct ceph_mds_session *session, int cap_fmode,
 		    struct ceph_cap_reservation *caps_reservation)
 {
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 	struct ceph_mds_reply_inode *info = iinfo->in;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int issued, new_issued, info_caps;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index d6b9166e71e4..048a435a29be 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -63,7 +63,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = {
 static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
 			     int cmd, u8 wait, struct file_lock *fl)
 {
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 	struct ceph_mds_request *req;
 	int err;
 	u64 length = 0;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 198ddde5c1e6..4d3529f92091 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -12,7 +12,7 @@
 
 void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
 {
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 	if (inc)
 		atomic64_inc(&mdsc->quotarealms_count);
 	else
@@ -21,8 +21,8 @@ void ceph_adjust_quota_realms_count(struct inode *inode, bool inc)
 
 static inline bool ceph_has_realms_with_quotas(struct inode *inode)
 {
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-	struct super_block *sb = mdsc->fsc->sb;
+	struct super_block *sb = inode->i_sb;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
 
 	if (atomic64_read(&mdsc->quotarealms_count) > 0)
 		return true;
@@ -266,7 +266,7 @@ static struct ceph_snap_realm *get_quota_realm(struct ceph_mds_client *mdsc,
 
 static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
 {
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(old)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
 	struct ceph_snap_realm *old_realm, *new_realm;
 	bool is_same;
 
@@ -313,7 +313,7 @@ enum quota_check_op {
 static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
 				 loff_t delta)
 {
-	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 	struct ceph_inode_info *ci;
 	struct ceph_snap_realm *realm, *next;
 	struct inode *in;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 923be9399b21..0da39c16dab4 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -602,7 +602,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 			    struct ceph_cap_snap *capsnap)
 {
 	struct inode *inode = &ci->vfs_inode;
-	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 
 	BUG_ON(capsnap->writing);
 	capsnap->size = inode->i_size;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 4c3c964b1c54..cd4137b47c2b 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -451,6 +451,12 @@ ceph_sb_to_client(const struct super_block *sb)
 	return (struct ceph_fs_client *)sb->s_fs_info;
 }
 
+static inline struct ceph_mds_client *
+ceph_sb_to_mdsc(const struct super_block *sb)
+{
+	return (struct ceph_mds_client *)ceph_sb_to_client(sb)->mdsc;
+}
+
 static inline struct ceph_vino
 ceph_vino(const struct inode *inode)
 {
-- 
2.18.4


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v5 2/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-03 13:01 [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes xiubli
  2020-09-03 13:01 ` [PATCH v5 1/2] ceph: add ceph_sb_to_mdsc helper support to parse the mdsc xiubli
@ 2020-09-03 13:01 ` xiubli
  2020-09-03 14:16   ` Jeff Layton
  2020-09-03 14:18 ` [PATCH v5 0/2] " Jeff Layton
  2 siblings, 1 reply; 19+ messages in thread
From: xiubli @ 2020-09-03 13:01 UTC (permalink / raw)
  To: jlayton; +Cc: idryomov, zyan, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

In client for each inode, it may have many opened files and may
have been pinned in more than one MDS servers. And some inodes
are idle, which have no any opened files.

This patch will show these metrics in the debugfs, likes:

item                               total
-----------------------------------------
opened files  / total inodes       14 / 5
pinned i_caps / total inodes       7  / 5
opened inodes / total inodes       3  / 5

Will send these metrics to ceph, which will be used by the `fs top`,
later.

URL: https://tracker.ceph.com/issues/47005
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c    | 38 ++++++++++++++++++++++++++++++++++++--
 fs/ceph/debugfs.c | 11 +++++++++++
 fs/ceph/file.c    |  5 +++--
 fs/ceph/inode.c   |  6 ++++++
 fs/ceph/metric.c  | 14 ++++++++++++++
 fs/ceph/metric.h  |  7 +++++++
 6 files changed, 77 insertions(+), 4 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0120fcb3503e..f09461fe569b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4283,13 +4283,30 @@ void __ceph_touch_fmode(struct ceph_inode_info *ci,
 
 void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
 {
-	int i;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
 	int bits = (fmode << 1) | 1;
+	bool is_opened = false;
+	int i;
+
+	if (count == 1)
+		atomic64_inc(&mdsc->metric.opened_files);
+
 	spin_lock(&ci->i_ceph_lock);
 	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
 		if (bits & (1 << i))
 			ci->i_nr_by_mode[i] += count;
+
+		/*
+		 * If any of the mode ref is larger than 1,
+		 * that means it has been already opened by
+		 * others. Just skip checking the PIN ref.
+		 */
+		if (i && ci->i_nr_by_mode[i] > 1)
+			is_opened = true;
 	}
+
+	if (!is_opened)
+		percpu_counter_inc(&mdsc->metric.opened_inodes);
 	spin_unlock(&ci->i_ceph_lock);
 }
 
@@ -4300,15 +4317,32 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
  */
 void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
 {
-	int i;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
 	int bits = (fmode << 1) | 1;
+	bool is_closed = true;
+	int i;
+
+	if (count == 1)
+		atomic64_dec(&mdsc->metric.opened_files);
+
 	spin_lock(&ci->i_ceph_lock);
 	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
 		if (bits & (1 << i)) {
 			BUG_ON(ci->i_nr_by_mode[i] < count);
 			ci->i_nr_by_mode[i] -= count;
 		}
+
+		/*
+		 * If any of the mode ref is not 0 after
+		 * decreased, that means it is still opened
+		 * by others. Just skip checking the PIN ref.
+		 */
+		if (i && ci->i_nr_by_mode[i])
+			is_closed = false;
 	}
+
+	if (is_closed)
+		percpu_counter_dec(&mdsc->metric.opened_inodes);
 	spin_unlock(&ci->i_ceph_lock);
 }
 
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 97539b497e4c..9efd3982230d 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -148,6 +148,17 @@ static int metric_show(struct seq_file *s, void *p)
 	int nr_caps = 0;
 	s64 total, sum, avg, min, max, sq;
 
+	sum = percpu_counter_sum(&m->total_inodes);
+	seq_printf(s, "item                               total\n");
+	seq_printf(s, "------------------------------------------\n");
+	seq_printf(s, "%-35s%lld / %lld\n", "opened files  / total inodes",
+		   atomic64_read(&m->opened_files), sum);
+	seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes",
+		   atomic64_read(&m->total_caps), sum);
+	seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes",
+		   percpu_counter_sum(&m->opened_inodes), sum);
+
+	seq_printf(s, "\n");
 	seq_printf(s, "item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)\n");
 	seq_printf(s, "-----------------------------------------------------------------------------------\n");
 
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 29ee5f2e394a..c63ddf7e054b 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -211,8 +211,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
 	BUG_ON(inode->i_fop->release != ceph_release);
 
 	if (isdir) {
-		struct ceph_dir_file_info *dfi =
-			kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
+		struct ceph_dir_file_info *dfi;
+
+		dfi = kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
 		if (!dfi)
 			return -ENOMEM;
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9210cd1e859d..f2764159e05b 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -426,6 +426,7 @@ static int ceph_fill_fragtree(struct inode *inode,
  */
 struct inode *ceph_alloc_inode(struct super_block *sb)
 {
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
 	struct ceph_inode_info *ci;
 	int i;
 
@@ -525,12 +526,17 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
 	ci->i_meta_err = 0;
 
+	percpu_counter_inc(&mdsc->metric.total_inodes);
+
 	return &ci->vfs_inode;
 }
 
 void ceph_free_inode(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
+
+	percpu_counter_dec(&mdsc->metric.total_inodes);
 
 	kfree(ci->i_symlink);
 	kmem_cache_free(ceph_inode_cachep, ci);
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index 2466b261fba2..fee4c4778313 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -192,11 +192,23 @@ int ceph_metric_init(struct ceph_client_metric *m)
 	m->total_metadatas = 0;
 	m->metadata_latency_sum = 0;
 
+	atomic64_set(&m->opened_files, 0);
+	ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL);
+	if (ret)
+		goto err_opened_inodes;
+	ret = percpu_counter_init(&m->total_inodes, 0, GFP_KERNEL);
+	if (ret)
+		goto err_total_inodes;
+
 	m->session = NULL;
 	INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work);
 
 	return 0;
 
+err_total_inodes:
+	percpu_counter_destroy(&m->opened_inodes);
+err_opened_inodes:
+	percpu_counter_destroy(&m->i_caps_mis);
 err_i_caps_mis:
 	percpu_counter_destroy(&m->i_caps_hit);
 err_i_caps_hit:
@@ -212,6 +224,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
 	if (!m)
 		return;
 
+	percpu_counter_destroy(&m->total_inodes);
+	percpu_counter_destroy(&m->opened_inodes);
 	percpu_counter_destroy(&m->i_caps_mis);
 	percpu_counter_destroy(&m->i_caps_hit);
 	percpu_counter_destroy(&m->d_lease_mis);
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index 1d0959d669d7..710f3f1dceab 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -115,6 +115,13 @@ struct ceph_client_metric {
 	ktime_t metadata_latency_min;
 	ktime_t metadata_latency_max;
 
+	/* The total number of directories and files that are opened */
+	atomic64_t opened_files;
+
+	/* The total number of inodes that have opened files or directories */
+	struct percpu_counter opened_inodes;
+	struct percpu_counter total_inodes;
+
 	struct ceph_mds_session *session;
 	struct delayed_work delayed_work;  /* delayed work */
 };
-- 
2.18.4


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 2/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-03 13:01 ` [PATCH v5 2/2] ceph: metrics for opened files, pinned caps and opened inodes xiubli
@ 2020-09-03 14:16   ` Jeff Layton
  2020-09-03 14:20     ` Xiubo Li
  0 siblings, 1 reply; 19+ messages in thread
From: Jeff Layton @ 2020-09-03 14:16 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, zyan, pdonnell, ceph-devel

On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> In client for each inode, it may have many opened files and may
> have been pinned in more than one MDS servers. And some inodes
> are idle, which have no any opened files.
> 
> This patch will show these metrics in the debugfs, likes:
> 
> item                               total
> -----------------------------------------
> opened files  / total inodes       14 / 5
> pinned i_caps / total inodes       7  / 5
> opened inodes / total inodes       3  / 5
> 
> Will send these metrics to ceph, which will be used by the `fs top`,
> later.
> 
> URL: https://tracker.ceph.com/issues/47005
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/caps.c    | 38 ++++++++++++++++++++++++++++++++++++--
>  fs/ceph/debugfs.c | 11 +++++++++++
>  fs/ceph/file.c    |  5 +++--
>  fs/ceph/inode.c   |  6 ++++++
>  fs/ceph/metric.c  | 14 ++++++++++++++
>  fs/ceph/metric.h  |  7 +++++++
>  6 files changed, 77 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 0120fcb3503e..f09461fe569b 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -4283,13 +4283,30 @@ void __ceph_touch_fmode(struct ceph_inode_info *ci,
>  
>  void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
>  {
> -	int i;
> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
>  	int bits = (fmode << 1) | 1;
> +	bool is_opened = false;
> +	int i;
> +
> +	if (count == 1)
> +		atomic64_inc(&mdsc->metric.opened_files);
> +
>  	spin_lock(&ci->i_ceph_lock);
>  	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
>  		if (bits & (1 << i))
>  			ci->i_nr_by_mode[i] += count;
> +
> +		/*
> +		 * If any of the mode ref is larger than 1,
> +		 * that means it has been already opened by
> +		 * others. Just skip checking the PIN ref.
> +		 */
> +		if (i && ci->i_nr_by_mode[i] > 1)
> +			is_opened = true;
>  	}
> +
> +	if (!is_opened)
> +		percpu_counter_inc(&mdsc->metric.opened_inodes);
>  	spin_unlock(&ci->i_ceph_lock);
>  }
>  
> @@ -4300,15 +4317,32 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
>   */
>  void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
>  {
> -	int i;
> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
>  	int bits = (fmode << 1) | 1;
> +	bool is_closed = true;
> +	int i;
> +
> +	if (count == 1)
> +		atomic64_dec(&mdsc->metric.opened_files);
> +
>  	spin_lock(&ci->i_ceph_lock);
>  	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
>  		if (bits & (1 << i)) {
>  			BUG_ON(ci->i_nr_by_mode[i] < count);
>  			ci->i_nr_by_mode[i] -= count;
>  		}
> +
> +		/*
> +		 * If any of the mode ref is not 0 after
> +		 * decreased, that means it is still opened
> +		 * by others. Just skip checking the PIN ref.
> +		 */
> +		if (i && ci->i_nr_by_mode[i])
> +			is_closed = false;
>  	}
> +
> +	if (is_closed)
> +		percpu_counter_dec(&mdsc->metric.opened_inodes);
>  	spin_unlock(&ci->i_ceph_lock);
>  }
>  
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index 97539b497e4c..9efd3982230d 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -148,6 +148,17 @@ static int metric_show(struct seq_file *s, void *p)
>  	int nr_caps = 0;
>  	s64 total, sum, avg, min, max, sq;
>  
> +	sum = percpu_counter_sum(&m->total_inodes);
> +	seq_printf(s, "item                               total\n");
> +	seq_printf(s, "------------------------------------------\n");
> +	seq_printf(s, "%-35s%lld / %lld\n", "opened files  / total inodes",
> +		   atomic64_read(&m->opened_files), sum);
> +	seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes",
> +		   atomic64_read(&m->total_caps), sum);
> +	seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes",
> +		   percpu_counter_sum(&m->opened_inodes), sum);
> +
> +	seq_printf(s, "\n");
>  	seq_printf(s, "item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)\n");
>  	seq_printf(s, "-----------------------------------------------------------------------------------\n");
>  
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 29ee5f2e394a..c63ddf7e054b 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -211,8 +211,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
>  	BUG_ON(inode->i_fop->release != ceph_release);
>  
>  	if (isdir) {
> -		struct ceph_dir_file_info *dfi =
> -			kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
> +		struct ceph_dir_file_info *dfi;
> +
> +		dfi = kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
>  		if (!dfi)
>  			return -ENOMEM;
>  

^^^
Unrelated delta here? I'll plan to drop this hunk if there are no
objections.


> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 9210cd1e859d..f2764159e05b 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -426,6 +426,7 @@ static int ceph_fill_fragtree(struct inode *inode,
>   */
>  struct inode *ceph_alloc_inode(struct super_block *sb)
>  {
> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
>  	struct ceph_inode_info *ci;
>  	int i;
>  
> @@ -525,12 +526,17 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>  
>  	ci->i_meta_err = 0;
>  
> +	percpu_counter_inc(&mdsc->metric.total_inodes);
> +
>  	return &ci->vfs_inode;
>  }
>  
>  void ceph_free_inode(struct inode *inode)
>  {
>  	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
> +
> +	percpu_counter_dec(&mdsc->metric.total_inodes);
>  
>  	kfree(ci->i_symlink);
>  	kmem_cache_free(ceph_inode_cachep, ci);
> diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
> index 2466b261fba2..fee4c4778313 100644
> --- a/fs/ceph/metric.c
> +++ b/fs/ceph/metric.c
> @@ -192,11 +192,23 @@ int ceph_metric_init(struct ceph_client_metric *m)
>  	m->total_metadatas = 0;
>  	m->metadata_latency_sum = 0;
>  
> +	atomic64_set(&m->opened_files, 0);
> +	ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL);
> +	if (ret)
> +		goto err_opened_inodes;
> +	ret = percpu_counter_init(&m->total_inodes, 0, GFP_KERNEL);
> +	if (ret)
> +		goto err_total_inodes;
> +
>  	m->session = NULL;
>  	INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work);
>  
>  	return 0;
>  
> +err_total_inodes:
> +	percpu_counter_destroy(&m->opened_inodes);
> +err_opened_inodes:
> +	percpu_counter_destroy(&m->i_caps_mis);
>  err_i_caps_mis:
>  	percpu_counter_destroy(&m->i_caps_hit);
>  err_i_caps_hit:
> @@ -212,6 +224,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
>  	if (!m)
>  		return;
>  
> +	percpu_counter_destroy(&m->total_inodes);
> +	percpu_counter_destroy(&m->opened_inodes);
>  	percpu_counter_destroy(&m->i_caps_mis);
>  	percpu_counter_destroy(&m->i_caps_hit);
>  	percpu_counter_destroy(&m->d_lease_mis);
> diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
> index 1d0959d669d7..710f3f1dceab 100644
> --- a/fs/ceph/metric.h
> +++ b/fs/ceph/metric.h
> @@ -115,6 +115,13 @@ struct ceph_client_metric {
>  	ktime_t metadata_latency_min;
>  	ktime_t metadata_latency_max;
>  
> +	/* The total number of directories and files that are opened */
> +	atomic64_t opened_files;
> +
> +	/* The total number of inodes that have opened files or directories */
> +	struct percpu_counter opened_inodes;
> +	struct percpu_counter total_inodes;
> +
>  	struct ceph_mds_session *session;
>  	struct delayed_work delayed_work;  /* delayed work */
>  };

-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-03 13:01 [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes xiubli
  2020-09-03 13:01 ` [PATCH v5 1/2] ceph: add ceph_sb_to_mdsc helper support to parse the mdsc xiubli
  2020-09-03 13:01 ` [PATCH v5 2/2] ceph: metrics for opened files, pinned caps and opened inodes xiubli
@ 2020-09-03 14:18 ` Jeff Layton
  2020-09-03 14:22   ` Xiubo Li
  2 siblings, 1 reply; 19+ messages in thread
From: Jeff Layton @ 2020-09-03 14:18 UTC (permalink / raw)
  To: xiubli; +Cc: idryomov, zyan, pdonnell, ceph-devel

On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Changed in V5:
> - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
> - Remove the is_opened member.
> 
> Changed in V4:
> - A small fix about the total_inodes.
> 
> Changed in V3:
> - Resend for V2 just forgot one patch, which is adding some helpers
> support to simplify the code.
> 
> Changed in V2:
> - Add number of inodes that have opened files.
> - Remove the dir metrics and fold into files.
> 
> 
> 
> Xiubo Li (2):
>   ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
>   ceph: metrics for opened files, pinned caps and opened inodes
> 
>  fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
>  fs/ceph/debugfs.c | 11 +++++++++++
>  fs/ceph/dir.c     | 20 +++++++-------------
>  fs/ceph/file.c    | 13 ++++++-------
>  fs/ceph/inode.c   | 11 ++++++++---
>  fs/ceph/locks.c   |  2 +-
>  fs/ceph/metric.c  | 14 ++++++++++++++
>  fs/ceph/metric.h  |  7 +++++++
>  fs/ceph/quota.c   | 10 +++++-----
>  fs/ceph/snap.c    |  2 +-
>  fs/ceph/super.h   |  6 ++++++
>  11 files changed, 103 insertions(+), 34 deletions(-)
> 

Looks good. I went ahead and merge this into testing.

Small merge conflict in quota.c, which I guess is probably due to not
basing this on testing branch. I also dropped what looks like an
unrelated hunk in the second patch.

In the future, if you can be sure that patches you post apply cleanly to
testing branch then that would make things easier.

Thanks!
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 2/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-03 14:16   ` Jeff Layton
@ 2020-09-03 14:20     ` Xiubo Li
  0 siblings, 0 replies; 19+ messages in thread
From: Xiubo Li @ 2020-09-03 14:20 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, zyan, pdonnell, ceph-devel

On 2020/9/3 22:16, Jeff Layton wrote:
> On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> In client for each inode, it may have many opened files and may
>> have been pinned in more than one MDS servers. And some inodes
>> are idle, which have no any opened files.
>>
>> This patch will show these metrics in the debugfs, likes:
>>
>> item                               total
>> -----------------------------------------
>> opened files  / total inodes       14 / 5
>> pinned i_caps / total inodes       7  / 5
>> opened inodes / total inodes       3  / 5
>>
>> Will send these metrics to ceph, which will be used by the `fs top`,
>> later.
>>
>> URL: https://tracker.ceph.com/issues/47005
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/caps.c    | 38 ++++++++++++++++++++++++++++++++++++--
>>   fs/ceph/debugfs.c | 11 +++++++++++
>>   fs/ceph/file.c    |  5 +++--
>>   fs/ceph/inode.c   |  6 ++++++
>>   fs/ceph/metric.c  | 14 ++++++++++++++
>>   fs/ceph/metric.h  |  7 +++++++
>>   6 files changed, 77 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 0120fcb3503e..f09461fe569b 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -4283,13 +4283,30 @@ void __ceph_touch_fmode(struct ceph_inode_info *ci,
>>   
>>   void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
>>   {
>> -	int i;
>> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
>>   	int bits = (fmode << 1) | 1;
>> +	bool is_opened = false;
>> +	int i;
>> +
>> +	if (count == 1)
>> +		atomic64_inc(&mdsc->metric.opened_files);
>> +
>>   	spin_lock(&ci->i_ceph_lock);
>>   	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
>>   		if (bits & (1 << i))
>>   			ci->i_nr_by_mode[i] += count;
>> +
>> +		/*
>> +		 * If any of the mode ref is larger than 1,
>> +		 * that means it has been already opened by
>> +		 * others. Just skip checking the PIN ref.
>> +		 */
>> +		if (i && ci->i_nr_by_mode[i] > 1)
>> +			is_opened = true;
>>   	}
>> +
>> +	if (!is_opened)
>> +		percpu_counter_inc(&mdsc->metric.opened_inodes);
>>   	spin_unlock(&ci->i_ceph_lock);
>>   }
>>   
>> @@ -4300,15 +4317,32 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
>>    */
>>   void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
>>   {
>> -	int i;
>> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
>>   	int bits = (fmode << 1) | 1;
>> +	bool is_closed = true;
>> +	int i;
>> +
>> +	if (count == 1)
>> +		atomic64_dec(&mdsc->metric.opened_files);
>> +
>>   	spin_lock(&ci->i_ceph_lock);
>>   	for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
>>   		if (bits & (1 << i)) {
>>   			BUG_ON(ci->i_nr_by_mode[i] < count);
>>   			ci->i_nr_by_mode[i] -= count;
>>   		}
>> +
>> +		/*
>> +		 * If any of the mode ref is not 0 after
>> +		 * decreased, that means it is still opened
>> +		 * by others. Just skip checking the PIN ref.
>> +		 */
>> +		if (i && ci->i_nr_by_mode[i])
>> +			is_closed = false;
>>   	}
>> +
>> +	if (is_closed)
>> +		percpu_counter_dec(&mdsc->metric.opened_inodes);
>>   	spin_unlock(&ci->i_ceph_lock);
>>   }
>>   
>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
>> index 97539b497e4c..9efd3982230d 100644
>> --- a/fs/ceph/debugfs.c
>> +++ b/fs/ceph/debugfs.c
>> @@ -148,6 +148,17 @@ static int metric_show(struct seq_file *s, void *p)
>>   	int nr_caps = 0;
>>   	s64 total, sum, avg, min, max, sq;
>>   
>> +	sum = percpu_counter_sum(&m->total_inodes);
>> +	seq_printf(s, "item                               total\n");
>> +	seq_printf(s, "------------------------------------------\n");
>> +	seq_printf(s, "%-35s%lld / %lld\n", "opened files  / total inodes",
>> +		   atomic64_read(&m->opened_files), sum);
>> +	seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes",
>> +		   atomic64_read(&m->total_caps), sum);
>> +	seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes",
>> +		   percpu_counter_sum(&m->opened_inodes), sum);
>> +
>> +	seq_printf(s, "\n");
>>   	seq_printf(s, "item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)\n");
>>   	seq_printf(s, "-----------------------------------------------------------------------------------\n");
>>   
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 29ee5f2e394a..c63ddf7e054b 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -211,8 +211,9 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
>>   	BUG_ON(inode->i_fop->release != ceph_release);
>>   
>>   	if (isdir) {
>> -		struct ceph_dir_file_info *dfi =
>> -			kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
>> +		struct ceph_dir_file_info *dfi;
>> +
>> +		dfi = kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
>>   		if (!dfi)
>>   			return -ENOMEM;
>>   
> ^^^
> Unrelated delta here? I'll plan to drop this hunk if there are no
> objections.
>
Yeah, sure. This should be introduced in a previous version and there 
has some other changes would depend on it, but that changes was removed 
later and this was forgot.

Thanks Jeff.


>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index 9210cd1e859d..f2764159e05b 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -426,6 +426,7 @@ static int ceph_fill_fragtree(struct inode *inode,
>>    */
>>   struct inode *ceph_alloc_inode(struct super_block *sb)
>>   {
>> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
>>   	struct ceph_inode_info *ci;
>>   	int i;
>>   
>> @@ -525,12 +526,17 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>>   
>>   	ci->i_meta_err = 0;
>>   
>> +	percpu_counter_inc(&mdsc->metric.total_inodes);
>> +
>>   	return &ci->vfs_inode;
>>   }
>>   
>>   void ceph_free_inode(struct inode *inode)
>>   {
>>   	struct ceph_inode_info *ci = ceph_inode(inode);
>> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
>> +
>> +	percpu_counter_dec(&mdsc->metric.total_inodes);
>>   
>>   	kfree(ci->i_symlink);
>>   	kmem_cache_free(ceph_inode_cachep, ci);
>> diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
>> index 2466b261fba2..fee4c4778313 100644
>> --- a/fs/ceph/metric.c
>> +++ b/fs/ceph/metric.c
>> @@ -192,11 +192,23 @@ int ceph_metric_init(struct ceph_client_metric *m)
>>   	m->total_metadatas = 0;
>>   	m->metadata_latency_sum = 0;
>>   
>> +	atomic64_set(&m->opened_files, 0);
>> +	ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL);
>> +	if (ret)
>> +		goto err_opened_inodes;
>> +	ret = percpu_counter_init(&m->total_inodes, 0, GFP_KERNEL);
>> +	if (ret)
>> +		goto err_total_inodes;
>> +
>>   	m->session = NULL;
>>   	INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work);
>>   
>>   	return 0;
>>   
>> +err_total_inodes:
>> +	percpu_counter_destroy(&m->opened_inodes);
>> +err_opened_inodes:
>> +	percpu_counter_destroy(&m->i_caps_mis);
>>   err_i_caps_mis:
>>   	percpu_counter_destroy(&m->i_caps_hit);
>>   err_i_caps_hit:
>> @@ -212,6 +224,8 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
>>   	if (!m)
>>   		return;
>>   
>> +	percpu_counter_destroy(&m->total_inodes);
>> +	percpu_counter_destroy(&m->opened_inodes);
>>   	percpu_counter_destroy(&m->i_caps_mis);
>>   	percpu_counter_destroy(&m->i_caps_hit);
>>   	percpu_counter_destroy(&m->d_lease_mis);
>> diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
>> index 1d0959d669d7..710f3f1dceab 100644
>> --- a/fs/ceph/metric.h
>> +++ b/fs/ceph/metric.h
>> @@ -115,6 +115,13 @@ struct ceph_client_metric {
>>   	ktime_t metadata_latency_min;
>>   	ktime_t metadata_latency_max;
>>   
>> +	/* The total number of directories and files that are opened */
>> +	atomic64_t opened_files;
>> +
>> +	/* The total number of inodes that have opened files or directories */
>> +	struct percpu_counter opened_inodes;
>> +	struct percpu_counter total_inodes;
>> +
>>   	struct ceph_mds_session *session;
>>   	struct delayed_work delayed_work;  /* delayed work */
>>   };



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-03 14:18 ` [PATCH v5 0/2] " Jeff Layton
@ 2020-09-03 14:22   ` Xiubo Li
  2020-09-09 20:34     ` Ilya Dryomov
  0 siblings, 1 reply; 19+ messages in thread
From: Xiubo Li @ 2020-09-03 14:22 UTC (permalink / raw)
  To: Jeff Layton; +Cc: idryomov, zyan, pdonnell, ceph-devel

On 2020/9/3 22:18, Jeff Layton wrote:
> On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Changed in V5:
>> - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
>> - Remove the is_opened member.
>>
>> Changed in V4:
>> - A small fix about the total_inodes.
>>
>> Changed in V3:
>> - Resend for V2 just forgot one patch, which is adding some helpers
>> support to simplify the code.
>>
>> Changed in V2:
>> - Add number of inodes that have opened files.
>> - Remove the dir metrics and fold into files.
>>
>>
>>
>> Xiubo Li (2):
>>    ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
>>    ceph: metrics for opened files, pinned caps and opened inodes
>>
>>   fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
>>   fs/ceph/debugfs.c | 11 +++++++++++
>>   fs/ceph/dir.c     | 20 +++++++-------------
>>   fs/ceph/file.c    | 13 ++++++-------
>>   fs/ceph/inode.c   | 11 ++++++++---
>>   fs/ceph/locks.c   |  2 +-
>>   fs/ceph/metric.c  | 14 ++++++++++++++
>>   fs/ceph/metric.h  |  7 +++++++
>>   fs/ceph/quota.c   | 10 +++++-----
>>   fs/ceph/snap.c    |  2 +-
>>   fs/ceph/super.h   |  6 ++++++
>>   11 files changed, 103 insertions(+), 34 deletions(-)
>>
> Looks good. I went ahead and merge this into testing.
>
> Small merge conflict in quota.c, which I guess is probably due to not
> basing this on testing branch. I also dropped what looks like an
> unrelated hunk in the second patch.
>
> In the future, if you can be sure that patches you post apply cleanly to
> testing branch then that would make things easier.

Okay, will do it.

Thanks.


> Thanks!



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-03 14:22   ` Xiubo Li
@ 2020-09-09 20:34     ` Ilya Dryomov
  2020-09-10  0:59       ` Xiubo Li
  0 siblings, 1 reply; 19+ messages in thread
From: Ilya Dryomov @ 2020-09-09 20:34 UTC (permalink / raw)
  To: Xiubo Li; +Cc: Jeff Layton, Yan, Zheng, Patrick Donnelly, Ceph Development

On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
>
> On 2020/9/3 22:18, Jeff Layton wrote:
> > On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> >> From: Xiubo Li <xiubli@redhat.com>
> >>
> >> Changed in V5:
> >> - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
> >> - Remove the is_opened member.
> >>
> >> Changed in V4:
> >> - A small fix about the total_inodes.
> >>
> >> Changed in V3:
> >> - Resend for V2 just forgot one patch, which is adding some helpers
> >> support to simplify the code.
> >>
> >> Changed in V2:
> >> - Add number of inodes that have opened files.
> >> - Remove the dir metrics and fold into files.
> >>
> >>
> >>
> >> Xiubo Li (2):
> >>    ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
> >>    ceph: metrics for opened files, pinned caps and opened inodes
> >>
> >>   fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
> >>   fs/ceph/debugfs.c | 11 +++++++++++
> >>   fs/ceph/dir.c     | 20 +++++++-------------
> >>   fs/ceph/file.c    | 13 ++++++-------
> >>   fs/ceph/inode.c   | 11 ++++++++---
> >>   fs/ceph/locks.c   |  2 +-
> >>   fs/ceph/metric.c  | 14 ++++++++++++++
> >>   fs/ceph/metric.h  |  7 +++++++
> >>   fs/ceph/quota.c   | 10 +++++-----
> >>   fs/ceph/snap.c    |  2 +-
> >>   fs/ceph/super.h   |  6 ++++++
> >>   11 files changed, 103 insertions(+), 34 deletions(-)
> >>
> > Looks good. I went ahead and merge this into testing.
> >
> > Small merge conflict in quota.c, which I guess is probably due to not
> > basing this on testing branch. I also dropped what looks like an
> > unrelated hunk in the second patch.
> >
> > In the future, if you can be sure that patches you post apply cleanly to
> > testing branch then that would make things easier.
>
> Okay, will do it.

Hi Xiubo,

There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
suggestion to move the decrement of total_inodes into ceph_free_inode(),
but it doesn't look like it can be easily deferred past ->evict_inode().

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-09 20:34     ` Ilya Dryomov
@ 2020-09-10  0:59       ` Xiubo Li
  2020-09-10  6:00         ` Ilya Dryomov
  0 siblings, 1 reply; 19+ messages in thread
From: Xiubo Li @ 2020-09-10  0:59 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: Jeff Layton, Yan, Zheng, Patrick Donnelly, Ceph Development

On 2020/9/10 4:34, Ilya Dryomov wrote:
> On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
>> On 2020/9/3 22:18, Jeff Layton wrote:
>>> On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> Changed in V5:
>>>> - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
>>>> - Remove the is_opened member.
>>>>
>>>> Changed in V4:
>>>> - A small fix about the total_inodes.
>>>>
>>>> Changed in V3:
>>>> - Resend for V2 just forgot one patch, which is adding some helpers
>>>> support to simplify the code.
>>>>
>>>> Changed in V2:
>>>> - Add number of inodes that have opened files.
>>>> - Remove the dir metrics and fold into files.
>>>>
>>>>
>>>>
>>>> Xiubo Li (2):
>>>>     ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
>>>>     ceph: metrics for opened files, pinned caps and opened inodes
>>>>
>>>>    fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
>>>>    fs/ceph/debugfs.c | 11 +++++++++++
>>>>    fs/ceph/dir.c     | 20 +++++++-------------
>>>>    fs/ceph/file.c    | 13 ++++++-------
>>>>    fs/ceph/inode.c   | 11 ++++++++---
>>>>    fs/ceph/locks.c   |  2 +-
>>>>    fs/ceph/metric.c  | 14 ++++++++++++++
>>>>    fs/ceph/metric.h  |  7 +++++++
>>>>    fs/ceph/quota.c   | 10 +++++-----
>>>>    fs/ceph/snap.c    |  2 +-
>>>>    fs/ceph/super.h   |  6 ++++++
>>>>    11 files changed, 103 insertions(+), 34 deletions(-)
>>>>
>>> Looks good. I went ahead and merge this into testing.
>>>
>>> Small merge conflict in quota.c, which I guess is probably due to not
>>> basing this on testing branch. I also dropped what looks like an
>>> unrelated hunk in the second patch.
>>>
>>> In the future, if you can be sure that patches you post apply cleanly to
>>> testing branch then that would make things easier.
>> Okay, will do it.
> Hi Xiubo,
>
> There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
> when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
> in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
> suggestion to move the decrement of total_inodes into ceph_free_inode(),
> but it doesn't look like it can be easily deferred past ->evict_inode().

Okay, I will take a look.

Thanks Ilya.


> Thanks,
>
>                  Ilya
>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-10  0:59       ` Xiubo Li
@ 2020-09-10  6:00         ` Ilya Dryomov
  2020-09-10 12:13           ` Jeff Layton
  0 siblings, 1 reply; 19+ messages in thread
From: Ilya Dryomov @ 2020-09-10  6:00 UTC (permalink / raw)
  To: Xiubo Li; +Cc: Jeff Layton, Yan, Zheng, Patrick Donnelly, Ceph Development

On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
>
> On 2020/9/10 4:34, Ilya Dryomov wrote:
> > On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
> >> On 2020/9/3 22:18, Jeff Layton wrote:
> >>> On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> >>>> From: Xiubo Li <xiubli@redhat.com>
> >>>>
> >>>> Changed in V5:
> >>>> - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
> >>>> - Remove the is_opened member.
> >>>>
> >>>> Changed in V4:
> >>>> - A small fix about the total_inodes.
> >>>>
> >>>> Changed in V3:
> >>>> - Resend for V2 just forgot one patch, which is adding some helpers
> >>>> support to simplify the code.
> >>>>
> >>>> Changed in V2:
> >>>> - Add number of inodes that have opened files.
> >>>> - Remove the dir metrics and fold into files.
> >>>>
> >>>>
> >>>>
> >>>> Xiubo Li (2):
> >>>>     ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
> >>>>     ceph: metrics for opened files, pinned caps and opened inodes
> >>>>
> >>>>    fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
> >>>>    fs/ceph/debugfs.c | 11 +++++++++++
> >>>>    fs/ceph/dir.c     | 20 +++++++-------------
> >>>>    fs/ceph/file.c    | 13 ++++++-------
> >>>>    fs/ceph/inode.c   | 11 ++++++++---
> >>>>    fs/ceph/locks.c   |  2 +-
> >>>>    fs/ceph/metric.c  | 14 ++++++++++++++
> >>>>    fs/ceph/metric.h  |  7 +++++++
> >>>>    fs/ceph/quota.c   | 10 +++++-----
> >>>>    fs/ceph/snap.c    |  2 +-
> >>>>    fs/ceph/super.h   |  6 ++++++
> >>>>    11 files changed, 103 insertions(+), 34 deletions(-)
> >>>>
> >>> Looks good. I went ahead and merge this into testing.
> >>>
> >>> Small merge conflict in quota.c, which I guess is probably due to not
> >>> basing this on testing branch. I also dropped what looks like an
> >>> unrelated hunk in the second patch.
> >>>
> >>> In the future, if you can be sure that patches you post apply cleanly to
> >>> testing branch then that would make things easier.
> >> Okay, will do it.
> > Hi Xiubo,
> >
> > There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
> > when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
> > in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
> > suggestion to move the decrement of total_inodes into ceph_free_inode(),
> > but it doesn't look like it can be easily deferred past ->evict_inode().
>
> Okay, I will take a look.

Given that it's just a counter which we don't care about if the
mount is going away, some form of "if (mdsc)" check might do, but
need to make sure that it covers possible races, if any.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-10  6:00         ` Ilya Dryomov
@ 2020-09-10 12:13           ` Jeff Layton
  2020-09-11  3:43             ` Xiubo Li
  0 siblings, 1 reply; 19+ messages in thread
From: Jeff Layton @ 2020-09-10 12:13 UTC (permalink / raw)
  To: Ilya Dryomov, Xiubo Li; +Cc: Yan, Zheng, Patrick Donnelly, Ceph Development

On Thu, 2020-09-10 at 08:00 +0200, Ilya Dryomov wrote:
> On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
> > On 2020/9/10 4:34, Ilya Dryomov wrote:
> > > On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
> > > > On 2020/9/3 22:18, Jeff Layton wrote:
> > > > > On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> > > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > > > 
> > > > > > Changed in V5:
> > > > > > - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
> > > > > > - Remove the is_opened member.
> > > > > > 
> > > > > > Changed in V4:
> > > > > > - A small fix about the total_inodes.
> > > > > > 
> > > > > > Changed in V3:
> > > > > > - Resend for V2 just forgot one patch, which is adding some helpers
> > > > > > support to simplify the code.
> > > > > > 
> > > > > > Changed in V2:
> > > > > > - Add number of inodes that have opened files.
> > > > > > - Remove the dir metrics and fold into files.
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > Xiubo Li (2):
> > > > > >     ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
> > > > > >     ceph: metrics for opened files, pinned caps and opened inodes
> > > > > > 
> > > > > >    fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
> > > > > >    fs/ceph/debugfs.c | 11 +++++++++++
> > > > > >    fs/ceph/dir.c     | 20 +++++++-------------
> > > > > >    fs/ceph/file.c    | 13 ++++++-------
> > > > > >    fs/ceph/inode.c   | 11 ++++++++---
> > > > > >    fs/ceph/locks.c   |  2 +-
> > > > > >    fs/ceph/metric.c  | 14 ++++++++++++++
> > > > > >    fs/ceph/metric.h  |  7 +++++++
> > > > > >    fs/ceph/quota.c   | 10 +++++-----
> > > > > >    fs/ceph/snap.c    |  2 +-
> > > > > >    fs/ceph/super.h   |  6 ++++++
> > > > > >    11 files changed, 103 insertions(+), 34 deletions(-)
> > > > > > 
> > > > > Looks good. I went ahead and merge this into testing.
> > > > > 
> > > > > Small merge conflict in quota.c, which I guess is probably due to not
> > > > > basing this on testing branch. I also dropped what looks like an
> > > > > unrelated hunk in the second patch.
> > > > > 
> > > > > In the future, if you can be sure that patches you post apply cleanly to
> > > > > testing branch then that would make things easier.
> > > > Okay, will do it.
> > > Hi Xiubo,
> > > 
> > > There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
> > > when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
> > > in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
> > > suggestion to move the decrement of total_inodes into ceph_free_inode(),
> > > but it doesn't look like it can be easily deferred past ->evict_inode().
> > 
> > Okay, I will take a look.
> 
> Given that it's just a counter which we don't care about if the
> mount is going away, some form of "if (mdsc)" check might do, but
> need to make sure that it covers possible races, if any.
> 

Good catch, Ilya.

What may be best is to move the increment out of ceph_alloc_inode and
instead put it in ceph_set_ino_cb. Then the decrement can go back into
ceph_evict_inode.

That will mean that you're only counting hashed inodes, but that's
mostly what we're concerned with anyway, so I don't see that as a
problem.
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-10 12:13           ` Jeff Layton
@ 2020-09-11  3:43             ` Xiubo Li
  2020-09-11 11:49               ` Jeff Layton
  0 siblings, 1 reply; 19+ messages in thread
From: Xiubo Li @ 2020-09-11  3:43 UTC (permalink / raw)
  To: Jeff Layton, Ilya Dryomov; +Cc: Yan, Zheng, Patrick Donnelly, Ceph Development

On 2020/9/10 20:13, Jeff Layton wrote:
> On Thu, 2020-09-10 at 08:00 +0200, Ilya Dryomov wrote:
>> On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
>>> On 2020/9/10 4:34, Ilya Dryomov wrote:
>>>> On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
>>>>> On 2020/9/3 22:18, Jeff Layton wrote:
>>>>>> On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
>>>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>>>
>>>>>>> Changed in V5:
>>>>>>> - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
>>>>>>> - Remove the is_opened member.
>>>>>>>
>>>>>>> Changed in V4:
>>>>>>> - A small fix about the total_inodes.
>>>>>>>
>>>>>>> Changed in V3:
>>>>>>> - Resend for V2 just forgot one patch, which is adding some helpers
>>>>>>> support to simplify the code.
>>>>>>>
>>>>>>> Changed in V2:
>>>>>>> - Add number of inodes that have opened files.
>>>>>>> - Remove the dir metrics and fold into files.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Xiubo Li (2):
>>>>>>>      ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
>>>>>>>      ceph: metrics for opened files, pinned caps and opened inodes
>>>>>>>
>>>>>>>     fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
>>>>>>>     fs/ceph/debugfs.c | 11 +++++++++++
>>>>>>>     fs/ceph/dir.c     | 20 +++++++-------------
>>>>>>>     fs/ceph/file.c    | 13 ++++++-------
>>>>>>>     fs/ceph/inode.c   | 11 ++++++++---
>>>>>>>     fs/ceph/locks.c   |  2 +-
>>>>>>>     fs/ceph/metric.c  | 14 ++++++++++++++
>>>>>>>     fs/ceph/metric.h  |  7 +++++++
>>>>>>>     fs/ceph/quota.c   | 10 +++++-----
>>>>>>>     fs/ceph/snap.c    |  2 +-
>>>>>>>     fs/ceph/super.h   |  6 ++++++
>>>>>>>     11 files changed, 103 insertions(+), 34 deletions(-)
>>>>>>>
>>>>>> Looks good. I went ahead and merge this into testing.
>>>>>>
>>>>>> Small merge conflict in quota.c, which I guess is probably due to not
>>>>>> basing this on testing branch. I also dropped what looks like an
>>>>>> unrelated hunk in the second patch.
>>>>>>
>>>>>> In the future, if you can be sure that patches you post apply cleanly to
>>>>>> testing branch then that would make things easier.
>>>>> Okay, will do it.
>>>> Hi Xiubo,
>>>>
>>>> There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
>>>> when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
>>>> in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
>>>> suggestion to move the decrement of total_inodes into ceph_free_inode(),
>>>> but it doesn't look like it can be easily deferred past ->evict_inode().
>>> Okay, I will take a look.
>> Given that it's just a counter which we don't care about if the
>> mount is going away, some form of "if (mdsc)" check might do, but
>> need to make sure that it covers possible races, if any.
>>
> Good catch, Ilya.
>
> What may be best is to move the increment out of ceph_alloc_inode and
> instead put it in ceph_set_ino_cb. Then the decrement can go back into
> ceph_evict_inode.

Hi Jeff, Ilya

Checked the code, it seems in the ceph_evict_inode() we will also hit 
the same issue .

With the '-f' options when umounting, it will skip the inodes whose 
i_count ref > 0. And then free the fsc/mdsc in ceph. And later the 
iput_final() will call the ceph_evict_inode() and then ceph_free_inode().

Could we just check if !!(sb->s_flags & SB_ACTIVE) is false will we skip 
the counting ?

Thanks


> That will mean that you're only counting hashed inodes, but that's
> mostly what we're concerned with anyway, so I don't see that as a
> problem.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-11  3:43             ` Xiubo Li
@ 2020-09-11 11:49               ` Jeff Layton
  2020-09-11 19:46                 ` Jeff Layton
  2020-09-12  3:54                 ` Xiubo Li
  0 siblings, 2 replies; 19+ messages in thread
From: Jeff Layton @ 2020-09-11 11:49 UTC (permalink / raw)
  To: Xiubo Li, Ilya Dryomov; +Cc: Yan, Zheng, Patrick Donnelly, Ceph Development

On Fri, 2020-09-11 at 11:43 +0800, Xiubo Li wrote:
> On 2020/9/10 20:13, Jeff Layton wrote:
> > On Thu, 2020-09-10 at 08:00 +0200, Ilya Dryomov wrote:
> > > On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
> > > > On 2020/9/10 4:34, Ilya Dryomov wrote:
> > > > > On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
> > > > > > On 2020/9/3 22:18, Jeff Layton wrote:
> > > > > > > On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> > > > > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > > > > > 
> > > > > > > > Changed in V5:
> > > > > > > > - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
> > > > > > > > - Remove the is_opened member.
> > > > > > > > 
> > > > > > > > Changed in V4:
> > > > > > > > - A small fix about the total_inodes.
> > > > > > > > 
> > > > > > > > Changed in V3:
> > > > > > > > - Resend for V2 just forgot one patch, which is adding some helpers
> > > > > > > > support to simplify the code.
> > > > > > > > 
> > > > > > > > Changed in V2:
> > > > > > > > - Add number of inodes that have opened files.
> > > > > > > > - Remove the dir metrics and fold into files.
> > > > > > > > 
> > > > > > > > 
> > > > > > > > 
> > > > > > > > Xiubo Li (2):
> > > > > > > >      ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
> > > > > > > >      ceph: metrics for opened files, pinned caps and opened inodes
> > > > > > > > 
> > > > > > > >     fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
> > > > > > > >     fs/ceph/debugfs.c | 11 +++++++++++
> > > > > > > >     fs/ceph/dir.c     | 20 +++++++-------------
> > > > > > > >     fs/ceph/file.c    | 13 ++++++-------
> > > > > > > >     fs/ceph/inode.c   | 11 ++++++++---
> > > > > > > >     fs/ceph/locks.c   |  2 +-
> > > > > > > >     fs/ceph/metric.c  | 14 ++++++++++++++
> > > > > > > >     fs/ceph/metric.h  |  7 +++++++
> > > > > > > >     fs/ceph/quota.c   | 10 +++++-----
> > > > > > > >     fs/ceph/snap.c    |  2 +-
> > > > > > > >     fs/ceph/super.h   |  6 ++++++
> > > > > > > >     11 files changed, 103 insertions(+), 34 deletions(-)
> > > > > > > > 
> > > > > > > Looks good. I went ahead and merge this into testing.
> > > > > > > 
> > > > > > > Small merge conflict in quota.c, which I guess is probably due to not
> > > > > > > basing this on testing branch. I also dropped what looks like an
> > > > > > > unrelated hunk in the second patch.
> > > > > > > 
> > > > > > > In the future, if you can be sure that patches you post apply cleanly to
> > > > > > > testing branch then that would make things easier.
> > > > > > Okay, will do it.
> > > > > Hi Xiubo,
> > > > > 
> > > > > There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
> > > > > when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
> > > > > in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
> > > > > suggestion to move the decrement of total_inodes into ceph_free_inode(),
> > > > > but it doesn't look like it can be easily deferred past ->evict_inode().
> > > > Okay, I will take a look.
> > > Given that it's just a counter which we don't care about if the
> > > mount is going away, some form of "if (mdsc)" check might do, but
> > > need to make sure that it covers possible races, if any.
> > > 
> > Good catch, Ilya.
> > 
> > What may be best is to move the increment out of ceph_alloc_inode and
> > instead put it in ceph_set_ino_cb. Then the decrement can go back into
> > ceph_evict_inode.
> 
> Hi Jeff, Ilya
> 
> Checked the code, it seems in the ceph_evict_inode() we will also hit 
> the same issue .
> 
> With the '-f' options when umounting, it will skip the inodes whose 
> i_count ref > 0. And then free the fsc/mdsc in ceph. And later the 
> iput_final() will call the ceph_evict_inode() and then ceph_free_inode().
> 
> Could we just check if !!(sb->s_flags & SB_ACTIVE) is false will we skip 
> the counting ?
> 

Note that umount -f (MNT_FORCE) just means that ceph_umount_begin is
called before unmounting.

If what you're saying it true, then we have bigger problems.
ceph_evict_inode does this today when ci->i_snap_realm is set:

    struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;

...and then goes on to use that mdsc pointer.

I wonder if we ought to be moving move some of the operations in
ceph_kill_sb into ceph_put_super... particularly the call to
destroy_fs_client()?
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-11 11:49               ` Jeff Layton
@ 2020-09-11 19:46                 ` Jeff Layton
  2020-09-12  4:04                   ` Xiubo Li
  2020-09-13 10:40                   ` Ilya Dryomov
  2020-09-12  3:54                 ` Xiubo Li
  1 sibling, 2 replies; 19+ messages in thread
From: Jeff Layton @ 2020-09-11 19:46 UTC (permalink / raw)
  To: Xiubo Li, Ilya Dryomov; +Cc: Yan, Zheng, Patrick Donnelly, Ceph Development

On Fri, 2020-09-11 at 07:49 -0400, Jeff Layton wrote:
> On Fri, 2020-09-11 at 11:43 +0800, Xiubo Li wrote:
> > On 2020/9/10 20:13, Jeff Layton wrote:
> > > On Thu, 2020-09-10 at 08:00 +0200, Ilya Dryomov wrote:
> > > > On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
> > > > > On 2020/9/10 4:34, Ilya Dryomov wrote:
> > > > > > On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
> > > > > > > On 2020/9/3 22:18, Jeff Layton wrote:
> > > > > > > > On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> > > > > > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > > > > > > 
> > > > > > > > > Changed in V5:
> > > > > > > > > - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
> > > > > > > > > - Remove the is_opened member.
> > > > > > > > > 
> > > > > > > > > Changed in V4:
> > > > > > > > > - A small fix about the total_inodes.
> > > > > > > > > 
> > > > > > > > > Changed in V3:
> > > > > > > > > - Resend for V2 just forgot one patch, which is adding some helpers
> > > > > > > > > support to simplify the code.
> > > > > > > > > 
> > > > > > > > > Changed in V2:
> > > > > > > > > - Add number of inodes that have opened files.
> > > > > > > > > - Remove the dir metrics and fold into files.
> > > > > > > > > 
> > > > > > > > > 
> > > > > > > > > 
> > > > > > > > > Xiubo Li (2):
> > > > > > > > >      ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
> > > > > > > > >      ceph: metrics for opened files, pinned caps and opened inodes
> > > > > > > > > 
> > > > > > > > >     fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
> > > > > > > > >     fs/ceph/debugfs.c | 11 +++++++++++
> > > > > > > > >     fs/ceph/dir.c     | 20 +++++++-------------
> > > > > > > > >     fs/ceph/file.c    | 13 ++++++-------
> > > > > > > > >     fs/ceph/inode.c   | 11 ++++++++---
> > > > > > > > >     fs/ceph/locks.c   |  2 +-
> > > > > > > > >     fs/ceph/metric.c  | 14 ++++++++++++++
> > > > > > > > >     fs/ceph/metric.h  |  7 +++++++
> > > > > > > > >     fs/ceph/quota.c   | 10 +++++-----
> > > > > > > > >     fs/ceph/snap.c    |  2 +-
> > > > > > > > >     fs/ceph/super.h   |  6 ++++++
> > > > > > > > >     11 files changed, 103 insertions(+), 34 deletions(-)
> > > > > > > > > 
> > > > > > > > Looks good. I went ahead and merge this into testing.
> > > > > > > > 
> > > > > > > > Small merge conflict in quota.c, which I guess is probably due to not
> > > > > > > > basing this on testing branch. I also dropped what looks like an
> > > > > > > > unrelated hunk in the second patch.
> > > > > > > > 
> > > > > > > > In the future, if you can be sure that patches you post apply cleanly to
> > > > > > > > testing branch then that would make things easier.
> > > > > > > Okay, will do it.
> > > > > > Hi Xiubo,
> > > > > > 
> > > > > > There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
> > > > > > when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
> > > > > > in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
> > > > > > suggestion to move the decrement of total_inodes into ceph_free_inode(),
> > > > > > but it doesn't look like it can be easily deferred past ->evict_inode().
> > > > > Okay, I will take a look.
> > > > Given that it's just a counter which we don't care about if the
> > > > mount is going away, some form of "if (mdsc)" check might do, but
> > > > need to make sure that it covers possible races, if any.
> > > > 
> > > Good catch, Ilya.
> > > 
> > > What may be best is to move the increment out of ceph_alloc_inode and
> > > instead put it in ceph_set_ino_cb. Then the decrement can go back into
> > > ceph_evict_inode.
> > 
> > Hi Jeff, Ilya
> > 
> > Checked the code, it seems in the ceph_evict_inode() we will also hit 
> > the same issue .
> > 
> > With the '-f' options when umounting, it will skip the inodes whose 
> > i_count ref > 0. And then free the fsc/mdsc in ceph. And later the 
> > iput_final() will call the ceph_evict_inode() and then ceph_free_inode().
> > 
> > Could we just check if !!(sb->s_flags & SB_ACTIVE) is false will we skip 
> > the counting ?
> > 
> 
> Note that umount -f (MNT_FORCE) just means that ceph_umount_begin is
> called before unmounting.
> 
> If what you're saying it true, then we have bigger problems.
> ceph_evict_inode does this today when ci->i_snap_realm is set:
> 
>     struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
> 
> ...and then goes on to use that mdsc pointer.
> 

Now that I look, I don't think that this is a problem. ceph_kill_sb
calls generic_shutdown_super, which calls evict_inodes before the client
is torn down. That should ensure that the mdsc is still good when evict
is called.

We will need to move the increment into the iget5_locked "set" function.
Maybe we can squash the patch below into yours?

----------------------8<---------------------------

ceph: use total_inodes to count hashed inodes instead of allocated ones

We can't guarantee that the mdsc will still be around when free_inode is
called, so move this into evict_inode instead. The increment then will
need to be moved when the thing is hashed, so move that into the set
callback.

Reported-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/inode.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5b9d2ff8af34..39c13fefba8a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -42,10 +42,13 @@ static void ceph_inode_work(struct work_struct *work);
 static int ceph_set_ino_cb(struct inode *inode, void *data)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 
 	ci->i_vino = *(struct ceph_vino *)data;
 	inode->i_ino = ceph_vino_to_ino_t(ci->i_vino);
 	inode_set_iversion_raw(inode, 0);
+	percpu_counter_inc(&mdsc->metric.total_inodes);
+
 	return 0;
 }
 
@@ -425,7 +428,6 @@ static int ceph_fill_fragtree(struct inode *inode,
  */
 struct inode *ceph_alloc_inode(struct super_block *sb)
 {
-	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
 	struct ceph_inode_info *ci;
 	int i;
 
@@ -525,17 +527,12 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
 
 	ci->i_meta_err = 0;
 
-	percpu_counter_inc(&mdsc->metric.total_inodes);
-
 	return &ci->vfs_inode;
 }
 
 void ceph_free_inode(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
-
-	percpu_counter_dec(&mdsc->metric.total_inodes);
 
 	kfree(ci->i_symlink);
 	kmem_cache_free(ceph_inode_cachep, ci);
@@ -544,11 +541,14 @@ void ceph_free_inode(struct inode *inode)
 void ceph_evict_inode(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
 	struct ceph_inode_frag *frag;
 	struct rb_node *n;
 
 	dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
 
+	percpu_counter_dec(&mdsc->metric.total_inodes);
+
 	truncate_inode_pages_final(&inode->i_data);
 	clear_inode(inode);
 
-- 
2.26.2



^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-11 11:49               ` Jeff Layton
  2020-09-11 19:46                 ` Jeff Layton
@ 2020-09-12  3:54                 ` Xiubo Li
  1 sibling, 0 replies; 19+ messages in thread
From: Xiubo Li @ 2020-09-12  3:54 UTC (permalink / raw)
  To: Jeff Layton, Ilya Dryomov; +Cc: Yan, Zheng, Patrick Donnelly, Ceph Development

On 2020/9/11 19:49, Jeff Layton wrote:
> On Fri, 2020-09-11 at 11:43 +0800, Xiubo Li wrote:
>> On 2020/9/10 20:13, Jeff Layton wrote:
>>> On Thu, 2020-09-10 at 08:00 +0200, Ilya Dryomov wrote:
>>>> On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
>>>>> On 2020/9/10 4:34, Ilya Dryomov wrote:
>>>>>> On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
>>>>>>> On 2020/9/3 22:18, Jeff Layton wrote:
>>>>>>>> On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
>>>>>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>>>>>
>>>>>>>>> Changed in V5:
>>>>>>>>> - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
>>>>>>>>> - Remove the is_opened member.
>>>>>>>>>
>>>>>>>>> Changed in V4:
>>>>>>>>> - A small fix about the total_inodes.
>>>>>>>>>
>>>>>>>>> Changed in V3:
>>>>>>>>> - Resend for V2 just forgot one patch, which is adding some helpers
>>>>>>>>> support to simplify the code.
>>>>>>>>>
>>>>>>>>> Changed in V2:
>>>>>>>>> - Add number of inodes that have opened files.
>>>>>>>>> - Remove the dir metrics and fold into files.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Xiubo Li (2):
>>>>>>>>>       ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
>>>>>>>>>       ceph: metrics for opened files, pinned caps and opened inodes
>>>>>>>>>
>>>>>>>>>      fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
>>>>>>>>>      fs/ceph/debugfs.c | 11 +++++++++++
>>>>>>>>>      fs/ceph/dir.c     | 20 +++++++-------------
>>>>>>>>>      fs/ceph/file.c    | 13 ++++++-------
>>>>>>>>>      fs/ceph/inode.c   | 11 ++++++++---
>>>>>>>>>      fs/ceph/locks.c   |  2 +-
>>>>>>>>>      fs/ceph/metric.c  | 14 ++++++++++++++
>>>>>>>>>      fs/ceph/metric.h  |  7 +++++++
>>>>>>>>>      fs/ceph/quota.c   | 10 +++++-----
>>>>>>>>>      fs/ceph/snap.c    |  2 +-
>>>>>>>>>      fs/ceph/super.h   |  6 ++++++
>>>>>>>>>      11 files changed, 103 insertions(+), 34 deletions(-)
>>>>>>>>>
>>>>>>>> Looks good. I went ahead and merge this into testing.
>>>>>>>>
>>>>>>>> Small merge conflict in quota.c, which I guess is probably due to not
>>>>>>>> basing this on testing branch. I also dropped what looks like an
>>>>>>>> unrelated hunk in the second patch.
>>>>>>>>
>>>>>>>> In the future, if you can be sure that patches you post apply cleanly to
>>>>>>>> testing branch then that would make things easier.
>>>>>>> Okay, will do it.
>>>>>> Hi Xiubo,
>>>>>>
>>>>>> There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
>>>>>> when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
>>>>>> in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
>>>>>> suggestion to move the decrement of total_inodes into ceph_free_inode(),
>>>>>> but it doesn't look like it can be easily deferred past ->evict_inode().
>>>>> Okay, I will take a look.
>>>> Given that it's just a counter which we don't care about if the
>>>> mount is going away, some form of "if (mdsc)" check might do, but
>>>> need to make sure that it covers possible races, if any.
>>>>
>>> Good catch, Ilya.
>>>
>>> What may be best is to move the increment out of ceph_alloc_inode and
>>> instead put it in ceph_set_ino_cb. Then the decrement can go back into
>>> ceph_evict_inode.
>> Hi Jeff, Ilya
>>
>> Checked the code, it seems in the ceph_evict_inode() we will also hit
>> the same issue .
>>
>> With the '-f' options when umounting, it will skip the inodes whose
>> i_count ref > 0. And then free the fsc/mdsc in ceph. And later the
>> iput_final() will call the ceph_evict_inode() and then ceph_free_inode().
>>
>> Could we just check if !!(sb->s_flags & SB_ACTIVE) is false will we skip
>> the counting ?
>>
> Note that umount -f (MNT_FORCE) just means that ceph_umount_begin is
> called before unmounting.

Yeah, right.

I misread the SB_FORCE and the MNT_FORCE.


> If what you're saying it true, then we have bigger problems.
> ceph_evict_inode does this today when ci->i_snap_realm is set:
>
>      struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
>
> ...and then goes on to use that mdsc pointer.
>
> I wonder if we ought to be moving move some of the operations in
> ceph_kill_sb into ceph_put_super... particularly the call to
> destroy_fs_client()?



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-11 19:46                 ` Jeff Layton
@ 2020-09-12  4:04                   ` Xiubo Li
  2020-09-12 10:18                     ` Jeff Layton
  2020-09-13 10:40                   ` Ilya Dryomov
  1 sibling, 1 reply; 19+ messages in thread
From: Xiubo Li @ 2020-09-12  4:04 UTC (permalink / raw)
  To: Jeff Layton, Ilya Dryomov; +Cc: Yan, Zheng, Patrick Donnelly, Ceph Development

On 2020/9/12 3:46, Jeff Layton wrote:
> On Fri, 2020-09-11 at 07:49 -0400, Jeff Layton wrote:
>> On Fri, 2020-09-11 at 11:43 +0800, Xiubo Li wrote:
>>> On 2020/9/10 20:13, Jeff Layton wrote:
>>>> On Thu, 2020-09-10 at 08:00 +0200, Ilya Dryomov wrote:
>>>>> On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
>>>>>> On 2020/9/10 4:34, Ilya Dryomov wrote:
>>>>>>> On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
>>>>>>>> On 2020/9/3 22:18, Jeff Layton wrote:
>>>>>>>>> On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
>>>>>>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>>>>>>
>>>>>>>>>> Changed in V5:
>>>>>>>>>> - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
>>>>>>>>>> - Remove the is_opened member.
>>>>>>>>>>
>>>>>>>>>> Changed in V4:
>>>>>>>>>> - A small fix about the total_inodes.
>>>>>>>>>>
>>>>>>>>>> Changed in V3:
>>>>>>>>>> - Resend for V2 just forgot one patch, which is adding some helpers
>>>>>>>>>> support to simplify the code.
>>>>>>>>>>
>>>>>>>>>> Changed in V2:
>>>>>>>>>> - Add number of inodes that have opened files.
>>>>>>>>>> - Remove the dir metrics and fold into files.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Xiubo Li (2):
>>>>>>>>>>       ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
>>>>>>>>>>       ceph: metrics for opened files, pinned caps and opened inodes
>>>>>>>>>>
>>>>>>>>>>      fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
>>>>>>>>>>      fs/ceph/debugfs.c | 11 +++++++++++
>>>>>>>>>>      fs/ceph/dir.c     | 20 +++++++-------------
>>>>>>>>>>      fs/ceph/file.c    | 13 ++++++-------
>>>>>>>>>>      fs/ceph/inode.c   | 11 ++++++++---
>>>>>>>>>>      fs/ceph/locks.c   |  2 +-
>>>>>>>>>>      fs/ceph/metric.c  | 14 ++++++++++++++
>>>>>>>>>>      fs/ceph/metric.h  |  7 +++++++
>>>>>>>>>>      fs/ceph/quota.c   | 10 +++++-----
>>>>>>>>>>      fs/ceph/snap.c    |  2 +-
>>>>>>>>>>      fs/ceph/super.h   |  6 ++++++
>>>>>>>>>>      11 files changed, 103 insertions(+), 34 deletions(-)
>>>>>>>>>>
>>>>>>>>> Looks good. I went ahead and merge this into testing.
>>>>>>>>>
>>>>>>>>> Small merge conflict in quota.c, which I guess is probably due to not
>>>>>>>>> basing this on testing branch. I also dropped what looks like an
>>>>>>>>> unrelated hunk in the second patch.
>>>>>>>>>
>>>>>>>>> In the future, if you can be sure that patches you post apply cleanly to
>>>>>>>>> testing branch then that would make things easier.
>>>>>>>> Okay, will do it.
>>>>>>> Hi Xiubo,
>>>>>>>
>>>>>>> There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
>>>>>>> when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
>>>>>>> in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
>>>>>>> suggestion to move the decrement of total_inodes into ceph_free_inode(),
>>>>>>> but it doesn't look like it can be easily deferred past ->evict_inode().
>>>>>> Okay, I will take a look.
>>>>> Given that it's just a counter which we don't care about if the
>>>>> mount is going away, some form of "if (mdsc)" check might do, but
>>>>> need to make sure that it covers possible races, if any.
>>>>>
>>>> Good catch, Ilya.
>>>>
>>>> What may be best is to move the increment out of ceph_alloc_inode and
>>>> instead put it in ceph_set_ino_cb. Then the decrement can go back into
>>>> ceph_evict_inode.
>>> Hi Jeff, Ilya
>>>
>>> Checked the code, it seems in the ceph_evict_inode() we will also hit
>>> the same issue .
>>>
>>> With the '-f' options when umounting, it will skip the inodes whose
>>> i_count ref > 0. And then free the fsc/mdsc in ceph. And later the
>>> iput_final() will call the ceph_evict_inode() and then ceph_free_inode().
>>>
>>> Could we just check if !!(sb->s_flags & SB_ACTIVE) is false will we skip
>>> the counting ?
>>>
>> Note that umount -f (MNT_FORCE) just means that ceph_umount_begin is
>> called before unmounting.
>>
>> If what you're saying it true, then we have bigger problems.
>> ceph_evict_inode does this today when ci->i_snap_realm is set:
>>
>>      struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
>>
>> ...and then goes on to use that mdsc pointer.
>>
> Now that I look, I don't think that this is a problem. ceph_kill_sb
> calls generic_shutdown_super, which calls evict_inodes before the client
> is torn down. That should ensure that the mdsc is still good when evict
> is called.
>
> We will need to move the increment into the iget5_locked "set" function.
> Maybe we can squash the patch below into yours?

Yeah, the following patch looks good.

Thanks.


>
> ----------------------8<---------------------------
>
> ceph: use total_inodes to count hashed inodes instead of allocated ones
>
> We can't guarantee that the mdsc will still be around when free_inode is
> called, so move this into evict_inode instead. The increment then will
> need to be moved when the thing is hashed, so move that into the set
> callback.
>
> Reported-by: Ilya Dryomov <idryomov@gmail.com>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>   fs/ceph/inode.c | 12 ++++++------
>   1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 5b9d2ff8af34..39c13fefba8a 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -42,10 +42,13 @@ static void ceph_inode_work(struct work_struct *work);
>   static int ceph_set_ino_cb(struct inode *inode, void *data)
>   {
>   	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
>   
>   	ci->i_vino = *(struct ceph_vino *)data;
>   	inode->i_ino = ceph_vino_to_ino_t(ci->i_vino);
>   	inode_set_iversion_raw(inode, 0);
> +	percpu_counter_inc(&mdsc->metric.total_inodes);
> +
>   	return 0;
>   }
>   
> @@ -425,7 +428,6 @@ static int ceph_fill_fragtree(struct inode *inode,
>    */
>   struct inode *ceph_alloc_inode(struct super_block *sb)
>   {
> -	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
>   	struct ceph_inode_info *ci;
>   	int i;
>   
> @@ -525,17 +527,12 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>   
>   	ci->i_meta_err = 0;
>   
> -	percpu_counter_inc(&mdsc->metric.total_inodes);
> -
>   	return &ci->vfs_inode;
>   }
>   
>   void ceph_free_inode(struct inode *inode)
>   {
>   	struct ceph_inode_info *ci = ceph_inode(inode);
> -	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
> -
> -	percpu_counter_dec(&mdsc->metric.total_inodes);
>   
>   	kfree(ci->i_symlink);
>   	kmem_cache_free(ceph_inode_cachep, ci);
> @@ -544,11 +541,14 @@ void ceph_free_inode(struct inode *inode)
>   void ceph_evict_inode(struct inode *inode)
>   {
>   	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
>   	struct ceph_inode_frag *frag;
>   	struct rb_node *n;
>   
>   	dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode));
>   
> +	percpu_counter_dec(&mdsc->metric.total_inodes);
> +
>   	truncate_inode_pages_final(&inode->i_data);
>   	clear_inode(inode);
>   



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-12  4:04                   ` Xiubo Li
@ 2020-09-12 10:18                     ` Jeff Layton
  0 siblings, 0 replies; 19+ messages in thread
From: Jeff Layton @ 2020-09-12 10:18 UTC (permalink / raw)
  To: Xiubo Li, Ilya Dryomov; +Cc: Yan, Zheng, Patrick Donnelly, Ceph Development

On Sat, 2020-09-12 at 12:04 +0800, Xiubo Li wrote:
> On 2020/9/12 3:46, Jeff Layton wrote:
> > On Fri, 2020-09-11 at 07:49 -0400, Jeff Layton wrote:
> > > On Fri, 2020-09-11 at 11:43 +0800, Xiubo Li wrote:
> > > > On 2020/9/10 20:13, Jeff Layton wrote:
> > > > > On Thu, 2020-09-10 at 08:00 +0200, Ilya Dryomov wrote:
> > > > > > On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
> > > > > > > On 2020/9/10 4:34, Ilya Dryomov wrote:
> > > > > > > > On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
> > > > > > > > > On 2020/9/3 22:18, Jeff Layton wrote:
> > > > > > > > > > On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> > > > > > > > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > > > > > > > > 
> > > > > > > > > > > Changed in V5:
> > > > > > > > > > > - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
> > > > > > > > > > > - Remove the is_opened member.
> > > > > > > > > > > 
> > > > > > > > > > > Changed in V4:
> > > > > > > > > > > - A small fix about the total_inodes.
> > > > > > > > > > > 
> > > > > > > > > > > Changed in V3:
> > > > > > > > > > > - Resend for V2 just forgot one patch, which is adding some helpers
> > > > > > > > > > > support to simplify the code.
> > > > > > > > > > > 
> > > > > > > > > > > Changed in V2:
> > > > > > > > > > > - Add number of inodes that have opened files.
> > > > > > > > > > > - Remove the dir metrics and fold into files.
> > > > > > > > > > > 
> > > > > > > > > > > 
> > > > > > > > > > > 
> > > > > > > > > > > Xiubo Li (2):
> > > > > > > > > > >       ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
> > > > > > > > > > >       ceph: metrics for opened files, pinned caps and opened inodes
> > > > > > > > > > > 
> > > > > > > > > > >      fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
> > > > > > > > > > >      fs/ceph/debugfs.c | 11 +++++++++++
> > > > > > > > > > >      fs/ceph/dir.c     | 20 +++++++-------------
> > > > > > > > > > >      fs/ceph/file.c    | 13 ++++++-------
> > > > > > > > > > >      fs/ceph/inode.c   | 11 ++++++++---
> > > > > > > > > > >      fs/ceph/locks.c   |  2 +-
> > > > > > > > > > >      fs/ceph/metric.c  | 14 ++++++++++++++
> > > > > > > > > > >      fs/ceph/metric.h  |  7 +++++++
> > > > > > > > > > >      fs/ceph/quota.c   | 10 +++++-----
> > > > > > > > > > >      fs/ceph/snap.c    |  2 +-
> > > > > > > > > > >      fs/ceph/super.h   |  6 ++++++
> > > > > > > > > > >      11 files changed, 103 insertions(+), 34 deletions(-)
> > > > > > > > > > > 
> > > > > > > > > > Looks good. I went ahead and merge this into testing.
> > > > > > > > > > 
> > > > > > > > > > Small merge conflict in quota.c, which I guess is probably due to not
> > > > > > > > > > basing this on testing branch. I also dropped what looks like an
> > > > > > > > > > unrelated hunk in the second patch.
> > > > > > > > > > 
> > > > > > > > > > In the future, if you can be sure that patches you post apply cleanly to
> > > > > > > > > > testing branch then that would make things easier.
> > > > > > > > > Okay, will do it.
> > > > > > > > Hi Xiubo,
> > > > > > > > 
> > > > > > > > There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
> > > > > > > > when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
> > > > > > > > in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
> > > > > > > > suggestion to move the decrement of total_inodes into ceph_free_inode(),
> > > > > > > > but it doesn't look like it can be easily deferred past ->evict_inode().
> > > > > > > Okay, I will take a look.
> > > > > > Given that it's just a counter which we don't care about if the
> > > > > > mount is going away, some form of "if (mdsc)" check might do, but
> > > > > > need to make sure that it covers possible races, if any.
> > > > > > 
> > > > > Good catch, Ilya.
> > > > > 
> > > > > What may be best is to move the increment out of ceph_alloc_inode and
> > > > > instead put it in ceph_set_ino_cb. Then the decrement can go back into
> > > > > ceph_evict_inode.
> > > > Hi Jeff, Ilya
> > > > 
> > > > Checked the code, it seems in the ceph_evict_inode() we will also hit
> > > > the same issue .
> > > > 
> > > > With the '-f' options when umounting, it will skip the inodes whose
> > > > i_count ref > 0. And then free the fsc/mdsc in ceph. And later the
> > > > iput_final() will call the ceph_evict_inode() and then ceph_free_inode().
> > > > 
> > > > Could we just check if !!(sb->s_flags & SB_ACTIVE) is false will we skip
> > > > the counting ?
> > > > 
> > > Note that umount -f (MNT_FORCE) just means that ceph_umount_begin is
> > > called before unmounting.
> > > 
> > > If what you're saying it true, then we have bigger problems.
> > > ceph_evict_inode does this today when ci->i_snap_realm is set:
> > > 
> > >      struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
> > > 
> > > ...and then goes on to use that mdsc pointer.
> > > 
> > Now that I look, I don't think that this is a problem. ceph_kill_sb
> > calls generic_shutdown_super, which calls evict_inodes before the client
> > is torn down. That should ensure that the mdsc is still good when evict
> > is called.
> > 
> > We will need to move the increment into the iget5_locked "set" function.
> > Maybe we can squash the patch below into yours?
> 
> Yeah, the following patch looks good.
> 

Great, folded and re-pushed into testing. Please take a look and make sure the resulting patch looks ok to you.

Thanks!
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-11 19:46                 ` Jeff Layton
  2020-09-12  4:04                   ` Xiubo Li
@ 2020-09-13 10:40                   ` Ilya Dryomov
  2020-09-13 12:45                     ` Jeff Layton
  1 sibling, 1 reply; 19+ messages in thread
From: Ilya Dryomov @ 2020-09-13 10:40 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Xiubo Li, Yan, Zheng, Patrick Donnelly, Ceph Development

On Fri, Sep 11, 2020 at 9:46 PM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Fri, 2020-09-11 at 07:49 -0400, Jeff Layton wrote:
> > On Fri, 2020-09-11 at 11:43 +0800, Xiubo Li wrote:
> > > On 2020/9/10 20:13, Jeff Layton wrote:
> > > > On Thu, 2020-09-10 at 08:00 +0200, Ilya Dryomov wrote:
> > > > > On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
> > > > > > On 2020/9/10 4:34, Ilya Dryomov wrote:
> > > > > > > On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
> > > > > > > > On 2020/9/3 22:18, Jeff Layton wrote:
> > > > > > > > > On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> > > > > > > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > > > > > > >
> > > > > > > > > > Changed in V5:
> > > > > > > > > > - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
> > > > > > > > > > - Remove the is_opened member.
> > > > > > > > > >
> > > > > > > > > > Changed in V4:
> > > > > > > > > > - A small fix about the total_inodes.
> > > > > > > > > >
> > > > > > > > > > Changed in V3:
> > > > > > > > > > - Resend for V2 just forgot one patch, which is adding some helpers
> > > > > > > > > > support to simplify the code.
> > > > > > > > > >
> > > > > > > > > > Changed in V2:
> > > > > > > > > > - Add number of inodes that have opened files.
> > > > > > > > > > - Remove the dir metrics and fold into files.
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > Xiubo Li (2):
> > > > > > > > > >      ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
> > > > > > > > > >      ceph: metrics for opened files, pinned caps and opened inodes
> > > > > > > > > >
> > > > > > > > > >     fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
> > > > > > > > > >     fs/ceph/debugfs.c | 11 +++++++++++
> > > > > > > > > >     fs/ceph/dir.c     | 20 +++++++-------------
> > > > > > > > > >     fs/ceph/file.c    | 13 ++++++-------
> > > > > > > > > >     fs/ceph/inode.c   | 11 ++++++++---
> > > > > > > > > >     fs/ceph/locks.c   |  2 +-
> > > > > > > > > >     fs/ceph/metric.c  | 14 ++++++++++++++
> > > > > > > > > >     fs/ceph/metric.h  |  7 +++++++
> > > > > > > > > >     fs/ceph/quota.c   | 10 +++++-----
> > > > > > > > > >     fs/ceph/snap.c    |  2 +-
> > > > > > > > > >     fs/ceph/super.h   |  6 ++++++
> > > > > > > > > >     11 files changed, 103 insertions(+), 34 deletions(-)
> > > > > > > > > >
> > > > > > > > > Looks good. I went ahead and merge this into testing.
> > > > > > > > >
> > > > > > > > > Small merge conflict in quota.c, which I guess is probably due to not
> > > > > > > > > basing this on testing branch. I also dropped what looks like an
> > > > > > > > > unrelated hunk in the second patch.
> > > > > > > > >
> > > > > > > > > In the future, if you can be sure that patches you post apply cleanly to
> > > > > > > > > testing branch then that would make things easier.
> > > > > > > > Okay, will do it.
> > > > > > > Hi Xiubo,
> > > > > > >
> > > > > > > There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
> > > > > > > when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
> > > > > > > in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
> > > > > > > suggestion to move the decrement of total_inodes into ceph_free_inode(),
> > > > > > > but it doesn't look like it can be easily deferred past ->evict_inode().
> > > > > > Okay, I will take a look.
> > > > > Given that it's just a counter which we don't care about if the
> > > > > mount is going away, some form of "if (mdsc)" check might do, but
> > > > > need to make sure that it covers possible races, if any.
> > > > >
> > > > Good catch, Ilya.
> > > >
> > > > What may be best is to move the increment out of ceph_alloc_inode and
> > > > instead put it in ceph_set_ino_cb. Then the decrement can go back into
> > > > ceph_evict_inode.
> > >
> > > Hi Jeff, Ilya
> > >
> > > Checked the code, it seems in the ceph_evict_inode() we will also hit
> > > the same issue .
> > >
> > > With the '-f' options when umounting, it will skip the inodes whose
> > > i_count ref > 0. And then free the fsc/mdsc in ceph. And later the
> > > iput_final() will call the ceph_evict_inode() and then ceph_free_inode().
> > >
> > > Could we just check if !!(sb->s_flags & SB_ACTIVE) is false will we skip
> > > the counting ?
> > >
> >
> > Note that umount -f (MNT_FORCE) just means that ceph_umount_begin is
> > called before unmounting.
> >
> > If what you're saying it true, then we have bigger problems.
> > ceph_evict_inode does this today when ci->i_snap_realm is set:
> >
> >     struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
> >
> > ...and then goes on to use that mdsc pointer.
> >
>
> Now that I look, I don't think that this is a problem. ceph_kill_sb
> calls generic_shutdown_super, which calls evict_inodes before the client
> is torn down. That should ensure that the mdsc is still good when evict
> is called.
>
> We will need to move the increment into the iget5_locked "set" function.
> Maybe we can squash the patch below into yours?
>
> ----------------------8<---------------------------
>
> ceph: use total_inodes to count hashed inodes instead of allocated ones
>
> We can't guarantee that the mdsc will still be around when free_inode is
> called, so move this into evict_inode instead. The increment then will
> need to be moved when the thing is hashed, so move that into the set
> callback.
>
> Reported-by: Ilya Dryomov <idryomov@gmail.com>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/ceph/inode.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 5b9d2ff8af34..39c13fefba8a 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -42,10 +42,13 @@ static void ceph_inode_work(struct work_struct *work);
>  static int ceph_set_ino_cb(struct inode *inode, void *data)
>  {
>         struct ceph_inode_info *ci = ceph_inode(inode);
> +       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
>
>         ci->i_vino = *(struct ceph_vino *)data;
>         inode->i_ino = ceph_vino_to_ino_t(ci->i_vino);
>         inode_set_iversion_raw(inode, 0);
> +       percpu_counter_inc(&mdsc->metric.total_inodes);
> +
>         return 0;
>  }
>
> @@ -425,7 +428,6 @@ static int ceph_fill_fragtree(struct inode *inode,
>   */
>  struct inode *ceph_alloc_inode(struct super_block *sb)
>  {
> -       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
>         struct ceph_inode_info *ci;
>         int i;
>
> @@ -525,17 +527,12 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>
>         ci->i_meta_err = 0;
>
> -       percpu_counter_inc(&mdsc->metric.total_inodes);
> -
>         return &ci->vfs_inode;
>  }
>
>  void ceph_free_inode(struct inode *inode)
>  {
>         struct ceph_inode_info *ci = ceph_inode(inode);
> -       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
> -
> -       percpu_counter_dec(&mdsc->metric.total_inodes);
>
>         kfree(ci->i_symlink);
>         kmem_cache_free(ceph_inode_cachep, ci);
> @@ -544,11 +541,14 @@ void ceph_free_inode(struct inode *inode)
>  void ceph_evict_inode(struct inode *inode)
>  {
>         struct ceph_inode_info *ci = ceph_inode(inode);
> +       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);

I'd also remove a duplicate mdsc variable declared in ci->i_snap_realm
branch.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes
  2020-09-13 10:40                   ` Ilya Dryomov
@ 2020-09-13 12:45                     ` Jeff Layton
  0 siblings, 0 replies; 19+ messages in thread
From: Jeff Layton @ 2020-09-13 12:45 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: Xiubo Li, Yan, Zheng, Patrick Donnelly, Ceph Development

On Sun, 2020-09-13 at 12:40 +0200, Ilya Dryomov wrote:
> On Fri, Sep 11, 2020 at 9:46 PM Jeff Layton <jlayton@kernel.org> wrote:
> > On Fri, 2020-09-11 at 07:49 -0400, Jeff Layton wrote:
> > > On Fri, 2020-09-11 at 11:43 +0800, Xiubo Li wrote:
> > > > On 2020/9/10 20:13, Jeff Layton wrote:
> > > > > On Thu, 2020-09-10 at 08:00 +0200, Ilya Dryomov wrote:
> > > > > > On Thu, Sep 10, 2020 at 2:59 AM Xiubo Li <xiubli@redhat.com> wrote:
> > > > > > > On 2020/9/10 4:34, Ilya Dryomov wrote:
> > > > > > > > On Thu, Sep 3, 2020 at 4:22 PM Xiubo Li <xiubli@redhat.com> wrote:
> > > > > > > > > On 2020/9/3 22:18, Jeff Layton wrote:
> > > > > > > > > > On Thu, 2020-09-03 at 09:01 -0400, xiubli@redhat.com wrote:
> > > > > > > > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > > > > > > > > 
> > > > > > > > > > > Changed in V5:
> > > > > > > > > > > - Remove mdsc parsing helpers except the ceph_sb_to_mdsc()
> > > > > > > > > > > - Remove the is_opened member.
> > > > > > > > > > > 
> > > > > > > > > > > Changed in V4:
> > > > > > > > > > > - A small fix about the total_inodes.
> > > > > > > > > > > 
> > > > > > > > > > > Changed in V3:
> > > > > > > > > > > - Resend for V2 just forgot one patch, which is adding some helpers
> > > > > > > > > > > support to simplify the code.
> > > > > > > > > > > 
> > > > > > > > > > > Changed in V2:
> > > > > > > > > > > - Add number of inodes that have opened files.
> > > > > > > > > > > - Remove the dir metrics and fold into files.
> > > > > > > > > > > 
> > > > > > > > > > > 
> > > > > > > > > > > 
> > > > > > > > > > > Xiubo Li (2):
> > > > > > > > > > >      ceph: add ceph_sb_to_mdsc helper support to parse the mdsc
> > > > > > > > > > >      ceph: metrics for opened files, pinned caps and opened inodes
> > > > > > > > > > > 
> > > > > > > > > > >     fs/ceph/caps.c    | 41 +++++++++++++++++++++++++++++++++++++----
> > > > > > > > > > >     fs/ceph/debugfs.c | 11 +++++++++++
> > > > > > > > > > >     fs/ceph/dir.c     | 20 +++++++-------------
> > > > > > > > > > >     fs/ceph/file.c    | 13 ++++++-------
> > > > > > > > > > >     fs/ceph/inode.c   | 11 ++++++++---
> > > > > > > > > > >     fs/ceph/locks.c   |  2 +-
> > > > > > > > > > >     fs/ceph/metric.c  | 14 ++++++++++++++
> > > > > > > > > > >     fs/ceph/metric.h  |  7 +++++++
> > > > > > > > > > >     fs/ceph/quota.c   | 10 +++++-----
> > > > > > > > > > >     fs/ceph/snap.c    |  2 +-
> > > > > > > > > > >     fs/ceph/super.h   |  6 ++++++
> > > > > > > > > > >     11 files changed, 103 insertions(+), 34 deletions(-)
> > > > > > > > > > > 
> > > > > > > > > > Looks good. I went ahead and merge this into testing.
> > > > > > > > > > 
> > > > > > > > > > Small merge conflict in quota.c, which I guess is probably due to not
> > > > > > > > > > basing this on testing branch. I also dropped what looks like an
> > > > > > > > > > unrelated hunk in the second patch.
> > > > > > > > > > 
> > > > > > > > > > In the future, if you can be sure that patches you post apply cleanly to
> > > > > > > > > > testing branch then that would make things easier.
> > > > > > > > > Okay, will do it.
> > > > > > > > Hi Xiubo,
> > > > > > > > 
> > > > > > > > There is a problem with lifetimes here.  mdsc isn't guaranteed to exist
> > > > > > > > when ->free_inode() is called.  This can lead to crashes on a NULL mdsc
> > > > > > > > in ceph_free_inode() in case of e.g. "umount -f".  I know it was Jeff's
> > > > > > > > suggestion to move the decrement of total_inodes into ceph_free_inode(),
> > > > > > > > but it doesn't look like it can be easily deferred past ->evict_inode().
> > > > > > > Okay, I will take a look.
> > > > > > Given that it's just a counter which we don't care about if the
> > > > > > mount is going away, some form of "if (mdsc)" check might do, but
> > > > > > need to make sure that it covers possible races, if any.
> > > > > > 
> > > > > Good catch, Ilya.
> > > > > 
> > > > > What may be best is to move the increment out of ceph_alloc_inode and
> > > > > instead put it in ceph_set_ino_cb. Then the decrement can go back into
> > > > > ceph_evict_inode.
> > > > 
> > > > Hi Jeff, Ilya
> > > > 
> > > > Checked the code, it seems in the ceph_evict_inode() we will also hit
> > > > the same issue .
> > > > 
> > > > With the '-f' options when umounting, it will skip the inodes whose
> > > > i_count ref > 0. And then free the fsc/mdsc in ceph. And later the
> > > > iput_final() will call the ceph_evict_inode() and then ceph_free_inode().
> > > > 
> > > > Could we just check if !!(sb->s_flags & SB_ACTIVE) is false will we skip
> > > > the counting ?
> > > > 
> > > 
> > > Note that umount -f (MNT_FORCE) just means that ceph_umount_begin is
> > > called before unmounting.
> > > 
> > > If what you're saying it true, then we have bigger problems.
> > > ceph_evict_inode does this today when ci->i_snap_realm is set:
> > > 
> > >     struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
> > > 
> > > ...and then goes on to use that mdsc pointer.
> > > 
> > 
> > Now that I look, I don't think that this is a problem. ceph_kill_sb
> > calls generic_shutdown_super, which calls evict_inodes before the client
> > is torn down. That should ensure that the mdsc is still good when evict
> > is called.
> > 
> > We will need to move the increment into the iget5_locked "set" function.
> > Maybe we can squash the patch below into yours?
> > 
> > ----------------------8<---------------------------
> > 
> > ceph: use total_inodes to count hashed inodes instead of allocated ones
> > 
> > We can't guarantee that the mdsc will still be around when free_inode is
> > called, so move this into evict_inode instead. The increment then will
> > need to be moved when the thing is hashed, so move that into the set
> > callback.
> > 
> > Reported-by: Ilya Dryomov <idryomov@gmail.com>
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/ceph/inode.c | 12 ++++++------
> >  1 file changed, 6 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> > index 5b9d2ff8af34..39c13fefba8a 100644
> > --- a/fs/ceph/inode.c
> > +++ b/fs/ceph/inode.c
> > @@ -42,10 +42,13 @@ static void ceph_inode_work(struct work_struct *work);
> >  static int ceph_set_ino_cb(struct inode *inode, void *data)
> >  {
> >         struct ceph_inode_info *ci = ceph_inode(inode);
> > +       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
> > 
> >         ci->i_vino = *(struct ceph_vino *)data;
> >         inode->i_ino = ceph_vino_to_ino_t(ci->i_vino);
> >         inode_set_iversion_raw(inode, 0);
> > +       percpu_counter_inc(&mdsc->metric.total_inodes);
> > +
> >         return 0;
> >  }
> > 
> > @@ -425,7 +428,6 @@ static int ceph_fill_fragtree(struct inode *inode,
> >   */
> >  struct inode *ceph_alloc_inode(struct super_block *sb)
> >  {
> > -       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
> >         struct ceph_inode_info *ci;
> >         int i;
> > 
> > @@ -525,17 +527,12 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
> > 
> >         ci->i_meta_err = 0;
> > 
> > -       percpu_counter_inc(&mdsc->metric.total_inodes);
> > -
> >         return &ci->vfs_inode;
> >  }
> > 
> >  void ceph_free_inode(struct inode *inode)
> >  {
> >         struct ceph_inode_info *ci = ceph_inode(inode);
> > -       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
> > -
> > -       percpu_counter_dec(&mdsc->metric.total_inodes);
> > 
> >         kfree(ci->i_symlink);
> >         kmem_cache_free(ceph_inode_cachep, ci);
> > @@ -544,11 +541,14 @@ void ceph_free_inode(struct inode *inode)
> >  void ceph_evict_inode(struct inode *inode)
> >  {
> >         struct ceph_inode_info *ci = ceph_inode(inode);
> > +       struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
> 
> I'd also remove a duplicate mdsc variable declared in ci->i_snap_realm
> branch.
> 

Good catch. Fixed in testing branch.

Thanks,
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2020-09-13 12:45 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-03 13:01 [PATCH v5 0/2] ceph: metrics for opened files, pinned caps and opened inodes xiubli
2020-09-03 13:01 ` [PATCH v5 1/2] ceph: add ceph_sb_to_mdsc helper support to parse the mdsc xiubli
2020-09-03 13:01 ` [PATCH v5 2/2] ceph: metrics for opened files, pinned caps and opened inodes xiubli
2020-09-03 14:16   ` Jeff Layton
2020-09-03 14:20     ` Xiubo Li
2020-09-03 14:18 ` [PATCH v5 0/2] " Jeff Layton
2020-09-03 14:22   ` Xiubo Li
2020-09-09 20:34     ` Ilya Dryomov
2020-09-10  0:59       ` Xiubo Li
2020-09-10  6:00         ` Ilya Dryomov
2020-09-10 12:13           ` Jeff Layton
2020-09-11  3:43             ` Xiubo Li
2020-09-11 11:49               ` Jeff Layton
2020-09-11 19:46                 ` Jeff Layton
2020-09-12  4:04                   ` Xiubo Li
2020-09-12 10:18                     ` Jeff Layton
2020-09-13 10:40                   ` Ilya Dryomov
2020-09-13 12:45                     ` Jeff Layton
2020-09-12  3:54                 ` Xiubo Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).