* [RFC PATCH] ceph: add remote object copy counter to fs client
@ 2021-10-20 14:37 Luís Henriques
  2021-10-20 16:27 ` Jeff Layton
  0 siblings, 1 reply; 20+ messages in thread
From: Luís Henriques @ 2021-10-20 14:37 UTC (permalink / raw)
  To: Jeff Layton, Ilya Dryomov
  Cc: ceph-devel, linux-kernel, Luís Henriques, Patrick Donnelly

This counter will keep track of the number of remote object copies done on
copy_file_range syscalls.  This counter will be filesystem per-client, and
can be accessed from the client debugfs directory.

Cc: Patrick Donnelly <pdonnell@redhat.com>
Signed-off-by: Luís Henriques <lhenriques@suse.de>
---
This is an RFC to reply to Patrick's request in [0].  Note that I'm not
100% sure about the usefulness of this patch, or if this is the best way
to provide the functionality Patrick requested.  Anyway, this is just to
get some feedback, hence the RFC.

Cheers,
--
Luís

[0] https://github.com/ceph/ceph/pull/42720
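
For reference, with this patch applied the counter can be read straight from
the client's debugfs directory, e.g. (value purely illustrative):

# cat /sys/kernel/debug/ceph/*/copyfrom
42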

 fs/ceph/debugfs.c | 17 ++++++++++++++++-
 fs/ceph/file.c    |  1 +
 fs/ceph/super.c   |  1 +
 fs/ceph/super.h   |  2 ++
 4 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 38b78b45811f..09f4c04ade0e 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -346,13 +346,22 @@ static int status_show(struct seq_file *s, void *p)
 	return 0;
 }
 
+static int copyfrom_show(struct seq_file *s, void *p)
+{
+	struct ceph_fs_client *fsc = s->private;
+
+	seq_printf(s, "%llu\n", atomic64_read(&fsc->copyfrom_count));
+
+	return 0;
+}
+
 DEFINE_SHOW_ATTRIBUTE(mdsmap);
 DEFINE_SHOW_ATTRIBUTE(mdsc);
 DEFINE_SHOW_ATTRIBUTE(caps);
 DEFINE_SHOW_ATTRIBUTE(mds_sessions);
 DEFINE_SHOW_ATTRIBUTE(metric);
 DEFINE_SHOW_ATTRIBUTE(status);
-
+DEFINE_SHOW_ATTRIBUTE(copyfrom);
 
 /*
  * debugfs
@@ -387,6 +396,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 	debugfs_remove(fsc->debugfs_caps);
 	debugfs_remove(fsc->debugfs_metric);
 	debugfs_remove(fsc->debugfs_mdsc);
+	debugfs_remove(fsc->debugfs_copyfrom);
 }
 
 void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
@@ -443,6 +453,11 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 						  fsc->client->debugfs_dir,
 						  fsc,
 						  &status_fops);
+	fsc->debugfs_copyfrom = debugfs_create_file("copyfrom",
+						    0400,
+						    fsc->client->debugfs_dir,
+						    fsc,
+						    &copyfrom_fops);
 }
 
 
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d16fd2d5fd42..bbeb437ca4bf 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2254,6 +2254,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
 				bytes = ret;
 			goto out;
 		}
+		atomic64_inc(&fsc->copyfrom_count);
 		len -= object_size;
 		bytes += object_size;
 		*src_off += object_size;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9b1b7f4cfdd4..4972554185e3 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -670,6 +670,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	fsc->have_copy_from2 = true;
 
 	atomic_long_set(&fsc->writeback_count, 0);
+	atomic64_set(&fsc->copyfrom_count, 0);
 
 	err = -ENOMEM;
 	/*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a40eb14c282a..65846beca418 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -119,6 +119,7 @@ struct ceph_fs_client {
 	struct ceph_mds_client *mdsc;
 
 	atomic_long_t writeback_count;
+	atomic64_t copyfrom_count;
 
 	struct workqueue_struct *inode_wq;
 	struct workqueue_struct *cap_wq;
@@ -131,6 +132,7 @@ struct ceph_fs_client {
 	struct dentry *debugfs_metric;
 	struct dentry *debugfs_status;
 	struct dentry *debugfs_mds_sessions;
+	struct dentry *debugfs_copyfrom;
 #endif
 
 #ifdef CONFIG_CEPH_FSCACHE


* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-20 14:37 [RFC PATCH] ceph: add remote object copy counter to fs client Luís Henriques
@ 2021-10-20 16:27 ` Jeff Layton
  2021-10-20 16:58   ` Luís Henriques
  2021-10-21 13:52   ` Patrick Donnelly
  0 siblings, 2 replies; 20+ messages in thread
From: Jeff Layton @ 2021-10-20 16:27 UTC (permalink / raw)
  To: Luís Henriques, Ilya Dryomov
  Cc: ceph-devel, linux-kernel, Patrick Donnelly

On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> This counter will keep track of the number of remote object copies done on
> copy_file_range syscalls.  This counter will be filesystem per-client, and
> can be accessed from the client debugfs directory.
> 
> Cc: Patrick Donnelly <pdonnell@redhat.com>
> Signed-off-by: Luís Henriques <lhenriques@suse.de>
> ---
> This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> 100% sure about the usefulness of this patch, or if this is the best way
> to provide the functionality Patrick requested.  Anyway, this is just to
> get some feedback, hence the RFC.
> 
> Cheers,
> --
> Luís
> 
> [0] https://github.com/ceph/ceph/pull/42720
> 

I think this would be better integrated into the stats infrastructure.

Maybe you could add a new set of "copy" stats to struct
ceph_client_metric that tracks the total copy operations done, their
size and latency (similar to read and write ops)?
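
Something along these lines perhaps, mirroring the existing read/write
metrics (the field names below are hypothetical, just to sketch the shape of
the additions to struct ceph_client_metric):

	/* possible per-client copy_file_range (object copy) metrics */
	spinlock_t copyfrom_metric_lock;
	u64 total_copyfrom;		/* remote object copies done */
	u64 copyfrom_size_sum;		/* bytes copied remotely */
	ktime_t copyfrom_latency_sum;
	ktime_t copyfrom_latency_min;
	ktime_t copyfrom_latency_max;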


>  fs/ceph/debugfs.c | 17 ++++++++++++++++-
>  fs/ceph/file.c    |  1 +
>  fs/ceph/super.c   |  1 +
>  fs/ceph/super.h   |  2 ++
>  4 files changed, 20 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index 38b78b45811f..09f4c04ade0e 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -346,13 +346,22 @@ static int status_show(struct seq_file *s, void *p)
>  	return 0;
>  }
>  
> +static int copyfrom_show(struct seq_file *s, void *p)
> +{
> +	struct ceph_fs_client *fsc = s->private;
> +
> +	seq_printf(s, "%llu\n", atomic64_read(&fsc->copyfrom_count));
> +
> +	return 0;
> +}
> +
>  DEFINE_SHOW_ATTRIBUTE(mdsmap);
>  DEFINE_SHOW_ATTRIBUTE(mdsc);
>  DEFINE_SHOW_ATTRIBUTE(caps);
>  DEFINE_SHOW_ATTRIBUTE(mds_sessions);
>  DEFINE_SHOW_ATTRIBUTE(metric);
>  DEFINE_SHOW_ATTRIBUTE(status);
> -
> +DEFINE_SHOW_ATTRIBUTE(copyfrom);
>  
>  /*
>   * debugfs
> @@ -387,6 +396,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
>  	debugfs_remove(fsc->debugfs_caps);
>  	debugfs_remove(fsc->debugfs_metric);
>  	debugfs_remove(fsc->debugfs_mdsc);
> +	debugfs_remove(fsc->debugfs_copyfrom);
>  }
>  
>  void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
> @@ -443,6 +453,11 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
>  						  fsc->client->debugfs_dir,
>  						  fsc,
>  						  &status_fops);
> +	fsc->debugfs_copyfrom = debugfs_create_file("copyfrom",
> +						    0400,
> +						    fsc->client->debugfs_dir,
> +						    fsc,
> +						    &copyfrom_fops);
>  }
>  
>  
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index d16fd2d5fd42..bbeb437ca4bf 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -2254,6 +2254,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
>  				bytes = ret;
>  			goto out;
>  		}
> +		atomic64_inc(&fsc->copyfrom_count);
>  		len -= object_size;
>  		bytes += object_size;
>  		*src_off += object_size;
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 9b1b7f4cfdd4..4972554185e3 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -670,6 +670,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
>  	fsc->have_copy_from2 = true;
>  
>  	atomic_long_set(&fsc->writeback_count, 0);
> +	atomic64_set(&fsc->copyfrom_count, 0);
>  
>  	err = -ENOMEM;
>  	/*
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index a40eb14c282a..65846beca418 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -119,6 +119,7 @@ struct ceph_fs_client {
>  	struct ceph_mds_client *mdsc;
>  
>  	atomic_long_t writeback_count;
> +	atomic64_t copyfrom_count;
>  
>  	struct workqueue_struct *inode_wq;
>  	struct workqueue_struct *cap_wq;
> @@ -131,6 +132,7 @@ struct ceph_fs_client {
>  	struct dentry *debugfs_metric;
>  	struct dentry *debugfs_status;
>  	struct dentry *debugfs_mds_sessions;
> +	struct dentry *debugfs_copyfrom;
>  #endif
>  
>  #ifdef CONFIG_CEPH_FSCACHE

-- 
Jeff Layton <jlayton@kernel.org>



* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-20 16:27 ` Jeff Layton
@ 2021-10-20 16:58   ` Luís Henriques
  2021-10-21 13:52   ` Patrick Donnelly
  1 sibling, 0 replies; 20+ messages in thread
From: Luís Henriques @ 2021-10-20 16:58 UTC (permalink / raw)
  To: Jeff Layton; +Cc: Ilya Dryomov, ceph-devel, linux-kernel, Patrick Donnelly

On Wed, Oct 20, 2021 at 12:27:21PM -0400, Jeff Layton wrote:
> On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > This counter will keep track of the number of remote object copies done on
> > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > can be accessed from the client debugfs directory.
> > 
> > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > ---
> > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > 100% sure about the usefulness of this patch, or if this is the best way
> > to provide the functionality Patrick requested.  Anyway, this is just to
> > get some feedback, hence the RFC.
> > 
> > Cheers,
> > --
> > Luís
> > 
> > [0] https://github.com/ceph/ceph/pull/42720
> > 
> 
> I think this would be better integrated into the stats infrastructure.
> 
> Maybe you could add a new set of "copy" stats to struct
> ceph_client_metric that tracks the total copy operations done, their
> size and latency (similar to read and write ops)?

Ah, yeah.  That probably makes sense; I'll have a closer look at that
infrastructure.  I suspect that involves touching the MDS code, which
unfortunately means it'll take me a bit longer to send out v2.

Cheers,
--
Luís


> >  fs/ceph/debugfs.c | 17 ++++++++++++++++-
> >  fs/ceph/file.c    |  1 +
> >  fs/ceph/super.c   |  1 +
> >  fs/ceph/super.h   |  2 ++
> >  4 files changed, 20 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> > index 38b78b45811f..09f4c04ade0e 100644
> > --- a/fs/ceph/debugfs.c
> > +++ b/fs/ceph/debugfs.c
> > @@ -346,13 +346,22 @@ static int status_show(struct seq_file *s, void *p)
> >  	return 0;
> >  }
> >  
> > +static int copyfrom_show(struct seq_file *s, void *p)
> > +{
> > +	struct ceph_fs_client *fsc = s->private;
> > +
> > +	seq_printf(s, "%llu\n", atomic64_read(&fsc->copyfrom_count));
> > +
> > +	return 0;
> > +}
> > +
> >  DEFINE_SHOW_ATTRIBUTE(mdsmap);
> >  DEFINE_SHOW_ATTRIBUTE(mdsc);
> >  DEFINE_SHOW_ATTRIBUTE(caps);
> >  DEFINE_SHOW_ATTRIBUTE(mds_sessions);
> >  DEFINE_SHOW_ATTRIBUTE(metric);
> >  DEFINE_SHOW_ATTRIBUTE(status);
> > -
> > +DEFINE_SHOW_ATTRIBUTE(copyfrom);
> >  
> >  /*
> >   * debugfs
> > @@ -387,6 +396,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
> >  	debugfs_remove(fsc->debugfs_caps);
> >  	debugfs_remove(fsc->debugfs_metric);
> >  	debugfs_remove(fsc->debugfs_mdsc);
> > +	debugfs_remove(fsc->debugfs_copyfrom);
> >  }
> >  
> >  void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
> > @@ -443,6 +453,11 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
> >  						  fsc->client->debugfs_dir,
> >  						  fsc,
> >  						  &status_fops);
> > +	fsc->debugfs_copyfrom = debugfs_create_file("copyfrom",
> > +						    0400,
> > +						    fsc->client->debugfs_dir,
> > +						    fsc,
> > +						    &copyfrom_fops);
> >  }
> >  
> >  
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index d16fd2d5fd42..bbeb437ca4bf 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -2254,6 +2254,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
> >  				bytes = ret;
> >  			goto out;
> >  		}
> > +		atomic64_inc(&fsc->copyfrom_count);
> >  		len -= object_size;
> >  		bytes += object_size;
> >  		*src_off += object_size;
> > diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> > index 9b1b7f4cfdd4..4972554185e3 100644
> > --- a/fs/ceph/super.c
> > +++ b/fs/ceph/super.c
> > @@ -670,6 +670,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
> >  	fsc->have_copy_from2 = true;
> >  
> >  	atomic_long_set(&fsc->writeback_count, 0);
> > +	atomic64_set(&fsc->copyfrom_count, 0);
> >  
> >  	err = -ENOMEM;
> >  	/*
> > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > index a40eb14c282a..65846beca418 100644
> > --- a/fs/ceph/super.h
> > +++ b/fs/ceph/super.h
> > @@ -119,6 +119,7 @@ struct ceph_fs_client {
> >  	struct ceph_mds_client *mdsc;
> >  
> >  	atomic_long_t writeback_count;
> > +	atomic64_t copyfrom_count;
> >  
> >  	struct workqueue_struct *inode_wq;
> >  	struct workqueue_struct *cap_wq;
> > @@ -131,6 +132,7 @@ struct ceph_fs_client {
> >  	struct dentry *debugfs_metric;
> >  	struct dentry *debugfs_status;
> >  	struct dentry *debugfs_mds_sessions;
> > +	struct dentry *debugfs_copyfrom;
> >  #endif
> >  
> >  #ifdef CONFIG_CEPH_FSCACHE
> 
> -- 
> Jeff Layton <jlayton@kernel.org>
> 


* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-20 16:27 ` Jeff Layton
  2021-10-20 16:58   ` Luís Henriques
@ 2021-10-21 13:52   ` Patrick Donnelly
  2021-10-21 15:43     ` Jeff Layton
  1 sibling, 1 reply; 20+ messages in thread
From: Patrick Donnelly @ 2021-10-21 13:52 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Luís Henriques, Ilya Dryomov, Ceph Development, linux-kernel

On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > This counter will keep track of the number of remote object copies done on
> > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > can be accessed from the client debugfs directory.
> >
> > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > ---
> > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > 100% sure about the usefulness of this patch, or if this is the best way
> > to provide the functionality Patrick requested.  Anyway, this is just to
> > get some feedback, hence the RFC.
> >
> > Cheers,
> > --
> > Luís
> >
> > [0] https://github.com/ceph/ceph/pull/42720
> >
>
> I think this would be better integrated into the stats infrastructure.
>
> Maybe you could add a new set of "copy" stats to struct
> ceph_client_metric that tracks the total copy operations done, their
> size and latency (similar to read and write ops)?

I think it's a good idea to integrate this into "stats" but I think a
local debugfs file for some counters is still useful. The "stats"
module is immature at this time and I'd rather not build any qa tests
(yet) that rely on it.

Can we generalize this patch-set to a file named "op_counters" or
similar and additionally add other OSD ops performed by the kclient?

-- 
Patrick Donnelly, Ph.D.
He / Him / His
Principal Software Engineer
Red Hat, Inc.
GPG: 19F28A586F808C2402351B93C3301A3E258DD79D



* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-21 13:52   ` Patrick Donnelly
@ 2021-10-21 15:43     ` Jeff Layton
  2021-10-21 16:18       ` Patrick Donnelly
  0 siblings, 1 reply; 20+ messages in thread
From: Jeff Layton @ 2021-10-21 15:43 UTC (permalink / raw)
  To: Patrick Donnelly
  Cc: Luís Henriques, Ilya Dryomov, Ceph Development, linux-kernel

On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > This counter will keep track of the number of remote object copies done on
> > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > can be accessed from the client debugfs directory.
> > > 
> > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > ---
> > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > 100% sure about the usefulness of this patch, or if this is the best way
> > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > get some feedback, hence the RFC.
> > > 
> > > Cheers,
> > > --
> > > Luís
> > > 
> > > [0] https://github.com/ceph/ceph/pull/42720
> > > 
> > 
> > I think this would be better integrated into the stats infrastructure.
> > 
> > Maybe you could add a new set of "copy" stats to struct
> > ceph_client_metric that tracks the total copy operations done, their
> > size and latency (similar to read and write ops)?
> 
> I think it's a good idea to integrate this into "stats" but I think a
> local debugfs file for some counters is still useful. The "stats"
> module is immature at this time and I'd rather not build any qa tests
> (yet) that rely on it.
> 
> Can we generalize this patch-set to a file named "op_counters" or
> similar and additionally add other OSD ops performed by the kclient?
> 


Tracking this sort of thing is the main purpose of the stats code. I'm
really not keen on adding a whole separate set of files for reporting
this. 

What's the specific problem with relying on the data in debugfs
"metrics" file?


-- 
Jeff Layton <jlayton@kernel.org>



* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-21 15:43     ` Jeff Layton
@ 2021-10-21 16:18       ` Patrick Donnelly
  2021-10-21 16:35         ` Jeff Layton
  0 siblings, 1 reply; 20+ messages in thread
From: Patrick Donnelly @ 2021-10-21 16:18 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Luís Henriques, Ilya Dryomov, Ceph Development, linux-kernel

On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> > On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > >
> > > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > > This counter will keep track of the number of remote object copies done on
> > > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > > can be accessed from the client debugfs directory.
> > > >
> > > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > > ---
> > > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > > 100% sure about the usefulness of this patch, or if this is the best way
> > > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > > get some feedback, hence the RFC.
> > > >
> > > > Cheers,
> > > > --
> > > > Luís
> > > >
> > > > [0] https://github.com/ceph/ceph/pull/42720
> > > >
> > >
> > > I think this would be better integrated into the stats infrastructure.
> > >
> > > Maybe you could add a new set of "copy" stats to struct
> > > ceph_client_metric that tracks the total copy operations done, their
> > > size and latency (similar to read and write ops)?
> >
> > I think it's a good idea to integrate this into "stats" but I think a
> > local debugfs file for some counters is still useful. The "stats"
> > module is immature at this time and I'd rather not build any qa tests
> > (yet) that rely on it.
> >
> > Can we generalize this patch-set to a file named "op_counters" or
> > similar and additionally add other OSD ops performed by the kclient?
> >
>
>
> Tracking this sort of thing is the main purpose of the stats code. I'm
> really not keen on adding a whole separate set of files for reporting
> this.

Maybe I'm confused. Is there some "file" which is already used for
this type of debugging information? Or do you mean the code for
sending stats to the MDS to support cephfs-top?

> What's the specific problem with relying on the data in debugfs
> "metrics" file?

Maybe no problem? I wasn't aware of a "metrics" file.

-- 
Patrick Donnelly, Ph.D.
He / Him / His
Principal Software Engineer
Red Hat, Inc.
GPG: 19F28A586F808C2402351B93C3301A3E258DD79D



* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-21 16:18       ` Patrick Donnelly
@ 2021-10-21 16:35         ` Jeff Layton
  2021-10-21 17:30           ` Patrick Donnelly
  2021-10-25 10:12           ` Luís Henriques
  0 siblings, 2 replies; 20+ messages in thread
From: Jeff Layton @ 2021-10-21 16:35 UTC (permalink / raw)
  To: Patrick Donnelly
  Cc: Luís Henriques, Ilya Dryomov, Ceph Development, linux-kernel

On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
> On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> > > On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > 
> > > > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > > > This counter will keep track of the number of remote object copies done on
> > > > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > > > can be accessed from the client debugfs directory.
> > > > > 
> > > > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > > > ---
> > > > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > > > 100% sure about the usefulness of this patch, or if this is the best way
> > > > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > > > get some feedback, hence the RFC.
> > > > > 
> > > > > Cheers,
> > > > > --
> > > > > Luís
> > > > > 
> > > > > [0] https://github.com/ceph/ceph/pull/42720
> > > > > 
> > > > 
> > > > I think this would be better integrated into the stats infrastructure.
> > > > 
> > > > Maybe you could add a new set of "copy" stats to struct
> > > > ceph_client_metric that tracks the total copy operations done, their
> > > > size and latency (similar to read and write ops)?
> > > 
> > > I think it's a good idea to integrate this into "stats" but I think a
> > > local debugfs file for some counters is still useful. The "stats"
> > > module is immature at this time and I'd rather not build any qa tests
> > > (yet) that rely on it.
> > > 
> > > Can we generalize this patch-set to a file named "op_counters" or
> > > similar and additionally add other OSD ops performed by the kclient?
> > > 
> > 
> > 
> > Tracking this sort of thing is the main purpose of the stats code. I'm
> > really not keen on adding a whole separate set of files for reporting
> > this.
> 
> Maybe I'm confused. Is there some "file" which is already used for
> this type of debugging information? Or do you mean the code for
> sending stats to the MDS to support cephfs-top?
> 
> > What's the specific problem with relying on the data in debugfs
> > "metrics" file?
> 
> Maybe no problem? I wasn't aware of a "metrics" file.
> 

Yes. For instance:

# cat /sys/kernel/debug/ceph/*/metrics
item                               total
------------------------------------------
opened files  / total inodes       0 / 4
pinned i_caps / total inodes       5 / 4
opened inodes / total inodes       0 / 4

item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
-----------------------------------------------------------------------------------
read          0           0               0               0               0
write         5           914013          824797          1092343         103476
metadata      79          12856           1572            114572          13262

item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
----------------------------------------------------------------------------------------
read          0           0               0               0               0
write         5           4194304         4194304         4194304         20971520

item          total           miss            hit
-------------------------------------------------
d_lease       11              0               29
caps          5               68              10702


I'm proposing that Luis add new lines for "copy" to go along with the
"read" and "write" ones. The "total" counter should give you a count of
the number of operations.
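
For example (numbers purely illustrative), the latency table would simply
grow an extra row:

item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
-----------------------------------------------------------------------------------
copy          3           1520            1201            1985            312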
 
-- 
Jeff Layton <jlayton@kernel.org>



* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-21 16:35         ` Jeff Layton
@ 2021-10-21 17:30           ` Patrick Donnelly
  2021-10-21 17:33             ` Jeff Layton
  2021-10-26  3:05             ` Xiubo Li
  2021-10-25 10:12           ` Luís Henriques
  1 sibling, 2 replies; 20+ messages in thread
From: Patrick Donnelly @ 2021-10-21 17:30 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Luís Henriques, Ilya Dryomov, Ceph Development, linux-kernel

On Thu, Oct 21, 2021 at 12:35 PM Jeff Layton <jlayton@kernel.org> wrote:
>
> On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
> > On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
> > >
> > > On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> > > > On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > >
> > > > > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > > > > This counter will keep track of the number of remote object copies done on
> > > > > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > > > > can be accessed from the client debugfs directory.
> > > > > >
> > > > > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > > > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > > > > ---
> > > > > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > > > > 100% sure about the usefulness of this patch, or if this is the best way
> > > > > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > > > > get some feedback, hence the RFC.
> > > > > >
> > > > > > Cheers,
> > > > > > --
> > > > > > Luís
> > > > > >
> > > > > > [0] https://github.com/ceph/ceph/pull/42720
> > > > > >
> > > > >
> > > > > I think this would be better integrated into the stats infrastructure.
> > > > >
> > > > > Maybe you could add a new set of "copy" stats to struct
> > > > > ceph_client_metric that tracks the total copy operations done, their
> > > > > size and latency (similar to read and write ops)?
> > > >
> > > > I think it's a good idea to integrate this into "stats" but I think a
> > > > local debugfs file for some counters is still useful. The "stats"
> > > > module is immature at this time and I'd rather not build any qa tests
> > > > (yet) that rely on it.
> > > >
> > > > Can we generalize this patch-set to a file named "op_counters" or
> > > > similar and additionally add other OSD ops performed by the kclient?
> > > >
> > >
> > >
> > > Tracking this sort of thing is the main purpose of the stats code. I'm
> > > really not keen on adding a whole separate set of files for reporting
> > > this.
> >
> > Maybe I'm confused. Is there some "file" which is already used for
> > this type of debugging information? Or do you mean the code for
> > sending stats to the MDS to support cephfs-top?
> >
> > > What's the specific problem with relying on the data in debugfs
> > > "metrics" file?
> >
> > Maybe no problem? I wasn't aware of a "metrics" file.
> >
>
> Yes. For instance:
>
> # cat /sys/kernel/debug/ceph/*/metrics
> item                               total
> ------------------------------------------
> opened files  / total inodes       0 / 4
> pinned i_caps / total inodes       5 / 4
> opened inodes / total inodes       0 / 4
>
> item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
> -----------------------------------------------------------------------------------
> read          0           0               0               0               0
> write         5           914013          824797          1092343         103476
> metadata      79          12856           1572            114572          13262
>
> item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
> ----------------------------------------------------------------------------------------
> read          0           0               0               0               0
> write         5           4194304         4194304         4194304         20971520
>
> item          total           miss            hit
> -------------------------------------------------
> d_lease       11              0               29
> caps          5               68              10702
>
>
> I'm proposing that Luis add new lines for "copy" to go along with the
> "read" and "write" ones. The "total" counter should give you a count of
> the number of operations.

Okay that makes more sense!

Side note: I am a bit horrified by how computer-unfriendly that
table-formatted data is.

-- 
Patrick Donnelly, Ph.D.
He / Him / His
Principal Software Engineer
Red Hat, Inc.
GPG: 19F28A586F808C2402351B93C3301A3E258DD79D



* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-21 17:30           ` Patrick Donnelly
@ 2021-10-21 17:33             ` Jeff Layton
  2021-10-26  3:05             ` Xiubo Li
  1 sibling, 0 replies; 20+ messages in thread
From: Jeff Layton @ 2021-10-21 17:33 UTC (permalink / raw)
  To: Patrick Donnelly
  Cc: Luís Henriques, Ilya Dryomov, Ceph Development, linux-kernel

On Thu, 2021-10-21 at 13:30 -0400, Patrick Donnelly wrote:
> On Thu, Oct 21, 2021 at 12:35 PM Jeff Layton <jlayton@kernel.org> wrote:
> > 
> > On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
> > > On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > 
> > > > On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> > > > > On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > 
> > > > > > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > > > > > This counter will keep track of the number of remote object copies done on
> > > > > > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > > > > > can be accessed from the client debugfs directory.
> > > > > > > 
> > > > > > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > > > > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > > > > > ---
> > > > > > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > > > > > 100% sure about the usefulness of this patch, or if this is the best way
> > > > > > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > > > > > get some feedback, hence the RFC.
> > > > > > > 
> > > > > > > Cheers,
> > > > > > > --
> > > > > > > Luís
> > > > > > > 
> > > > > > > [0] https://github.com/ceph/ceph/pull/42720
> > > > > > > 
> > > > > > 
> > > > > > I think this would be better integrated into the stats infrastructure.
> > > > > > 
> > > > > > Maybe you could add a new set of "copy" stats to struct
> > > > > > ceph_client_metric that tracks the total copy operations done, their
> > > > > > size and latency (similar to read and write ops)?
> > > > > 
> > > > > I think it's a good idea to integrate this into "stats" but I think a
> > > > > local debugfs file for some counters is still useful. The "stats"
> > > > > module is immature at this time and I'd rather not build any qa tests
> > > > > (yet) that rely on it.
> > > > > 
> > > > > Can we generalize this patch-set to a file named "op_counters" or
> > > > > similar and additionally add other OSD ops performed by the kclient?
> > > > > 
> > > > 
> > > > 
> > > > Tracking this sort of thing is the main purpose of the stats code. I'm
> > > > really not keen on adding a whole separate set of files for reporting
> > > > this.
> > > 
> > > Maybe I'm confused. Is there some "file" which is already used for
> > > this type of debugging information? Or do you mean the code for
> > > sending stats to the MDS to support cephfs-top?
> > > 
> > > > What's the specific problem with relying on the data in debugfs
> > > > "metrics" file?
> > > 
> > > Maybe no problem? I wasn't aware of a "metrics" file.
> > > 
> > 
> > Yes. For instance:
> > 
> > # cat /sys/kernel/debug/ceph/*/metrics
> > item                               total
> > ------------------------------------------
> > opened files  / total inodes       0 / 4
> > pinned i_caps / total inodes       5 / 4
> > opened inodes / total inodes       0 / 4
> > 
> > item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
> > -----------------------------------------------------------------------------------
> > read          0           0               0               0               0
> > write         5           914013          824797          1092343         103476
> > metadata      79          12856           1572            114572          13262
> > 
> > item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
> > ----------------------------------------------------------------------------------------
> > read          0           0               0               0               0
> > write         5           4194304         4194304         4194304         20971520
> > 
> > item          total           miss            hit
> > -------------------------------------------------
> > d_lease       11              0               29
> > caps          5               68              10702
> > 
> > 
> > I'm proposing that Luis add new lines for "copy" to go along with the
> > "read" and "write" ones. The "total" counter should give you a count of
> > the number of operations.
> 
> Okay that makes more sense!
> 
> Side note: I am a bit horrified by how computer-unfriendly that
> table-formatted data is.
> 

Not going to disagree with you there. I'm happy to consider patches to
reformat that to be more machine-readable.
-- 
Jeff Layton <jlayton@kernel.org>



* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-21 16:35         ` Jeff Layton
  2021-10-21 17:30           ` Patrick Donnelly
@ 2021-10-25 10:12           ` Luís Henriques
  2021-10-25 10:20             ` Jeff Layton
  1 sibling, 1 reply; 20+ messages in thread
From: Luís Henriques @ 2021-10-25 10:12 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Patrick Donnelly, Ilya Dryomov, Ceph Development, linux-kernel

On Thu, Oct 21, 2021 at 12:35:18PM -0400, Jeff Layton wrote:
> On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
> > On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > 
> > > On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> > > > On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > 
> > > > > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > > > > This counter will keep track of the number of remote object copies done on
> > > > > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > > > > can be accessed from the client debugfs directory.
> > > > > > 
> > > > > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > > > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > > > > ---
> > > > > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > > > > 100% sure about the usefulness of this patch, or if this is the best way
> > > > > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > > > > get some feedback, hence the RFC.
> > > > > > 
> > > > > > Cheers,
> > > > > > --
> > > > > > Luís
> > > > > > 
> > > > > > [0] https://github.com/ceph/ceph/pull/42720
> > > > > > 
> > > > > 
> > > > > I think this would be better integrated into the stats infrastructure.
> > > > > 
> > > > > Maybe you could add a new set of "copy" stats to struct
> > > > > ceph_client_metric that tracks the total copy operations done, their
> > > > > size and latency (similar to read and write ops)?
> > > > 
> > > > I think it's a good idea to integrate this into "stats" but I think a
> > > > local debugfs file for some counters is still useful. The "stats"
> > > > module is immature at this time and I'd rather not build any qa tests
> > > > (yet) that rely on it.
> > > > 
> > > > Can we generalize this patch-set to a file named "op_counters" or
> > > > similar and additionally add other OSD ops performed by the kclient?
> > > > 
> > > 
> > > 
> > > Tracking this sort of thing is the main purpose of the stats code. I'm
> > > really not keen on adding a whole separate set of files for reporting
> > > this.
> > 
> > Maybe I'm confused. Is there some "file" which is already used for
> > this type of debugging information? Or do you mean the code for
> > sending stats to the MDS to support cephfs-top?
> > 
> > > What's the specific problem with relying on the data in debugfs
> > > "metrics" file?
> > 
> > Maybe no problem? I wasn't aware of a "metrics" file.
> > 
> 
> Yes. For instance:
> 
> # cat /sys/kernel/debug/ceph/*/metrics
> item                               total
> ------------------------------------------
> opened files  / total inodes       0 / 4
> pinned i_caps / total inodes       5 / 4
> opened inodes / total inodes       0 / 4
> 
> item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
> -----------------------------------------------------------------------------------
> read          0           0               0               0               0
> write         5           914013          824797          1092343         103476
> metadata      79          12856           1572            114572          13262
> 
> item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
> ----------------------------------------------------------------------------------------
> read          0           0               0               0               0
> write         5           4194304         4194304         4194304         20971520
> 
> item          total           miss            hit
> -------------------------------------------------
> d_lease       11              0               29
> caps          5               68              10702
> 
> 
> I'm proposing that Luis add new lines for "copy" to go along with the
> "read" and "write" ones. The "total" counter should give you a count of
> the number of operations.

The problem with this is that it will require quite some work on the
MDS-side because, AFAIU, the MDS will need to handle different versions of
the CEPH_MSG_CLIENT_METRICS message (with and without the new copy-from
metrics).

Will this extra metric ever be useful on the MDS side?  From what I
understood Patrick's initial request was to have a way to find out, on the
client, if remote copies are really happening.  (*sigh* for not having
tracepoints.)

Anyway, I can look into adding this to the metrics infrastructure, but
it'll likely take me some more time to get to it and to figure out (once
again) how the messages versioning work.

Cheers,
--
Luís


* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-25 10:12           ` Luís Henriques
@ 2021-10-25 10:20             ` Jeff Layton
  2021-10-25 10:52               ` Luís Henriques
  0 siblings, 1 reply; 20+ messages in thread
From: Jeff Layton @ 2021-10-25 10:20 UTC (permalink / raw)
  To: Luís Henriques
  Cc: Patrick Donnelly, Ilya Dryomov, Ceph Development, linux-kernel

On Mon, 2021-10-25 at 11:12 +0100, Luís Henriques wrote:
> On Thu, Oct 21, 2021 at 12:35:18PM -0400, Jeff Layton wrote:
> > On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
> > > On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > 
> > > > On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> > > > > On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > 
> > > > > > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > > > > > This counter will keep track of the number of remote object copies done on
> > > > > > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > > > > > can be accessed from the client debugfs directory.
> > > > > > > 
> > > > > > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > > > > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > > > > > ---
> > > > > > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > > > > > 100% sure about the usefulness of this patch, or if this is the best way
> > > > > > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > > > > > get some feedback, hence the RFC.
> > > > > > > 
> > > > > > > Cheers,
> > > > > > > --
> > > > > > > Luís
> > > > > > > 
> > > > > > > [0] https://github.com/ceph/ceph/pull/42720
> > > > > > > 
> > > > > > 
> > > > > > I think this would be better integrated into the stats infrastructure.
> > > > > > 
> > > > > > Maybe you could add a new set of "copy" stats to struct
> > > > > > ceph_client_metric that tracks the total copy operations done, their
> > > > > > size and latency (similar to read and write ops)?
> > > > > 
> > > > > I think it's a good idea to integrate this into "stats" but I think a
> > > > > local debugfs file for some counters is still useful. The "stats"
> > > > > module is immature at this time and I'd rather not build any qa tests
> > > > > (yet) that rely on it.
> > > > > 
> > > > > Can we generalize this patch-set to a file named "op_counters" or
> > > > > similar and additionally add other OSD ops performed by the kclient?
> > > > > 
> > > > 
> > > > 
> > > > Tracking this sort of thing is the main purpose of the stats code. I'm
> > > > really not keen on adding a whole separate set of files for reporting
> > > > this.
> > > 
> > > Maybe I'm confused. Is there some "file" which is already used for
> > > this type of debugging information? Or do you mean the code for
> > > sending stats to the MDS to support cephfs-top?
> > > 
> > > > What's the specific problem with relying on the data in debugfs
> > > > "metrics" file?
> > > 
> > > Maybe no problem? I wasn't aware of a "metrics" file.
> > > 
> > 
> > Yes. For instance:
> > 
> > # cat /sys/kernel/debug/ceph/*/metrics
> > item                               total
> > ------------------------------------------
> > opened files  / total inodes       0 / 4
> > pinned i_caps / total inodes       5 / 4
> > opened inodes / total inodes       0 / 4
> > 
> > item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
> > -----------------------------------------------------------------------------------
> > read          0           0               0               0               0
> > write         5           914013          824797          1092343         103476
> > metadata      79          12856           1572            114572          13262
> > 
> > item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
> > ----------------------------------------------------------------------------------------
> > read          0           0               0               0               0
> > write         5           4194304         4194304         4194304         20971520
> > 
> > item          total           miss            hit
> > -------------------------------------------------
> > d_lease       11              0               29
> > caps          5               68              10702
> > 
> > 
> > I'm proposing that Luis add new lines for "copy" to go along with the
> > "read" and "write" ones. The "total" counter should give you a count of
> > the number of operations.
> 
> The problem with this is that it will require quite some work on the
> MDS-side because, AFAIU, the MDS will need to handle different versions of
> the CEPH_MSG_CLIENT_METRICS message (with and without the new copy-from
> metrics).
> 
> Will this extra metric ever be useful on the MDS side?  From what I
> understood Patrick's initial request was to have a way to find out, on the
> client, if remote copies are really happening.  (*sigh* for not having
> tracepoints.)
> 
> Anyway, I can look into adding this to the metrics infrastructure, but
> it'll likely take me some more time to get to it and to figure out (once
> again) how the messages versioning work.
> 

I think it is useful info to report to the MDS, but it's not required to
send these to the MDS to solve the current problem. My suggestion would
be to add what's needed to track these stats in the kclient and report
them via debugfs, but don't send the info to the MDS just yet.

Later, we could extend the protocol with COPY stats, and add the
necessary infrastructure to the MDS to deal with it. Once that's in
place, we can then extend the kclient to start sending this info along
when it reports the stats.
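
A rough sketch of the client-side piece (using the hypothetical field names
suggested earlier in the thread; this is only meant to show the shape of the
change, not a tested implementation):

static void ceph_update_copyfrom_metrics(struct ceph_client_metric *m,
					 ktime_t start, ktime_t end, u64 size)
{
	ktime_t lat = ktime_sub(end, start);

	spin_lock(&m->copyfrom_metric_lock);
	m->total_copyfrom++;
	m->copyfrom_size_sum += size;
	m->copyfrom_latency_sum = ktime_add(m->copyfrom_latency_sum, lat);
	/* min/max/stdev updates omitted for brevity */
	spin_unlock(&m->copyfrom_metric_lock);
}

ceph_do_objects_copy() would call this after each successful OSD copy-from
request, and metric_show() would print the new counters alongside the
read/write lines, without anything being sent to the MDS yet.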
-- 
Jeff Layton <jlayton@kernel.org>



* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-25 10:20             ` Jeff Layton
@ 2021-10-25 10:52               ` Luís Henriques
  0 siblings, 0 replies; 20+ messages in thread
From: Luís Henriques @ 2021-10-25 10:52 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Patrick Donnelly, Ilya Dryomov, Ceph Development, linux-kernel

On Mon, Oct 25, 2021 at 06:20:40AM -0400, Jeff Layton wrote:
> On Mon, 2021-10-25 at 11:12 +0100, Luís Henriques wrote:
> > On Thu, Oct 21, 2021 at 12:35:18PM -0400, Jeff Layton wrote:
> > > On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
> > > > On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > 
> > > > > On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> > > > > > On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > 
> > > > > > > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > > > > > > This counter will keep track of the number of remote object copies done on
> > > > > > > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > > > > > > can be accessed from the client debugfs directory.
> > > > > > > > 
> > > > > > > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > > > > > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > > > > > > ---
> > > > > > > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > > > > > > 100% sure about the usefulness of this patch, or if this is the best way
> > > > > > > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > > > > > > get some feedback, hence the RFC.
> > > > > > > > 
> > > > > > > > Cheers,
> > > > > > > > --
> > > > > > > > Luís
> > > > > > > > 
> > > > > > > > [0] https://github.com/ceph/ceph/pull/42720
> > > > > > > > 
> > > > > > > 
> > > > > > > I think this would be better integrated into the stats infrastructure.
> > > > > > > 
> > > > > > > Maybe you could add a new set of "copy" stats to struct
> > > > > > > ceph_client_metric that tracks the total copy operations done, their
> > > > > > > size and latency (similar to read and write ops)?
> > > > > > 
> > > > > > I think it's a good idea to integrate this into "stats" but I think a
> > > > > > local debugfs file for some counters is still useful. The "stats"
> > > > > > module is immature at this time and I'd rather not build any qa tests
> > > > > > (yet) that rely on it.
> > > > > > 
> > > > > > Can we generalize this patch-set to a file named "op_counters" or
> > > > > > similar and additionally add other OSD ops performed by the kclient?
> > > > > > 
> > > > > 
> > > > > 
> > > > > Tracking this sort of thing is the main purpose of the stats code. I'm
> > > > > really not keen on adding a whole separate set of files for reporting
> > > > > this.
> > > > 
> > > > Maybe I'm confused. Is there some "file" which is already used for
> > > > this type of debugging information? Or do you mean the code for
> > > > sending stats to the MDS to support cephfs-top?
> > > > 
> > > > > What's the specific problem with relying on the data in debugfs
> > > > > "metrics" file?
> > > > 
> > > > Maybe no problem? I wasn't aware of a "metrics" file.
> > > > 
> > > 
> > > Yes. For instance:
> > > 
> > > # cat /sys/kernel/debug/ceph/*/metrics
> > > item                               total
> > > ------------------------------------------
> > > opened files  / total inodes       0 / 4
> > > pinned i_caps / total inodes       5 / 4
> > > opened inodes / total inodes       0 / 4
> > > 
> > > item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
> > > -----------------------------------------------------------------------------------
> > > read          0           0               0               0               0
> > > write         5           914013          824797          1092343         103476
> > > metadata      79          12856           1572            114572          13262
> > > 
> > > item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
> > > ----------------------------------------------------------------------------------------
> > > read          0           0               0               0               0
> > > write         5           4194304         4194304         4194304         20971520
> > > 
> > > item          total           miss            hit
> > > -------------------------------------------------
> > > d_lease       11              0               29
> > > caps          5               68              10702
> > > 
> > > 
> > > I'm proposing that Luis add new lines for "copy" to go along with the
> > > "read" and "write" ones. The "total" counter should give you a count of
> > > the number of operations.
> > 
> > The problem with this is that it will require quite some work on the
> > MDS-side because, AFAIU, the MDS will need to handle different versions of
> > the CEPH_MSG_CLIENT_METRICS message (with and without the new copy-from
> > metrics).
> > 
> > Will this extra metric ever be useful on the MDS side?  From what I
> > understood Patrick's initial request was to have a way to find out, on the
> > client, if remote copies are really happening.  (*sigh* for not having
> > tracepoints.)
> > 
> > Anyway, I can look into adding this to the metrics infrastructure, but
> > it'll likely take me some more time to get to it and to figure out (once
> > again) how the messages versioning work.
> > 
> 
> I think it is useful info to report to the MDS, but it's not required to
> send these to the MDS to solve the current problem. My suggestion would
> be to add what's needed to track these stats in the kclient and report
> them via debugfs, but don't send the info to the MDS just yet.
> 
> Later, we could extend the protocol with COPY stats, and add the
> necessary infrastructure to the MDS to deal with it. Once that's in
> place, we can then extend the kclient to start sending this info along
> when it reports the stats.

Awesome, that sounds good to me.  I'll look into re-writing this patch
following your suggestion.  Thanks!

Cheers,
--
Luís


* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-21 17:30           ` Patrick Donnelly
  2021-10-21 17:33             ` Jeff Layton
@ 2021-10-26  3:05             ` Xiubo Li
  2021-10-26 11:40               ` Jeff Layton
  1 sibling, 1 reply; 20+ messages in thread
From: Xiubo Li @ 2021-10-26  3:05 UTC (permalink / raw)
  To: Patrick Donnelly, Jeff Layton
  Cc: Luís Henriques, Ilya Dryomov, Ceph Development, linux-kernel


On 10/22/21 1:30 AM, Patrick Donnelly wrote:
> On Thu, Oct 21, 2021 at 12:35 PM Jeff Layton <jlayton@kernel.org> wrote:
>> On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
>>> On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
>>>> On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
>>>>> On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
>>>>>> On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
>>>>>>> This counter will keep track of the number of remote object copies done on
>>>>>>> copy_file_range syscalls.  This counter will be filesystem per-client, and
>>>>>>> can be accessed from the client debugfs directory.
>>>>>>>
>>>>>>> Cc: Patrick Donnelly <pdonnell@redhat.com>
>>>>>>> Signed-off-by: Luís Henriques <lhenriques@suse.de>
>>>>>>> ---
>>>>>>> This is an RFC to reply to Patrick's request in [0].  Note that I'm not
>>>>>>> 100% sure about the usefulness of this patch, or if this is the best way
>>>>>>> to provide the functionality Patrick requested.  Anyway, this is just to
>>>>>>> get some feedback, hence the RFC.
>>>>>>>
>>>>>>> Cheers,
>>>>>>> --
>>>>>>> Luís
>>>>>>>
>>>>>>> [0] https://github.com/ceph/ceph/pull/42720
>>>>>>>
>>>>>> I think this would be better integrated into the stats infrastructure.
>>>>>>
>>>>>> Maybe you could add a new set of "copy" stats to struct
>>>>>> ceph_client_metric that tracks the total copy operations done, their
>>>>>> size and latency (similar to read and write ops)?
>>>>> I think it's a good idea to integrate this into "stats" but I think a
>>>>> local debugfs file for some counters is still useful. The "stats"
>>>>> module is immature at this time and I'd rather not build any qa tests
>>>>> (yet) that rely on it.
>>>>>
>>>>> Can we generalize this patch-set to a file named "op_counters" or
>>>>> similar and additionally add other OSD ops performed by the kclient?
>>>>>
>>>>
>>>> Tracking this sort of thing is the main purpose of the stats code. I'm
>>>> really not keen on adding a whole separate set of files for reporting
>>>> this.
>>> Maybe I'm confused. Is there some "file" which is already used for
>>> this type of debugging information? Or do you mean the code for
>>> sending stats to the MDS to support cephfs-top?
>>>
>>>> What's the specific problem with relying on the data in debugfs
>>>> "metrics" file?
>>> Maybe no problem? I wasn't aware of a "metrics" file.
>>>
>> Yes. For instance:
>>
>> # cat /sys/kernel/debug/ceph/*/metrics
>> item                               total
>> ------------------------------------------
>> opened files  / total inodes       0 / 4
>> pinned i_caps / total inodes       5 / 4
>> opened inodes / total inodes       0 / 4
>>
>> item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
>> -----------------------------------------------------------------------------------
>> read          0           0               0               0               0
>> write         5           914013          824797          1092343         103476
>> metadata      79          12856           1572            114572          13262
>>
>> item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
>> ----------------------------------------------------------------------------------------
>> read          0           0               0               0               0
>> write         5           4194304         4194304         4194304         20971520
>>
>> item          total           miss            hit
>> -------------------------------------------------
>> d_lease       11              0               29
>> caps          5               68              10702
>>
>>
>> I'm proposing that Luis add new lines for "copy" to go along with the
>> "read" and "write" ones. The "total" counter should give you a count of
>> the number of operations.
> Okay that makes more sense!
>
> Side note: I am a bit horrified by how computer-unfriendly that
> table-formatted data is.

Any suggestions on how to improve this?

How about making the "metrics" file writable, so it acts as a switch?  By 
default it would show the data as above, and if tools want the 
computer-friendly format they could write a non-zero value to it, after which 
it would show the raw data like:

# cat /sys/kernel/debug/ceph/*/metrics
opened_files:0
pinned_i_caps:5
opened_inodes:0
total_inodes:4

read_latency:0,0,0,0,0
write_latency:5,914013,824797,1092343,103476
metadata_latency:79,12856,1572,114572,13262

read_size:0,0,0,0,0
write_size:5,4194304,4194304,4194304,20971520

d_lease:11,0,29
caps:5,68,10702
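
A minimal sketch of how that switch could be wired up (the metrics file would
then need its own fops rather than DEFINE_SHOW_ATTRIBUTE(), and the flag name
below is made up):

static ssize_t metrics_file_write(struct file *file, const char __user *buf,
				  size_t count, loff_t *ppos)
{
	struct seq_file *s = file->private_data;
	struct ceph_fs_client *fsc = s->private;
	bool raw;
	int ret;

	/* writing "1"/"y" selects the raw layout, "0"/"n" the table */
	ret = kstrtobool_from_user(buf, count, &raw);
	if (ret)
		return ret;

	fsc->metrics_raw_format = raw;	/* hypothetical new flag */
	return count;
}

metric_show() would then check fsc->metrics_raw_format to decide between the
current table and the one-line-per-item layout above, and the file would be
created with mode 0600 instead of 0400.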




* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-26  3:05             ` Xiubo Li
@ 2021-10-26 11:40               ` Jeff Layton
  2021-10-26 15:31                 ` Luís Henriques
  2021-10-27  4:52                 ` [RFC PATCH] ceph: add remote object copy counter to fs client Xiubo Li
  0 siblings, 2 replies; 20+ messages in thread
From: Jeff Layton @ 2021-10-26 11:40 UTC (permalink / raw)
  To: Xiubo Li, Patrick Donnelly
  Cc: Luís Henriques, Ilya Dryomov, Ceph Development, linux-kernel

On Tue, 2021-10-26 at 11:05 +0800, Xiubo Li wrote:
> On 10/22/21 1:30 AM, Patrick Donnelly wrote:
> > On Thu, Oct 21, 2021 at 12:35 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
> > > > On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> > > > > > On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > > > > > > This counter will keep track of the number of remote object copies done on
> > > > > > > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > > > > > > can be accessed from the client debugfs directory.
> > > > > > > > 
> > > > > > > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > > > > > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > > > > > > ---
> > > > > > > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > > > > > > 100% sure about the usefulness of this patch, or if this is the best way
> > > > > > > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > > > > > > get some feedback, hence the RFC.
> > > > > > > > 
> > > > > > > > Cheers,
> > > > > > > > --
> > > > > > > > Luís
> > > > > > > > 
> > > > > > > > [0] https://github.com/ceph/ceph/pull/42720
> > > > > > > > 
> > > > > > > I think this would be better integrated into the stats infrastructure.
> > > > > > > 
> > > > > > > Maybe you could add a new set of "copy" stats to struct
> > > > > > > ceph_client_metric that tracks the total copy operations done, their
> > > > > > > size and latency (similar to read and write ops)?
> > > > > > I think it's a good idea to integrate this into "stats" but I think a
> > > > > > local debugfs file for some counters is still useful. The "stats"
> > > > > > module is immature at this time and I'd rather not build any qa tests
> > > > > > (yet) that rely on it.
> > > > > > 
> > > > > > Can we generalize this patch-set to a file named "op_counters" or
> > > > > > similar and additionally add other OSD ops performed by the kclient?
> > > > > > 
> > > > > 
> > > > > Tracking this sort of thing is the main purpose of the stats code. I'm
> > > > > really not keen on adding a whole separate set of files for reporting
> > > > > this.
> > > > Maybe I'm confused. Is there some "file" which is already used for
> > > > this type of debugging information? Or do you mean the code for
> > > > sending stats to the MDS to support cephfs-top?
> > > > 
> > > > > What's the specific problem with relying on the data in debugfs
> > > > > "metrics" file?
> > > > Maybe no problem? I wasn't aware of a "metrics" file.
> > > > 
> > > Yes. For instance:
> > > 
> > > # cat /sys/kernel/debug/ceph/*/metrics
> > > item                               total
> > > ------------------------------------------
> > > opened files  / total inodes       0 / 4
> > > pinned i_caps / total inodes       5 / 4
> > > opened inodes / total inodes       0 / 4
> > > 
> > > item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
> > > -----------------------------------------------------------------------------------
> > > read          0           0               0               0               0
> > > write         5           914013          824797          1092343         103476
> > > metadata      79          12856           1572            114572          13262
> > > 
> > > item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
> > > ----------------------------------------------------------------------------------------
> > > read          0           0               0               0               0
> > > write         5           4194304         4194304         4194304         20971520
> > > 
> > > item          total           miss            hit
> > > -------------------------------------------------
> > > d_lease       11              0               29
> > > caps          5               68              10702
> > > 
> > > 
> > > I'm proposing that Luis add new lines for "copy" to go along with the
> > > "read" and "write" ones. The "total" counter should give you a count of
> > > the number of operations.
> > Okay that makes more sense!
> > 
> > Side note: I am a bit horrified by how computer-unfriendly that
> > table-formatted data is.
> 
> Any suggestions on how to improve this?
> 
> How about making the "metrics" file writable, like a switch?  By default
> it would show the data as above; if a tool wants the computer-friendly
> format, it could write a non-zero value to the file, which would then
> show the raw data like this:
> 
> # cat /sys/kernel/debug/ceph/*/metrics
> opened_files:0
> pinned_i_caps:5
> opened_inodes:0
> total_inodes:4
> 
> read_latency:0,0,0,0,0
> write_latency:5,914013,824797,1092343,103476
> metadata_latency:79,12856,1572,114572,13262
> 
> read_size:0,0,0,0,0
> write_size:5,4194304,4194304,4194304,20971520
> 
> d_lease:11,0,29
> caps:5,68,10702
> 
> 

I'd rather not multiplex the output of this file based on some input.
That would also be rather hard to do -- write() and read() are two
different syscalls, so you'd need to track a bool (or something) across
them somehow.
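
Just to make that concrete, here is a minimal sketch of what it would
take -- a hypothetical flag in ceph_fs_client plus a custom ->write
handler (so DEFINE_SHOW_ATTRIBUTE() could no longer be used as-is).
Every name below is made up purely for illustration:

/* Hypothetical sketch only -- none of these fields or helpers exist. */
static ssize_t metrics_fmt_write(struct file *file, const char __user *ubuf,
				 size_t count, loff_t *ppos)
{
	struct ceph_fs_client *fsc = file_inode(file)->i_private;
	bool raw;
	int err;

	err = kstrtobool_from_user(ubuf, count, &raw);
	if (err)
		return err;
	WRITE_ONCE(fsc->raw_metrics, raw);	/* remembered across syscalls */
	return count;
}

static int metric_show(struct seq_file *s, void *p)
{
	struct ceph_fs_client *fsc = s->private;

	/* the read() side has to consult whatever the last write() stored */
	if (READ_ONCE(fsc->raw_metrics))
		return metrics_show_raw(s, &fsc->mdsc->metric);
	return metrics_show_table(s, &fsc->mdsc->metric);
}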

Currently, I doubt there are many scripts in the field that scrape this
info and debugfs is specifically excluded from ABI concerns. If we want
to make it more machine-readable (which sounds like a good thing), then
I suggest we just change the output to something like what you have
above and not worry about preserving the "legacy" output.
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-26 11:40               ` Jeff Layton
@ 2021-10-26 15:31                 ` Luís Henriques
  2021-10-27  6:46                   ` Xiubo Li
  2021-10-27  4:52                 ` [RFC PATCH] ceph: add remote object copy counter to fs client Xiubo Li
  1 sibling, 1 reply; 20+ messages in thread
From: Luís Henriques @ 2021-10-26 15:31 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Xiubo Li, Patrick Donnelly, Ilya Dryomov, Ceph Development, linux-kernel

On Tue, Oct 26, 2021 at 07:40:51AM -0400, Jeff Layton wrote:
> On Tue, 2021-10-26 at 11:05 +0800, Xiubo Li wrote:
> > On 10/22/21 1:30 AM, Patrick Donnelly wrote:
> > > On Thu, Oct 21, 2021 at 12:35 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
> > > > > On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
> > > > > > > On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
> > > > > > > > On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
> > > > > > > > > This counter will keep track of the number of remote object copies done on
> > > > > > > > > copy_file_range syscalls.  This counter will be filesystem per-client, and
> > > > > > > > > can be accessed from the client debugfs directory.
> > > > > > > > > 
> > > > > > > > > Cc: Patrick Donnelly <pdonnell@redhat.com>
> > > > > > > > > Signed-off-by: Luís Henriques <lhenriques@suse.de>
> > > > > > > > > ---
> > > > > > > > > This is an RFC to reply to Patrick's request in [0].  Note that I'm not
> > > > > > > > > 100% sure about the usefulness of this patch, or if this is the best way
> > > > > > > > > to provide the functionality Patrick requested.  Anyway, this is just to
> > > > > > > > > get some feedback, hence the RFC.
> > > > > > > > > 
> > > > > > > > > Cheers,
> > > > > > > > > --
> > > > > > > > > Luís
> > > > > > > > > 
> > > > > > > > > [0] https://github.com/ceph/ceph/pull/42720
> > > > > > > > > 
> > > > > > > > I think this would be better integrated into the stats infrastructure.
> > > > > > > > 
> > > > > > > > Maybe you could add a new set of "copy" stats to struct
> > > > > > > > ceph_client_metric that tracks the total copy operations done, their
> > > > > > > > size and latency (similar to read and write ops)?
> > > > > > > I think it's a good idea to integrate this into "stats" but I think a
> > > > > > > local debugfs file for some counters is still useful. The "stats"
> > > > > > > module is immature at this time and I'd rather not build any qa tests
> > > > > > > (yet) that rely on it.
> > > > > > > 
> > > > > > > Can we generalize this patch-set to a file named "op_counters" or
> > > > > > > similar and additionally add other OSD ops performed by the kclient?
> > > > > > > 
> > > > > > 
> > > > > > Tracking this sort of thing is the main purpose of the stats code. I'm
> > > > > > really not keen on adding a whole separate set of files for reporting
> > > > > > this.
> > > > > Maybe I'm confused. Is there some "file" which is already used for
> > > > > this type of debugging information? Or do you mean the code for
> > > > > sending stats to the MDS to support cephfs-top?
> > > > > 
> > > > > > What's the specific problem with relying on the data in debugfs
> > > > > > "metrics" file?
> > > > > Maybe no problem? I wasn't aware of a "metrics" file.
> > > > > 
> > > > Yes. For instance:
> > > > 
> > > > # cat /sys/kernel/debug/ceph/*/metrics
> > > > item                               total
> > > > ------------------------------------------
> > > > opened files  / total inodes       0 / 4
> > > > pinned i_caps / total inodes       5 / 4
> > > > opened inodes / total inodes       0 / 4
> > > > 
> > > > item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
> > > > -----------------------------------------------------------------------------------
> > > > read          0           0               0               0               0
> > > > write         5           914013          824797          1092343         103476
> > > > metadata      79          12856           1572            114572          13262
> > > > 
> > > > item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
> > > > ----------------------------------------------------------------------------------------
> > > > read          0           0               0               0               0
> > > > write         5           4194304         4194304         4194304         20971520
> > > > 
> > > > item          total           miss            hit
> > > > -------------------------------------------------
> > > > d_lease       11              0               29
> > > > caps          5               68              10702
> > > > 
> > > > 
> > > > I'm proposing that Luis add new lines for "copy" to go along with the
> > > > "read" and "write" ones. The "total" counter should give you a count of
> > > > the number of operations.
> > > Okay that makes more sense!
> > > 
> > > Side note: I am a bit horrified by how computer-unfriendly that
> > > table-formatted data is.
> > 
> > Any suggestions on how to improve this?
> > 
> > How about making the "metrics" file writable, like a switch?  By default
> > it would show the data as above; if a tool wants the computer-friendly
> > format, it could write a non-zero value to the file, which would then
> > show the raw data like this:
> > 
> > # cat /sys/kernel/debug/ceph/*/metrics
> > opened_files:0
> > pinned_i_caps:5
> > opened_inodes:0
> > total_inodes:4
> > 
> > read_latency:0,0,0,0,0
> > write_latency:5,914013,824797,1092343,103476
> > metadata_latency:79,12856,1572,114572,13262
> > 
> > read_size:0,0,0,0,0
> > write_size:5,4194304,4194304,4194304,20971520
> > 
> > d_lease:11,0,29
> > caps:5,68,10702
> > 
> > 
> 
> I'd rather not multiplex the output of this file based on some input.
> That would also be rather hard to do -- write() and read() are two
> different syscalls, so you'd need to track a bool (or something) across
> them somehow.
> 
> Currently, I doubt there are many scripts in the field that scrape this
> info and debugfs is specifically excluded from ABI concerns. If we want
> to make it more machine-readable (which sounds like a good thing), then
> I suggest we just change the output to something like what you have
> above and not worry about preserving the "legacy" output.

Ok, before submitting any new revision of this patch I should probably
clean this up.  I can submit a patch to change the format to what Xiubo is
proposing.  Obviously, that patch will also need to document what all
those fields actually mean.

Alternatively, the metrics file could be changed into a directory and have
4 different files, one for each section:

  metrics/
   |- files <-- not sure how to name the 1st section
   |- latency
   |- size
   \- caps

Each of these files would still have its header but, since it's a single
header per file, parsing it in a script would be pretty easy.  The
advantage is that this would be self-documenting (with filenames and
headers).
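
Parsing any of them would then boil down to just skipping that header,
e.g. (sketch only, assuming the latency section keeps the header shown
earlier in this thread):

# tail -n +3 /sys/kernel/debug/ceph/*/metrics/latency
read          0           0               0               0               0
write         5           914013          824797          1092343         103476
metadata      79          12856           1572            114572          13262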

Cheers,
--
Luís

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-26 11:40               ` Jeff Layton
  2021-10-26 15:31                 ` Luís Henriques
@ 2021-10-27  4:52                 ` Xiubo Li
  1 sibling, 0 replies; 20+ messages in thread
From: Xiubo Li @ 2021-10-27  4:52 UTC (permalink / raw)
  To: Jeff Layton, Patrick Donnelly
  Cc: Luís Henriques, Ilya Dryomov, Ceph Development, linux-kernel


On 10/26/21 7:40 PM, Jeff Layton wrote:
> On Tue, 2021-10-26 at 11:05 +0800, Xiubo Li wrote:
>> On 10/22/21 1:30 AM, Patrick Donnelly wrote:
>>> On Thu, Oct 21, 2021 at 12:35 PM Jeff Layton <jlayton@kernel.org> wrote:
>>>> On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
>>>>> On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
>>>>>> On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
>>>>>>> On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
>>>>>>>> On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
>>>>>>>>> This counter will keep track of the number of remote object copies done on
>>>>>>>>> copy_file_range syscalls.  This counter will be filesystem per-client, and
>>>>>>>>> can be accessed from the client debugfs directory.
>>>>>>>>>
>>>>>>>>> Cc: Patrick Donnelly <pdonnell@redhat.com>
>>>>>>>>> Signed-off-by: Luís Henriques <lhenriques@suse.de>
>>>>>>>>> ---
>>>>>>>>> This is an RFC to reply to Patrick's request in [0].  Note that I'm not
>>>>>>>>> 100% sure about the usefulness of this patch, or if this is the best way
>>>>>>>>> to provide the functionality Patrick requested.  Anyway, this is just to
>>>>>>>>> get some feedback, hence the RFC.
>>>>>>>>>
>>>>>>>>> Cheers,
>>>>>>>>> --
>>>>>>>>> Luís
>>>>>>>>>
>>>>>>>>> [0] https://github.com/ceph/ceph/pull/42720
>>>>>>>>>
>>>>>>>> I think this would be better integrated into the stats infrastructure.
>>>>>>>>
>>>>>>>> Maybe you could add a new set of "copy" stats to struct
>>>>>>>> ceph_client_metric that tracks the total copy operations done, their
>>>>>>>> size and latency (similar to read and write ops)?
>>>>>>> I think it's a good idea to integrate this into "stats" but I think a
>>>>>>> local debugfs file for some counters is still useful. The "stats"
>>>>>>> module is immature at this time and I'd rather not build any qa tests
>>>>>>> (yet) that rely on it.
>>>>>>>
>>>>>>> Can we generalize this patch-set to a file named "op_counters" or
>>>>>>> similar and additionally add other OSD ops performed by the kclient?
>>>>>>>
>>>>>> Tracking this sort of thing is the main purpose of the stats code. I'm
>>>>>> really not keen on adding a whole separate set of files for reporting
>>>>>> this.
>>>>> Maybe I'm confused. Is there some "file" which is already used for
>>>>> this type of debugging information? Or do you mean the code for
>>>>> sending stats to the MDS to support cephfs-top?
>>>>>
>>>>>> What's the specific problem with relying on the data in debugfs
>>>>>> "metrics" file?
>>>>> Maybe no problem? I wasn't aware of a "metrics" file.
>>>>>
>>>> Yes. For instance:
>>>>
>>>> # cat /sys/kernel/debug/ceph/*/metrics
>>>> item                               total
>>>> ------------------------------------------
>>>> opened files  / total inodes       0 / 4
>>>> pinned i_caps / total inodes       5 / 4
>>>> opened inodes / total inodes       0 / 4
>>>>
>>>> item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
>>>> -----------------------------------------------------------------------------------
>>>> read          0           0               0               0               0
>>>> write         5           914013          824797          1092343         103476
>>>> metadata      79          12856           1572            114572          13262
>>>>
>>>> item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
>>>> ----------------------------------------------------------------------------------------
>>>> read          0           0               0               0               0
>>>> write         5           4194304         4194304         4194304         20971520
>>>>
>>>> item          total           miss            hit
>>>> -------------------------------------------------
>>>> d_lease       11              0               29
>>>> caps          5               68              10702
>>>>
>>>>
>>>> I'm proposing that Luis add new lines for "copy" to go along with the
>>>> "read" and "write" ones. The "total" counter should give you a count of
>>>> the number of operations.
>>> Okay that makes more sense!
>>>
>>> Side note: I am a bit horrified by how computer-unfriendly that
>>> table-formatted data is.
>> Any suggestions on how to improve this?
>>
>> How about making the "metrics" file writable, like a switch?  By default
>> it would show the data as above; if a tool wants the computer-friendly
>> format, it could write a non-zero value to the file, which would then
>> show the raw data like this:
>>
>> # cat /sys/kernel/debug/ceph/*/metrics
>> opened_files:0
>> pinned_i_caps:5
>> opened_inodes:0
>> total_inodes:4
>>
>> read_latency:0,0,0,0,0
>> write_latency:5,914013,824797,1092343,103476
>> metadata_latency:79,12856,1572,114572,13262
>>
>> read_size:0,0,0,0,0
>> write_size:5,4194304,4194304,4194304,20971520
>>
>> d_lease:11,0,29
>> caps:5,68,10702
>>
>>
> I'd rather not multiplex the output of this file based on some input.
> That would also be rather hard to do -- write() and read() are two
> different syscalls, so you'd need to track a bool (or something) across
> them somehow.
>
> Currently, I doubt there are many scripts in the field that scrape this
> info and debugfs is specifically excluded from ABI concerns. If we want
> to make it more machine-readable (which sounds like a good thing), then
> I suggest we just change the output to something like what you have
> above and not worry about preserving the "legacy" output.

Sounds good to me.



^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [RFC PATCH] ceph: add remote object copy counter to fs client
  2021-10-26 15:31                 ` Luís Henriques
@ 2021-10-27  6:46                   ` Xiubo Li
  2021-10-27 10:01                     ` [PATCH] ceph: split 'metric' debugfs file into several files Luís Henriques
  0 siblings, 1 reply; 20+ messages in thread
From: Xiubo Li @ 2021-10-27  6:46 UTC (permalink / raw)
  To: Luís Henriques, Jeff Layton
  Cc: Patrick Donnelly, Ilya Dryomov, Ceph Development, linux-kernel


On 10/26/21 11:31 PM, Luís Henriques wrote:
> On Tue, Oct 26, 2021 at 07:40:51AM -0400, Jeff Layton wrote:
>> On Tue, 2021-10-26 at 11:05 +0800, Xiubo Li wrote:
>>> On 10/22/21 1:30 AM, Patrick Donnelly wrote:
>>>> On Thu, Oct 21, 2021 at 12:35 PM Jeff Layton <jlayton@kernel.org> wrote:
>>>>> On Thu, 2021-10-21 at 12:18 -0400, Patrick Donnelly wrote:
>>>>>> On Thu, Oct 21, 2021 at 11:44 AM Jeff Layton <jlayton@kernel.org> wrote:
>>>>>>> On Thu, 2021-10-21 at 09:52 -0400, Patrick Donnelly wrote:
>>>>>>>> On Wed, Oct 20, 2021 at 12:27 PM Jeff Layton <jlayton@kernel.org> wrote:
>>>>>>>>> On Wed, 2021-10-20 at 15:37 +0100, Luís Henriques wrote:
>>>>>>>>>> This counter will keep track of the number of remote object copies done on
>>>>>>>>>> copy_file_range syscalls.  This counter will be filesystem per-client, and
>>>>>>>>>> can be accessed from the client debugfs directory.
>>>>>>>>>>
>>>>>>>>>> Cc: Patrick Donnelly <pdonnell@redhat.com>
>>>>>>>>>> Signed-off-by: Luís Henriques <lhenriques@suse.de>
>>>>>>>>>> ---
>>>>>>>>>> This is an RFC to reply to Patrick's request in [0].  Note that I'm not
>>>>>>>>>> 100% sure about the usefulness of this patch, or if this is the best way
>>>>>>>>>> to provide the functionality Patrick requested.  Anyway, this is just to
>>>>>>>>>> get some feedback, hence the RFC.
>>>>>>>>>>
>>>>>>>>>> Cheers,
>>>>>>>>>> --
>>>>>>>>>> Luís
>>>>>>>>>>
>>>>>>>>>> [0] https://github.com/ceph/ceph/pull/42720
>>>>>>>>>>
>>>>>>>>> I think this would be better integrated into the stats infrastructure.
>>>>>>>>>
>>>>>>>>> Maybe you could add a new set of "copy" stats to struct
>>>>>>>>> ceph_client_metric that tracks the total copy operations done, their
>>>>>>>>> size and latency (similar to read and write ops)?
>>>>>>>> I think it's a good idea to integrate this into "stats" but I think a
>>>>>>>> local debugfs file for some counters is still useful. The "stats"
>>>>>>>> module is immature at this time and I'd rather not build any qa tests
>>>>>>>> (yet) that rely on it.
>>>>>>>>
>>>>>>>> Can we generalize this patch-set to a file named "op_counters" or
>>>>>>>> similar and additionally add other OSD ops performed by the kclient?
>>>>>>>>
>>>>>>> Tracking this sort of thing is the main purpose of the stats code. I'm
>>>>>>> really not keen on adding a whole separate set of files for reporting
>>>>>>> this.
>>>>>> Maybe I'm confused. Is there some "file" which is already used for
>>>>>> this type of debugging information? Or do you mean the code for
>>>>>> sending stats to the MDS to support cephfs-top?
>>>>>>
>>>>>>> What's the specific problem with relying on the data in debugfs
>>>>>>> "metrics" file?
>>>>>> Maybe no problem? I wasn't aware of a "metrics" file.
>>>>>>
>>>>> Yes. For instance:
>>>>>
>>>>> # cat /sys/kernel/debug/ceph/*/metrics
>>>>> item                               total
>>>>> ------------------------------------------
>>>>> opened files  / total inodes       0 / 4
>>>>> pinned i_caps / total inodes       5 / 4
>>>>> opened inodes / total inodes       0 / 4
>>>>>
>>>>> item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)
>>>>> -----------------------------------------------------------------------------------
>>>>> read          0           0               0               0               0
>>>>> write         5           914013          824797          1092343         103476
>>>>> metadata      79          12856           1572            114572          13262
>>>>>
>>>>> item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
>>>>> ----------------------------------------------------------------------------------------
>>>>> read          0           0               0               0               0
>>>>> write         5           4194304         4194304         4194304         20971520
>>>>>
>>>>> item          total           miss            hit
>>>>> -------------------------------------------------
>>>>> d_lease       11              0               29
>>>>> caps          5               68              10702
>>>>>
>>>>>
>>>>> I'm proposing that Luis add new lines for "copy" to go along with the
>>>>> "read" and "write" ones. The "total" counter should give you a count of
>>>>> the number of operations.
>>>> Okay that makes more sense!
>>>>
>>>> Side note: I am a bit horrified by how computer-unfriendly that
>>>> table-formatted data is.
>>> Any suggestions on how to improve this?
>>>
>>> How about making the "metrics" file writable, like a switch?  By default
>>> it would show the data as above; if a tool wants the computer-friendly
>>> format, it could write a non-zero value to the file, which would then
>>> show the raw data like this:
>>>
>>> # cat /sys/kernel/debug/ceph/*/metrics
>>> opened_files:0
>>> pinned_i_caps:5
>>> opened_inodes:0
>>> total_inodes:4
>>>
>>> read_latency:0,0,0,0,0
>>> write_latency:5,914013,824797,1092343,103476
>>> metadata_latency:79,12856,1572,114572,13262
>>>
>>> read_size:0,0,0,0,0
>>> write_size:5,4194304,4194304,4194304,20971520
>>>
>>> d_lease:11,0,29
>>> caps:5,68,10702
>>>
>>>
>> I'd rather not multiplex the output of this file based on some input.
>> That would also be rather hard to do -- write() and read() are two
>> different syscalls, so you'd need to track a bool (or something) across
>> them somehow.
>>
>> Currently, I doubt there are many scripts in the field that scrape this
>> info and debugfs is specifically excluded from ABI concerns. If we want
>> to make it more machine-readable (which sounds like a good thing), then
>> I suggest we just change the output to something like what you have
>> above and not worry about preserving the "legacy" output.
> Ok, before submitting any new revision of this patch I should probably
> clean this up.  I can submit a patch to change the format to what Xiubo is
> proposing.  Obviously, that patch will also need to document what all
> those fields actually mean.
>
> Alternatively, the metrics file could be changed into a directory and have
> 4 different files, one per each section:
>
>    metrics/
>     |- files <-- not sure how to name the 1st section
>     |- latency
>     |- size
>     \- caps
>
> Each of these files would then have the header but, since it's a single
> header, parsing it in a script would be pretty easy.  The advantage is
> that this would be self-documented (with filenames and headers).

This sounds good to me.


>
> Cheers,
> --
> Luís
>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH] ceph: split 'metric' debugfs file into several files
  2021-10-27  6:46                   ` Xiubo Li
@ 2021-10-27 10:01                     ` Luís Henriques
  2021-10-27 11:53                       ` Jeff Layton
  2021-10-27 12:00                       ` Xiubo Li
  0 siblings, 2 replies; 20+ messages in thread
From: Luís Henriques @ 2021-10-27 10:01 UTC (permalink / raw)
  To: Jeff Layton, Ilya Dryomov, Xiubo Li, Patrick Donnelly
  Cc: ceph-devel, linux-kernel, Luís Henriques

Currently, all the metrics are grouped together in a single file, making
it difficult to process this file from scripts.  Furthermore, as new
metrics are added, processing this file will become even more challenging.

This patch turns the 'metric' file into a directory that will contain
several files, one for each metric.

Signed-off-by: Luís Henriques <lhenriques@suse.de>
---
Here's a patch that does just what I proposed.  It simply turns the metric
file into a directory.  Not sure this is the best option, but here it is
for discussion.
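
With this applied, the debugfs layout should look something like this
(example values taken from the output pasted earlier in the thread):

# ls /sys/kernel/debug/ceph/*/metrics/
caps  file  latency  size
# cat /sys/kernel/debug/ceph/*/metrics/size
item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)
----------------------------------------------------------------------------------------
read          0           0               0               0               0
write         5           4194304         4194304         4194304         20971520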

Cheers,
--
Luís

 fs/ceph/debugfs.c | 81 ++++++++++++++++++++++++++++++++---------------
 fs/ceph/super.h   |  2 +-
 2 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 38b78b45811f..55426514491b 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -146,26 +146,30 @@ static int mdsc_show(struct seq_file *s, void *p)
 		   name, total, avg, _min, max, sum);			\
 }
 
-static int metric_show(struct seq_file *s, void *p)
+static int metrics_file_show(struct seq_file *s, void *p)
 {
 	struct ceph_fs_client *fsc = s->private;
-	struct ceph_mds_client *mdsc = fsc->mdsc;
-	struct ceph_client_metric *m = &mdsc->metric;
-	int nr_caps = 0;
-	s64 total, sum, avg, min, max, sq;
-	u64 sum_sz, avg_sz, min_sz, max_sz;
+	struct ceph_client_metric *m = &fsc->mdsc->metric;
 
-	sum = percpu_counter_sum(&m->total_inodes);
 	seq_printf(s, "item                               total\n");
 	seq_printf(s, "------------------------------------------\n");
-	seq_printf(s, "%-35s%lld / %lld\n", "opened files  / total inodes",
-		   atomic64_read(&m->opened_files), sum);
-	seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes",
-		   atomic64_read(&m->total_caps), sum);
-	seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes",
-		   percpu_counter_sum(&m->opened_inodes), sum);
-
-	seq_printf(s, "\n");
+	seq_printf(s, "%-35s%lld\n", "total inodes",
+		   percpu_counter_sum(&m->total_inodes));
+	seq_printf(s, "%-35s%lld\n", "opened files",
+		   atomic64_read(&m->opened_files));
+	seq_printf(s, "%-35s%lld\n", "pinned i_caps",
+		   atomic64_read(&m->total_caps));
+	seq_printf(s, "%-35s%lld\n", "opened inodes",
+		   percpu_counter_sum(&m->opened_inodes));
+	return 0;
+}
+
+static int metrics_latency_show(struct seq_file *s, void *p)
+{
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_client_metric *m = &fsc->mdsc->metric;
+	s64 total, sum, avg, min, max, sq;
+
 	seq_printf(s, "item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)\n");
 	seq_printf(s, "-----------------------------------------------------------------------------------\n");
 
@@ -199,7 +203,16 @@ static int metric_show(struct seq_file *s, void *p)
 	spin_unlock(&m->metadata_metric_lock);
 	CEPH_LAT_METRIC_SHOW("metadata", total, avg, min, max, sq);
 
-	seq_printf(s, "\n");
+	return 0;
+}
+
+static int metrics_size_show(struct seq_file *s, void *p)
+{
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_client_metric *m = &fsc->mdsc->metric;
+	s64 total;
+	u64 sum_sz, avg_sz, min_sz, max_sz;
+
 	seq_printf(s, "item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)\n");
 	seq_printf(s, "----------------------------------------------------------------------------------------\n");
 
@@ -221,7 +234,15 @@ static int metric_show(struct seq_file *s, void *p)
 	spin_unlock(&m->write_metric_lock);
 	CEPH_SZ_METRIC_SHOW("write", total, avg_sz, min_sz, max_sz, sum_sz);
 
-	seq_printf(s, "\n");
+	return 0;
+}
+
+static int metrics_caps_show(struct seq_file *s, void *p)
+{
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_client_metric *m = &fsc->mdsc->metric;
+	int nr_caps = 0;
+
 	seq_printf(s, "item          total           miss            hit\n");
 	seq_printf(s, "-------------------------------------------------\n");
 
@@ -350,8 +371,11 @@ DEFINE_SHOW_ATTRIBUTE(mdsmap);
 DEFINE_SHOW_ATTRIBUTE(mdsc);
 DEFINE_SHOW_ATTRIBUTE(caps);
 DEFINE_SHOW_ATTRIBUTE(mds_sessions);
-DEFINE_SHOW_ATTRIBUTE(metric);
 DEFINE_SHOW_ATTRIBUTE(status);
+DEFINE_SHOW_ATTRIBUTE(metrics_file);
+DEFINE_SHOW_ATTRIBUTE(metrics_latency);
+DEFINE_SHOW_ATTRIBUTE(metrics_size);
+DEFINE_SHOW_ATTRIBUTE(metrics_caps);
 
 
 /*
@@ -385,8 +409,9 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
 	debugfs_remove(fsc->debugfs_mdsmap);
 	debugfs_remove(fsc->debugfs_mds_sessions);
 	debugfs_remove(fsc->debugfs_caps);
-	debugfs_remove(fsc->debugfs_metric);
+	debugfs_remove(fsc->debugfs_status);
 	debugfs_remove(fsc->debugfs_mdsc);
+	debugfs_remove_recursive(fsc->debugfs_metrics_dir);
 }
 
 void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
@@ -426,12 +451,6 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 						fsc,
 						&mdsc_fops);
 
-	fsc->debugfs_metric = debugfs_create_file("metrics",
-						  0400,
-						  fsc->client->debugfs_dir,
-						  fsc,
-						  &metric_fops);
-
 	fsc->debugfs_caps = debugfs_create_file("caps",
 						0400,
 						fsc->client->debugfs_dir,
@@ -443,6 +462,18 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 						  fsc->client->debugfs_dir,
 						  fsc,
 						  &status_fops);
+
+	fsc->debugfs_metrics_dir = debugfs_create_dir("metrics",
+						      fsc->client->debugfs_dir);
+
+	debugfs_create_file("file", 0400, fsc->debugfs_metrics_dir, fsc,
+			    &metrics_file_fops);
+	debugfs_create_file("latency", 0400, fsc->debugfs_metrics_dir, fsc,
+			    &metrics_latency_fops);
+	debugfs_create_file("size", 0400, fsc->debugfs_metrics_dir, fsc,
+			    &metrics_size_fops);
+	debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc,
+			    &metrics_caps_fops);
 }
 
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 14f951cd5b61..795b077143d6 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -128,9 +128,9 @@ struct ceph_fs_client {
 	struct dentry *debugfs_congestion_kb;
 	struct dentry *debugfs_bdi;
 	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
-	struct dentry *debugfs_metric;
 	struct dentry *debugfs_status;
 	struct dentry *debugfs_mds_sessions;
+	struct dentry *debugfs_metrics_dir;
 #endif
 
 #ifdef CONFIG_CEPH_FSCACHE

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH] ceph: split 'metric' debugfs file into several files
  2021-10-27 10:01                     ` [PATCH] ceph: split 'metric' debugfs file into several files Luís Henriques
@ 2021-10-27 11:53                       ` Jeff Layton
  2021-10-27 12:00                       ` Xiubo Li
  1 sibling, 0 replies; 20+ messages in thread
From: Jeff Layton @ 2021-10-27 11:53 UTC (permalink / raw)
  To: Luís Henriques, Ilya Dryomov, Xiubo Li, Patrick Donnelly
  Cc: ceph-devel, linux-kernel

On Wed, 2021-10-27 at 11:01 +0100, Luís Henriques wrote:
> Currently, all the metrics are grouped together in a single file, making
> it difficult to process this file from scripts.  Furthermore, as new
> metrics are added, processing this file will become even more challenging.
> 
> This patch turns the 'metric' file into a directory that will contain
> several files, one for each metric.
> 
> Signed-off-by: Luís Henriques <lhenriques@suse.de>
> ---
> Here's a patch that does just what I proposed.  It simply turns the metric
> file into a directory.  Not sure this is the best option, but here it is
> for discussion.
> 
> Cheers,
> --
> Luís
> 

Thanks Luis,

I'm fine with this change, and it is a lot more readable. I'll plan to
merge this into testing later today, unless anyone has objections.

>  fs/ceph/debugfs.c | 81 ++++++++++++++++++++++++++++++++---------------
>  fs/ceph/super.h   |  2 +-
>  2 files changed, 57 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index 38b78b45811f..55426514491b 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -146,26 +146,30 @@ static int mdsc_show(struct seq_file *s, void *p)
>  		   name, total, avg, _min, max, sum);			\
>  }
>  
> -static int metric_show(struct seq_file *s, void *p)
> +static int metrics_file_show(struct seq_file *s, void *p)
>  {
>  	struct ceph_fs_client *fsc = s->private;
> -	struct ceph_mds_client *mdsc = fsc->mdsc;
> -	struct ceph_client_metric *m = &mdsc->metric;
> -	int nr_caps = 0;
> -	s64 total, sum, avg, min, max, sq;
> -	u64 sum_sz, avg_sz, min_sz, max_sz;
> +	struct ceph_client_metric *m = &fsc->mdsc->metric;
>  
> -	sum = percpu_counter_sum(&m->total_inodes);
>  	seq_printf(s, "item                               total\n");
>  	seq_printf(s, "------------------------------------------\n");
> -	seq_printf(s, "%-35s%lld / %lld\n", "opened files  / total inodes",
> -		   atomic64_read(&m->opened_files), sum);
> -	seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes",
> -		   atomic64_read(&m->total_caps), sum);
> -	seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes",
> -		   percpu_counter_sum(&m->opened_inodes), sum);
> -
> -	seq_printf(s, "\n");
> +	seq_printf(s, "%-35s%lld\n", "total inodes",
> +		   percpu_counter_sum(&m->total_inodes));
> +	seq_printf(s, "%-35s%lld\n", "opened files",
> +		   atomic64_read(&m->opened_files));
> +	seq_printf(s, "%-35s%lld\n", "pinned i_caps",
> +		   atomic64_read(&m->total_caps));
> +	seq_printf(s, "%-35s%lld\n", "opened inodes",
> +		   percpu_counter_sum(&m->opened_inodes));
> +	return 0;
> +}
> +
> +static int metrics_latency_show(struct seq_file *s, void *p)
> +{
> +	struct ceph_fs_client *fsc = s->private;
> +	struct ceph_client_metric *m = &fsc->mdsc->metric;
> +	s64 total, sum, avg, min, max, sq;
> +
>  	seq_printf(s, "item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)\n");
>  	seq_printf(s, "-----------------------------------------------------------------------------------\n");
>  
> @@ -199,7 +203,16 @@ static int metric_show(struct seq_file *s, void *p)
>  	spin_unlock(&m->metadata_metric_lock);
>  	CEPH_LAT_METRIC_SHOW("metadata", total, avg, min, max, sq);
>  
> -	seq_printf(s, "\n");
> +	return 0;
> +}
> +
> +static int metrics_size_show(struct seq_file *s, void *p)
> +{
> +	struct ceph_fs_client *fsc = s->private;
> +	struct ceph_client_metric *m = &fsc->mdsc->metric;
> +	s64 total;
> +	u64 sum_sz, avg_sz, min_sz, max_sz;
> +
>  	seq_printf(s, "item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)\n");
>  	seq_printf(s, "----------------------------------------------------------------------------------------\n");
>  
> @@ -221,7 +234,15 @@ static int metric_show(struct seq_file *s, void *p)
>  	spin_unlock(&m->write_metric_lock);
>  	CEPH_SZ_METRIC_SHOW("write", total, avg_sz, min_sz, max_sz, sum_sz);
>  
> -	seq_printf(s, "\n");
> +	return 0;
> +}
> +
> +static int metrics_caps_show(struct seq_file *s, void *p)
> +{
> +	struct ceph_fs_client *fsc = s->private;
> +	struct ceph_client_metric *m = &fsc->mdsc->metric;
> +	int nr_caps = 0;
> +
>  	seq_printf(s, "item          total           miss            hit\n");
>  	seq_printf(s, "-------------------------------------------------\n");
>  
> @@ -350,8 +371,11 @@ DEFINE_SHOW_ATTRIBUTE(mdsmap);
>  DEFINE_SHOW_ATTRIBUTE(mdsc);
>  DEFINE_SHOW_ATTRIBUTE(caps);
>  DEFINE_SHOW_ATTRIBUTE(mds_sessions);
> -DEFINE_SHOW_ATTRIBUTE(metric);
>  DEFINE_SHOW_ATTRIBUTE(status);
> +DEFINE_SHOW_ATTRIBUTE(metrics_file);
> +DEFINE_SHOW_ATTRIBUTE(metrics_latency);
> +DEFINE_SHOW_ATTRIBUTE(metrics_size);
> +DEFINE_SHOW_ATTRIBUTE(metrics_caps);
>  
>  
>  /*
> @@ -385,8 +409,9 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
>  	debugfs_remove(fsc->debugfs_mdsmap);
>  	debugfs_remove(fsc->debugfs_mds_sessions);
>  	debugfs_remove(fsc->debugfs_caps);
> -	debugfs_remove(fsc->debugfs_metric);
> +	debugfs_remove(fsc->debugfs_status);
>  	debugfs_remove(fsc->debugfs_mdsc);
> +	debugfs_remove_recursive(fsc->debugfs_metrics_dir);
>  }
>  
>  void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
> @@ -426,12 +451,6 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
>  						fsc,
>  						&mdsc_fops);
>  
> -	fsc->debugfs_metric = debugfs_create_file("metrics",
> -						  0400,
> -						  fsc->client->debugfs_dir,
> -						  fsc,
> -						  &metric_fops);
> -
>  	fsc->debugfs_caps = debugfs_create_file("caps",
>  						0400,
>  						fsc->client->debugfs_dir,
> @@ -443,6 +462,18 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
>  						  fsc->client->debugfs_dir,
>  						  fsc,
>  						  &status_fops);
> +
> +	fsc->debugfs_metrics_dir = debugfs_create_dir("metrics",
> +						      fsc->client->debugfs_dir);
> +
> +	debugfs_create_file("file", 0400, fsc->debugfs_metrics_dir, fsc,
> +			    &metrics_file_fops);
> +	debugfs_create_file("latency", 0400, fsc->debugfs_metrics_dir, fsc,
> +			    &metrics_latency_fops);
> +	debugfs_create_file("size", 0400, fsc->debugfs_metrics_dir, fsc,
> +			    &metrics_size_fops);
> +	debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc,
> +			    &metrics_caps_fops);
>  }
>  
>  
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 14f951cd5b61..795b077143d6 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -128,9 +128,9 @@ struct ceph_fs_client {
>  	struct dentry *debugfs_congestion_kb;
>  	struct dentry *debugfs_bdi;
>  	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
> -	struct dentry *debugfs_metric;
>  	struct dentry *debugfs_status;
>  	struct dentry *debugfs_mds_sessions;
> +	struct dentry *debugfs_metrics_dir;
>  #endif
>  
>  #ifdef CONFIG_CEPH_FSCACHE

-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] ceph: split 'metric' debugfs file into several files
  2021-10-27 10:01                     ` [PATCH] ceph: split 'metric' debugfs file into several files Luís Henriques
  2021-10-27 11:53                       ` Jeff Layton
@ 2021-10-27 12:00                       ` Xiubo Li
  1 sibling, 0 replies; 20+ messages in thread
From: Xiubo Li @ 2021-10-27 12:00 UTC (permalink / raw)
  To: Luís Henriques, Jeff Layton, Ilya Dryomov, Patrick Donnelly
  Cc: ceph-devel, linux-kernel


On 10/27/21 6:01 PM, Luís Henriques wrote:
> Currently, all the metrics are grouped together in a single file, making
> it difficult to process this file from scripts.  Furthermore, as new
> metrics are added, processing this file will become even more challenging.
>
> This patch turns the 'metric' file into a directory that will contain
> several files, one for each metric.
>
> Signed-off-by: Luís Henriques <lhenriques@suse.de>
> ---
> Here's a patch that does just what I proposed.  It simply turns the metric
> file into a directory.  Not sure this is the best option, but here it is
> for discussion.
>
> Cheers,
> --
> Luís
>
>   fs/ceph/debugfs.c | 81 ++++++++++++++++++++++++++++++++---------------
>   fs/ceph/super.h   |  2 +-
>   2 files changed, 57 insertions(+), 26 deletions(-)
>
> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
> index 38b78b45811f..55426514491b 100644
> --- a/fs/ceph/debugfs.c
> +++ b/fs/ceph/debugfs.c
> @@ -146,26 +146,30 @@ static int mdsc_show(struct seq_file *s, void *p)
>   		   name, total, avg, _min, max, sum);			\
>   }
>   
> -static int metric_show(struct seq_file *s, void *p)
> +static int metrics_file_show(struct seq_file *s, void *p)
>   {
>   	struct ceph_fs_client *fsc = s->private;
> -	struct ceph_mds_client *mdsc = fsc->mdsc;
> -	struct ceph_client_metric *m = &mdsc->metric;
> -	int nr_caps = 0;
> -	s64 total, sum, avg, min, max, sq;
> -	u64 sum_sz, avg_sz, min_sz, max_sz;
> +	struct ceph_client_metric *m = &fsc->mdsc->metric;
>   
> -	sum = percpu_counter_sum(&m->total_inodes);
>   	seq_printf(s, "item                               total\n");
>   	seq_printf(s, "------------------------------------------\n");
> -	seq_printf(s, "%-35s%lld / %lld\n", "opened files  / total inodes",
> -		   atomic64_read(&m->opened_files), sum);
> -	seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes",
> -		   atomic64_read(&m->total_caps), sum);
> -	seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes",
> -		   percpu_counter_sum(&m->opened_inodes), sum);
> -
> -	seq_printf(s, "\n");
> +	seq_printf(s, "%-35s%lld\n", "total inodes",
> +		   percpu_counter_sum(&m->total_inodes));
> +	seq_printf(s, "%-35s%lld\n", "opened files",
> +		   atomic64_read(&m->opened_files));
> +	seq_printf(s, "%-35s%lld\n", "pinned i_caps",
> +		   atomic64_read(&m->total_caps));
> +	seq_printf(s, "%-35s%lld\n", "opened inodes",
> +		   percpu_counter_sum(&m->opened_inodes));
> +	return 0;
> +}
> +
> +static int metrics_latency_show(struct seq_file *s, void *p)
> +{
> +	struct ceph_fs_client *fsc = s->private;
> +	struct ceph_client_metric *m = &fsc->mdsc->metric;
> +	s64 total, sum, avg, min, max, sq;
> +
>   	seq_printf(s, "item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)\n");
>   	seq_printf(s, "-----------------------------------------------------------------------------------\n");
>   
> @@ -199,7 +203,16 @@ static int metric_show(struct seq_file *s, void *p)
>   	spin_unlock(&m->metadata_metric_lock);
>   	CEPH_LAT_METRIC_SHOW("metadata", total, avg, min, max, sq);
>   
> -	seq_printf(s, "\n");
> +	return 0;
> +}
> +
> +static int metrics_size_show(struct seq_file *s, void *p)
> +{
> +	struct ceph_fs_client *fsc = s->private;
> +	struct ceph_client_metric *m = &fsc->mdsc->metric;
> +	s64 total;
> +	u64 sum_sz, avg_sz, min_sz, max_sz;
> +
>   	seq_printf(s, "item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)\n");
>   	seq_printf(s, "----------------------------------------------------------------------------------------\n");
>   
> @@ -221,7 +234,15 @@ static int metric_show(struct seq_file *s, void *p)
>   	spin_unlock(&m->write_metric_lock);
>   	CEPH_SZ_METRIC_SHOW("write", total, avg_sz, min_sz, max_sz, sum_sz);
>   
> -	seq_printf(s, "\n");
> +	return 0;
> +}
> +
> +static int metrics_caps_show(struct seq_file *s, void *p)
> +{
> +	struct ceph_fs_client *fsc = s->private;
> +	struct ceph_client_metric *m = &fsc->mdsc->metric;
> +	int nr_caps = 0;
> +
>   	seq_printf(s, "item          total           miss            hit\n");
>   	seq_printf(s, "-------------------------------------------------\n");
>   
> @@ -350,8 +371,11 @@ DEFINE_SHOW_ATTRIBUTE(mdsmap);
>   DEFINE_SHOW_ATTRIBUTE(mdsc);
>   DEFINE_SHOW_ATTRIBUTE(caps);
>   DEFINE_SHOW_ATTRIBUTE(mds_sessions);
> -DEFINE_SHOW_ATTRIBUTE(metric);
>   DEFINE_SHOW_ATTRIBUTE(status);
> +DEFINE_SHOW_ATTRIBUTE(metrics_file);
> +DEFINE_SHOW_ATTRIBUTE(metrics_latency);
> +DEFINE_SHOW_ATTRIBUTE(metrics_size);
> +DEFINE_SHOW_ATTRIBUTE(metrics_caps);
>   
>   
>   /*
> @@ -385,8 +409,9 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
>   	debugfs_remove(fsc->debugfs_mdsmap);
>   	debugfs_remove(fsc->debugfs_mds_sessions);
>   	debugfs_remove(fsc->debugfs_caps);
> -	debugfs_remove(fsc->debugfs_metric);
> +	debugfs_remove(fsc->debugfs_status);
>   	debugfs_remove(fsc->debugfs_mdsc);
> +	debugfs_remove_recursive(fsc->debugfs_metrics_dir);
>   }
>   
>   void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
> @@ -426,12 +451,6 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
>   						fsc,
>   						&mdsc_fops);
>   
> -	fsc->debugfs_metric = debugfs_create_file("metrics",
> -						  0400,
> -						  fsc->client->debugfs_dir,
> -						  fsc,
> -						  &metric_fops);
> -
>   	fsc->debugfs_caps = debugfs_create_file("caps",
>   						0400,
>   						fsc->client->debugfs_dir,
> @@ -443,6 +462,18 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
>   						  fsc->client->debugfs_dir,
>   						  fsc,
>   						  &status_fops);
> +
> +	fsc->debugfs_metrics_dir = debugfs_create_dir("metrics",
> +						      fsc->client->debugfs_dir);
> +
> +	debugfs_create_file("file", 0400, fsc->debugfs_metrics_dir, fsc,
> +			    &metrics_file_fops);
> +	debugfs_create_file("latency", 0400, fsc->debugfs_metrics_dir, fsc,
> +			    &metrics_latency_fops);
> +	debugfs_create_file("size", 0400, fsc->debugfs_metrics_dir, fsc,
> +			    &metrics_size_fops);
> +	debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc,
> +			    &metrics_caps_fops);
>   }
>   
>   
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 14f951cd5b61..795b077143d6 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -128,9 +128,9 @@ struct ceph_fs_client {
>   	struct dentry *debugfs_congestion_kb;
>   	struct dentry *debugfs_bdi;
>   	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
> -	struct dentry *debugfs_metric;
>   	struct dentry *debugfs_status;
>   	struct dentry *debugfs_mds_sessions;
> +	struct dentry *debugfs_metrics_dir;
>   #endif
>   
>   #ifdef CONFIG_CEPH_FSCACHE
>
LGTM.

Reviewed-by: Xiubo Li <xiubli@redhat.com>



^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2021-10-27 12:00 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-20 14:37 [RFC PATCH] ceph: add remote object copy counter to fs client Luís Henriques
2021-10-20 16:27 ` Jeff Layton
2021-10-20 16:58   ` Luís Henriques
2021-10-21 13:52   ` Patrick Donnelly
2021-10-21 15:43     ` Jeff Layton
2021-10-21 16:18       ` Patrick Donnelly
2021-10-21 16:35         ` Jeff Layton
2021-10-21 17:30           ` Patrick Donnelly
2021-10-21 17:33             ` Jeff Layton
2021-10-26  3:05             ` Xiubo Li
2021-10-26 11:40               ` Jeff Layton
2021-10-26 15:31                 ` Luís Henriques
2021-10-27  6:46                   ` Xiubo Li
2021-10-27 10:01                     ` [PATCH] ceph: split 'metric' debugfs file into several files Luís Henriques
2021-10-27 11:53                       ` Jeff Layton
2021-10-27 12:00                       ` Xiubo Li
2021-10-27  4:52                 ` [RFC PATCH] ceph: add remote object copy counter to fs client Xiubo Li
2021-10-25 10:12           ` Luís Henriques
2021-10-25 10:20             ` Jeff Layton
2021-10-25 10:52               ` Luís Henriques
