All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] ceph: show more detail logs during mount
@ 2020-02-25  3:30 xiubli
  2020-02-25 10:39 ` Ilya Dryomov
  0 siblings, 1 reply; 6+ messages in thread
From: xiubli @ 2020-02-25  3:30 UTC (permalink / raw)
  To: jlayton, idryomov; +Cc: sage, zyan, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Print the logs in error level to give a helpful hint to make it
more user-friendly to debug.

URL: https://tracker.ceph.com/issues/44215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/super.c       | 8 ++++++--
 net/ceph/mon_client.c | 2 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index c7f150686a53..e33c2f86647b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -905,8 +905,10 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 				     fsc->mount_options->server_path + 1 : "";
 
 		err = __ceph_open_session(fsc->client, started);
-		if (err < 0)
+		if (err < 0) {
+			pr_err("mount joining the ceph cluster fail %d\n", err);
 			goto out;
+		}
 
 		/* setup fscache */
 		if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
@@ -922,6 +924,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 		root = open_root_dentry(fsc, path, started);
 		if (IS_ERR(root)) {
 			err = PTR_ERR(root);
+			pr_err("mount opening the root directory fail %d\n",
+			       err);
 			goto out;
 		}
 		fsc->sb->s_root = dget(root);
@@ -1079,7 +1083,7 @@ static int ceph_get_tree(struct fs_context *fc)
 
 out_splat:
 	if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
-		pr_info("No mds server is up or the cluster is laggy\n");
+		pr_err("No mds server is up or the cluster is laggy\n");
 		err = -EHOSTUNREACH;
 	}
 
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9d9e4e4ea600..6f1372f5f2a7 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1179,6 +1179,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
 
 	if (ret < 0) {
 		monc->client->auth_err = ret;
+		pr_err("authenticate fail on mon%d %s\n", monc->cur_mon,
+			ceph_pr_addr(&monc->con.peer_addr));
 	} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
 		dout("authenticated, starting session\n");
 
-- 
2.21.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] ceph: show more detail logs during mount
  2020-02-25  3:30 [PATCH] ceph: show more detail logs during mount xiubli
@ 2020-02-25 10:39 ` Ilya Dryomov
  2020-02-26  0:58   ` Xiubo Li
  0 siblings, 1 reply; 6+ messages in thread
From: Ilya Dryomov @ 2020-02-25 10:39 UTC (permalink / raw)
  To: Xiubo Li
  Cc: Jeff Layton, Sage Weil, Yan, Zheng, Patrick Donnelly, Ceph Development

On Tue, Feb 25, 2020 at 4:30 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> Print the logs in error level to give a helpful hint to make it
> more user-friendly to debug.
>
> URL: https://tracker.ceph.com/issues/44215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/super.c       | 8 ++++++--
>  net/ceph/mon_client.c | 2 ++
>  2 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index c7f150686a53..e33c2f86647b 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -905,8 +905,10 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>                                      fsc->mount_options->server_path + 1 : "";
>
>                 err = __ceph_open_session(fsc->client, started);
> -               if (err < 0)
> +               if (err < 0) {
> +                       pr_err("mount joining the ceph cluster fail %d\n", err);
>                         goto out;
> +               }
>
>                 /* setup fscache */
>                 if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
> @@ -922,6 +924,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>                 root = open_root_dentry(fsc, path, started);
>                 if (IS_ERR(root)) {
>                         err = PTR_ERR(root);
> +                       pr_err("mount opening the root directory fail %d\n",
> +                              err);

Hi Xiubo,

Given that these are new user-level filesystem log messages, they
should probably go into fs_context log.

>                         goto out;
>                 }
>                 fsc->sb->s_root = dget(root);
> @@ -1079,7 +1083,7 @@ static int ceph_get_tree(struct fs_context *fc)
>
>  out_splat:
>         if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
> -               pr_info("No mds server is up or the cluster is laggy\n");
> +               pr_err("No mds server is up or the cluster is laggy\n");
>                 err = -EHOSTUNREACH;
>         }

If you are changing this one, it should be directed to fs_context log
as well.

>
> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
> index 9d9e4e4ea600..6f1372f5f2a7 100644
> --- a/net/ceph/mon_client.c
> +++ b/net/ceph/mon_client.c
> @@ -1179,6 +1179,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
>
>         if (ret < 0) {
>                 monc->client->auth_err = ret;
> +               pr_err("authenticate fail on mon%d %s\n", monc->cur_mon,
> +                       ceph_pr_addr(&monc->con.peer_addr));

I don't think this is needed.  Authentication errors are already logged
in ceph_handle_auth_reply().

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ceph: show more detail logs during mount
  2020-02-25 10:39 ` Ilya Dryomov
@ 2020-02-26  0:58   ` Xiubo Li
  0 siblings, 0 replies; 6+ messages in thread
From: Xiubo Li @ 2020-02-26  0:58 UTC (permalink / raw)
  To: Ilya Dryomov
  Cc: Jeff Layton, Sage Weil, Yan, Zheng, Patrick Donnelly, Ceph Development

On 2020/2/25 18:39, Ilya Dryomov wrote:
> On Tue, Feb 25, 2020 at 4:30 AM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Print the logs in error level to give a helpful hint to make it
>> more user-friendly to debug.
>>
>> URL: https://tracker.ceph.com/issues/44215
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/super.c       | 8 ++++++--
>>   net/ceph/mon_client.c | 2 ++
>>   2 files changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index c7f150686a53..e33c2f86647b 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -905,8 +905,10 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>>                                       fsc->mount_options->server_path + 1 : "";
>>
>>                  err = __ceph_open_session(fsc->client, started);
>> -               if (err < 0)
>> +               if (err < 0) {
>> +                       pr_err("mount joining the ceph cluster fail %d\n", err);
>>                          goto out;
>> +               }
>>
>>                  /* setup fscache */
>>                  if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
>> @@ -922,6 +924,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>>                  root = open_root_dentry(fsc, path, started);
>>                  if (IS_ERR(root)) {
>>                          err = PTR_ERR(root);
>> +                       pr_err("mount opening the root directory fail %d\n",
>> +                              err);
> Hi Xiubo,
>
> Given that these are new user-level filesystem log messages, they
> should probably go into fs_context log.

Yeah, will fix it.


>
>>                          goto out;
>>                  }
>>                  fsc->sb->s_root = dget(root);
>> @@ -1079,7 +1083,7 @@ static int ceph_get_tree(struct fs_context *fc)
>>
>>   out_splat:
>>          if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
>> -               pr_info("No mds server is up or the cluster is laggy\n");
>> +               pr_err("No mds server is up or the cluster is laggy\n");
>>                  err = -EHOSTUNREACH;
>>          }
> If you are changing this one, it should be directed to fs_context log
> as well.
Will do it.
>> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
>> index 9d9e4e4ea600..6f1372f5f2a7 100644
>> --- a/net/ceph/mon_client.c
>> +++ b/net/ceph/mon_client.c
>> @@ -1179,6 +1179,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
>>
>>          if (ret < 0) {
>>                  monc->client->auth_err = ret;
>> +               pr_err("authenticate fail on mon%d %s\n", monc->cur_mon,
>> +                       ceph_pr_addr(&monc->con.peer_addr));
> I don't think this is needed.  Authentication errors are already logged
> in ceph_handle_auth_reply().

Yeah, it is, here just want to give more messages, such as the monX. But 
it is not necessary. So I will delete it.

Thanks Ilya.

BRs

> Thanks,
>
>                  Ilya
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ceph: show more detail logs during mount
  2020-02-23 19:18 ` Jeff Layton
@ 2020-02-24  3:10   ` Xiubo Li
  0 siblings, 0 replies; 6+ messages in thread
From: Xiubo Li @ 2020-02-24  3:10 UTC (permalink / raw)
  To: Jeff Layton, idryomov; +Cc: sage, zyan, pdonnell, ceph-devel

On 2020/2/24 3:18, Jeff Layton wrote:
> On Sun, 2020-02-23 at 07:18 -0500, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Return -ETIMEDOUT when the requests are timed out instead of -EIO
>> to make it cleaner for the userland. And just print the logs in
>> error level to give a helpful hint.
>>
>> URL: https://tracker.ceph.com/issues/44215
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/mds_client.c   |  4 ++--
>>   fs/ceph/super.c        | 28 ++++++++++++++++++++--------
>>   net/ceph/ceph_common.c |  7 +++++--
>>   net/ceph/mon_client.c  |  1 +
>>   4 files changed, 28 insertions(+), 12 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 82f63ef2694c..0dfea8cdb50a 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -2578,7 +2578,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
>>   	if (req->r_timeout &&
>>   	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
>>   		dout("do_request timed out\n");
>> -		err = -EIO;
>> +		err = -ETIMEDOUT;
>>   		goto finish;
>>   	}
>>   	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
>> @@ -2752,7 +2752,7 @@ static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
>>   		if (timeleft > 0)
>>   			err = 0;
>>   		else if (!timeleft)
>> -			err = -EIO;  /* timed out */
>> +			err = -ETIMEDOUT;  /* timed out */
>>   		else
>>   			err = timeleft;  /* killed */
>>   	}
> I'm fine with the two hunks above.
>
> Note that AFAICT, r_timeout is only set at mount time, so we should
> never see this in other codepaths. This probably ought to be done in a
> separate patch from the rest.
Sure, will split it.
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 31acb4fe1f2c..6778f2a7d6d4 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -849,11 +849,13 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
>>   {
>>   	struct ceph_mds_client *mdsc = fsc->mdsc;
>>   	struct ceph_mds_request *req = NULL;
>> +	struct ceph_mds_session *session;
>>   	int err;
>>   	struct dentry *root;
>> +	char buf[32] = {0};
>>   
>>   	/* open dir */
>> -	dout("open_root_inode opening '%s'\n", path);
>> +	dout("mount open_root_inode opening '%s'\n", path);
>>   	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
>>   	if (IS_ERR(req))
>>   		return ERR_CAST(req);
>> @@ -873,18 +875,26 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
>>   	if (err == 0) {
>>   		struct inode *inode = req->r_target_inode;
>>   		req->r_target_inode = NULL;
>> -		dout("open_root_inode success\n");
>>   		root = d_make_root(inode);
>>   		if (!root) {
>>   			root = ERR_PTR(-ENOMEM);
>>   			goto out;
>>   		}
>> -		dout("open_root_inode success, root dentry is %p\n", root);
>> +		dout(" root dentry is %p\n", root);
>>   	} else {
>>   		root = ERR_PTR(err);
>>   	}
>>   out:
>> +	session = ceph_get_mds_session(req->r_session);
>> +	if (session)
>> +		snprintf(buf, 32, " on mds%d", session->s_mds);
>> +
> Where is this new session reference put? I'm not sure I understand this
> change.

Ah okay, forgot to add the put.

Usually the mount request will do in one specified MDS, here will show 
in which MDS it was trying to. Will it make sense ?

>>   	ceph_mdsc_put_request(req);
>> +	if (!IS_ERR(root))
>> +		dout("mount open_root_inode success%s\n", buf[0] ? buf : "");
>> +	else
>> +		pr_err("mount open_root_inode fail %ld%s\n", PTR_ERR(root),
>> +		       buf[0] ? buf : "");
>>   	return root;
>>   }
>>   
> This goes to the generic kernel log. This may warrant an error message
> there, but it needs to be something a typical user would understand.

Will fix it.

>> @@ -937,6 +947,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>>   
>>   out:
>>   	mutex_unlock(&fsc->client->mount_mutex);
>> +	pr_err("mount fail\n");
> This is not something we'd want to add here.
Okay, will delete it.
>>   	return ERR_PTR(err);
>>   }
>>   
>> @@ -1028,7 +1039,7 @@ static int ceph_get_tree(struct fs_context *fc)
>>   		ceph_compare_super;
>>   	int err;
>>   
>> -	dout("ceph_get_tree\n");
>> +	dout("ceph_get_tree start\n");
>>   
>>   	if (!fc->source)
>>   		return invalfc(fc, "No source");
>> @@ -1073,14 +1084,15 @@ static int ceph_get_tree(struct fs_context *fc)
>>   		err = PTR_ERR(res);
>>   		goto out_splat;
>>   	}
>> -	dout("root %p inode %p ino %llx.%llx\n", res,
>> -	     d_inode(res), ceph_vinop(d_inode(res)));
>> +        dout(" root %p inode %p ino %llx.%llx\n",
>> +	     res, d_inode(res), ceph_vinop(d_inode(res)));
>>   	fc->root = fsc->sb->s_root;
>> +	dout("ceph_get_tree success\n");
>>   	return 0;
>>   
>>   out_splat:
>>   	if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
>> -		pr_info("No mds server is up or the cluster is laggy\n");
>> +		pr_err("No mds server is up or the cluster is laggy\n");
>> 		err = -EHOSTUNREACH;
> Is pr_err the right infolevel here? It may be, I'm just curious why the
> change after making this pr_info before.

This should be as an error IMO, before we may just want an hint but 
ignore the level stuff.


>>   	}
>>   
>> @@ -1091,7 +1103,7 @@ static int ceph_get_tree(struct fs_context *fc)
>>   out:
>>   	destroy_fs_client(fsc);
>>   out_final:
>> -	dout("ceph_get_tree fail %d\n", err);
>> +	pr_err("ceph_get_tree fail %d\n", err);
> This doesn't seem to be something we want to pr_err.
>
>>   	return err;
>>   }
>>   
>> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
>> index a0e97f6c1072..5971a815fb8e 100644
>> --- a/net/ceph/ceph_common.c
>> +++ b/net/ceph/ceph_common.c
>> @@ -700,11 +700,14 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
>>   		return err;
>>   
>>   	while (!have_mon_and_osd_map(client)) {
>> -		if (timeout && time_after_eq(jiffies, started + timeout))
>> +		if (timeout && time_after_eq(jiffies, started + timeout)) {
>> +			pr_err("mount wating for mon/osd maps timed out on mon%d\n",
>> +			       client->monc.cur_mon);
>>   			return -ETIMEDOUT;
>> +		}
>>
> This code is also called from RBD codepaths and those don't necessarily
> involve a "mount". You should consider how to make the new message more
> generic, or maybe handle it at a higher level, so that rbd isn't
> affected.

Okay, actually the RBD is also in the 'mount' codepath here.


>>   		/* wait */
>> -		dout("mount waiting for mon_map\n");
>> +		dout("mount waiting for mon/osd maps\n");
>>   		err = wait_event_interruptible_timeout(client->auth_wq,
>>   			have_mon_and_osd_map(client) || (client->auth_err < 0),
>>   			ceph_timeout_jiffies(timeout));
>
>> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
>> index 9d9e4e4ea600..8f09df9c3aee 100644
>> --- a/net/ceph/mon_client.c
>> +++ b/net/ceph/mon_client.c
>> @@ -1179,6 +1179,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
>>   
>>   	if (ret < 0) {
>>   		monc->client->auth_err = ret;
>> +		pr_err("mon%d session auth failed %d\n", monc->cur_mon, ret);
>>   	} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
>>   		dout("authenticated, starting session\n");
>>   
> Again, these need to be human-readable, and not just debugging messages
> that are hard to disable.

Okay, will fix it.

Thanks.

BRs

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] ceph: show more detail logs during mount
  2020-02-23 12:18 xiubli
@ 2020-02-23 19:18 ` Jeff Layton
  2020-02-24  3:10   ` Xiubo Li
  0 siblings, 1 reply; 6+ messages in thread
From: Jeff Layton @ 2020-02-23 19:18 UTC (permalink / raw)
  To: xiubli, idryomov; +Cc: sage, zyan, pdonnell, ceph-devel

On Sun, 2020-02-23 at 07:18 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Return -ETIMEDOUT when the requests are timed out instead of -EIO
> to make it cleaner for the userland. And just print the logs in
> error level to give a helpful hint.
> 
> URL: https://tracker.ceph.com/issues/44215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mds_client.c   |  4 ++--
>  fs/ceph/super.c        | 28 ++++++++++++++++++++--------
>  net/ceph/ceph_common.c |  7 +++++--
>  net/ceph/mon_client.c  |  1 +
>  4 files changed, 28 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 82f63ef2694c..0dfea8cdb50a 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2578,7 +2578,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
>  	if (req->r_timeout &&
>  	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
>  		dout("do_request timed out\n");
> -		err = -EIO;
> +		err = -ETIMEDOUT;
>  		goto finish;
>  	}
>  	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
> @@ -2752,7 +2752,7 @@ static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
>  		if (timeleft > 0)
>  			err = 0;
>  		else if (!timeleft)
> -			err = -EIO;  /* timed out */
> +			err = -ETIMEDOUT;  /* timed out */
>  		else
>  			err = timeleft;  /* killed */
>  	}

I'm fine with the two hunks above.

Note that AFAICT, r_timeout is only set at mount time, so we should
never see this in other codepaths. This probably ought to be done in a
separate patch from the rest.

> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 31acb4fe1f2c..6778f2a7d6d4 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -849,11 +849,13 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
>  {
>  	struct ceph_mds_client *mdsc = fsc->mdsc;
>  	struct ceph_mds_request *req = NULL;
> +	struct ceph_mds_session *session;
>  	int err;
>  	struct dentry *root;
> +	char buf[32] = {0};
>  
>  	/* open dir */
> -	dout("open_root_inode opening '%s'\n", path);
> +	dout("mount open_root_inode opening '%s'\n", path);
>  	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
>  	if (IS_ERR(req))
>  		return ERR_CAST(req);
> @@ -873,18 +875,26 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
>  	if (err == 0) {
>  		struct inode *inode = req->r_target_inode;
>  		req->r_target_inode = NULL;
> -		dout("open_root_inode success\n");
>  		root = d_make_root(inode);
>  		if (!root) {
>  			root = ERR_PTR(-ENOMEM);
>  			goto out;
>  		}
> -		dout("open_root_inode success, root dentry is %p\n", root);
> +		dout(" root dentry is %p\n", root);
>  	} else {
>  		root = ERR_PTR(err);
>  	}
>  out:
> +	session = ceph_get_mds_session(req->r_session);
> +	if (session)
> +		snprintf(buf, 32, " on mds%d", session->s_mds);
> +

Where is this new session reference put? I'm not sure I understand this
change.

>  	ceph_mdsc_put_request(req);
> +	if (!IS_ERR(root))
> +		dout("mount open_root_inode success%s\n", buf[0] ? buf : "");
> +	else
> +		pr_err("mount open_root_inode fail %ld%s\n", PTR_ERR(root),
> +		       buf[0] ? buf : "");
>  	return root;
>  }
>  

This goes to the generic kernel log. This may warrant an error message
there, but it needs to be something a typical user would understand.

> @@ -937,6 +947,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>  
>  out:
>  	mutex_unlock(&fsc->client->mount_mutex);
> +	pr_err("mount fail\n");

This is not something we'd want to add here.

>  	return ERR_PTR(err);
>  }
>  
> @@ -1028,7 +1039,7 @@ static int ceph_get_tree(struct fs_context *fc)
>  		ceph_compare_super;
>  	int err;
>  
> -	dout("ceph_get_tree\n");
> +	dout("ceph_get_tree start\n");
>  
>  	if (!fc->source)
>  		return invalfc(fc, "No source");
> @@ -1073,14 +1084,15 @@ static int ceph_get_tree(struct fs_context *fc)
>  		err = PTR_ERR(res);
>  		goto out_splat;
>  	}
> -	dout("root %p inode %p ino %llx.%llx\n", res,
> -	     d_inode(res), ceph_vinop(d_inode(res)));
> +        dout(" root %p inode %p ino %llx.%llx\n",
> +	     res, d_inode(res), ceph_vinop(d_inode(res)));
>  	fc->root = fsc->sb->s_root;
> +	dout("ceph_get_tree success\n");
>  	return 0;
>  
>  out_splat:
>  	if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
> -		pr_info("No mds server is up or the cluster is laggy\n");
> +		pr_err("No mds server is up or the cluster is laggy\n");
> 		err = -EHOSTUNREACH;

Is pr_err the right infolevel here? It may be, I'm just curious why the
change after making this pr_info before.

>  	}
>  
> @@ -1091,7 +1103,7 @@ static int ceph_get_tree(struct fs_context *fc)
>  out:
>  	destroy_fs_client(fsc);
>  out_final:
> -	dout("ceph_get_tree fail %d\n", err);
> +	pr_err("ceph_get_tree fail %d\n", err);

This doesn't seem to be something we want to pr_err.

>  	return err;
>  }
>  
> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> index a0e97f6c1072..5971a815fb8e 100644
> --- a/net/ceph/ceph_common.c
> +++ b/net/ceph/ceph_common.c
> @@ -700,11 +700,14 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
>  		return err;
>  
>  	while (!have_mon_and_osd_map(client)) {
> -		if (timeout && time_after_eq(jiffies, started + timeout))
> +		if (timeout && time_after_eq(jiffies, started + timeout)) {
> +			pr_err("mount wating for mon/osd maps timed out on mon%d\n",
> +			       client->monc.cur_mon);
>  			return -ETIMEDOUT;
> +		}
> 

This code is also called from RBD codepaths and those don't necessarily
involve a "mount". You should consider how to make the new message more
generic, or maybe handle it at a higher level, so that rbd isn't
affected.

>  		/* wait */
> -		dout("mount waiting for mon_map\n");
> +		dout("mount waiting for mon/osd maps\n");
>  		err = wait_event_interruptible_timeout(client->auth_wq,
>  			have_mon_and_osd_map(client) || (client->auth_err < 0),
>  			ceph_timeout_jiffies(timeout));


> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
> index 9d9e4e4ea600..8f09df9c3aee 100644
> --- a/net/ceph/mon_client.c
> +++ b/net/ceph/mon_client.c
> @@ -1179,6 +1179,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
>  
>  	if (ret < 0) {
>  		monc->client->auth_err = ret;
> +		pr_err("mon%d session auth failed %d\n", monc->cur_mon, ret);
>  	} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
>  		dout("authenticated, starting session\n");
>  

Again, these need to be human-readable, and not just debugging messages
that are hard to disable.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] ceph: show more detail logs during mount
@ 2020-02-23 12:18 xiubli
  2020-02-23 19:18 ` Jeff Layton
  0 siblings, 1 reply; 6+ messages in thread
From: xiubli @ 2020-02-23 12:18 UTC (permalink / raw)
  To: jlayton, idryomov; +Cc: sage, zyan, pdonnell, ceph-devel, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Return -ETIMEDOUT when the requests are timed out instead of -EIO
to make it cleaner for the userland. And just print the logs in
error level to give a helpful hint.

URL: https://tracker.ceph.com/issues/44215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c   |  4 ++--
 fs/ceph/super.c        | 28 ++++++++++++++++++++--------
 net/ceph/ceph_common.c |  7 +++++--
 net/ceph/mon_client.c  |  1 +
 4 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 82f63ef2694c..0dfea8cdb50a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2578,7 +2578,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
 	if (req->r_timeout &&
 	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
 		dout("do_request timed out\n");
-		err = -EIO;
+		err = -ETIMEDOUT;
 		goto finish;
 	}
 	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
@@ -2752,7 +2752,7 @@ static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
 		if (timeleft > 0)
 			err = 0;
 		else if (!timeleft)
-			err = -EIO;  /* timed out */
+			err = -ETIMEDOUT;  /* timed out */
 		else
 			err = timeleft;  /* killed */
 	}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 31acb4fe1f2c..6778f2a7d6d4 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -849,11 +849,13 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 {
 	struct ceph_mds_client *mdsc = fsc->mdsc;
 	struct ceph_mds_request *req = NULL;
+	struct ceph_mds_session *session;
 	int err;
 	struct dentry *root;
+	char buf[32] = {0};
 
 	/* open dir */
-	dout("open_root_inode opening '%s'\n", path);
+	dout("mount open_root_inode opening '%s'\n", path);
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
 	if (IS_ERR(req))
 		return ERR_CAST(req);
@@ -873,18 +875,26 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 	if (err == 0) {
 		struct inode *inode = req->r_target_inode;
 		req->r_target_inode = NULL;
-		dout("open_root_inode success\n");
 		root = d_make_root(inode);
 		if (!root) {
 			root = ERR_PTR(-ENOMEM);
 			goto out;
 		}
-		dout("open_root_inode success, root dentry is %p\n", root);
+		dout(" root dentry is %p\n", root);
 	} else {
 		root = ERR_PTR(err);
 	}
 out:
+	session = ceph_get_mds_session(req->r_session);
+	if (session)
+		snprintf(buf, 32, " on mds%d", session->s_mds);
+
 	ceph_mdsc_put_request(req);
+	if (!IS_ERR(root))
+		dout("mount open_root_inode success%s\n", buf[0] ? buf : "");
+	else
+		pr_err("mount open_root_inode fail %ld%s\n", PTR_ERR(root),
+		       buf[0] ? buf : "");
 	return root;
 }
 
@@ -937,6 +947,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 
 out:
 	mutex_unlock(&fsc->client->mount_mutex);
+	pr_err("mount fail\n");
 	return ERR_PTR(err);
 }
 
@@ -1028,7 +1039,7 @@ static int ceph_get_tree(struct fs_context *fc)
 		ceph_compare_super;
 	int err;
 
-	dout("ceph_get_tree\n");
+	dout("ceph_get_tree start\n");
 
 	if (!fc->source)
 		return invalfc(fc, "No source");
@@ -1073,14 +1084,15 @@ static int ceph_get_tree(struct fs_context *fc)
 		err = PTR_ERR(res);
 		goto out_splat;
 	}
-	dout("root %p inode %p ino %llx.%llx\n", res,
-	     d_inode(res), ceph_vinop(d_inode(res)));
+        dout(" root %p inode %p ino %llx.%llx\n",
+	     res, d_inode(res), ceph_vinop(d_inode(res)));
 	fc->root = fsc->sb->s_root;
+	dout("ceph_get_tree success\n");
 	return 0;
 
 out_splat:
 	if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
-		pr_info("No mds server is up or the cluster is laggy\n");
+		pr_err("No mds server is up or the cluster is laggy\n");
 		err = -EHOSTUNREACH;
 	}
 
@@ -1091,7 +1103,7 @@ static int ceph_get_tree(struct fs_context *fc)
 out:
 	destroy_fs_client(fsc);
 out_final:
-	dout("ceph_get_tree fail %d\n", err);
+	pr_err("ceph_get_tree fail %d\n", err);
 	return err;
 }
 
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a0e97f6c1072..5971a815fb8e 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -700,11 +700,14 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
 		return err;
 
 	while (!have_mon_and_osd_map(client)) {
-		if (timeout && time_after_eq(jiffies, started + timeout))
+		if (timeout && time_after_eq(jiffies, started + timeout)) {
+			pr_err("mount wating for mon/osd maps timed out on mon%d\n",
+			       client->monc.cur_mon);
 			return -ETIMEDOUT;
+		}
 
 		/* wait */
-		dout("mount waiting for mon_map\n");
+		dout("mount waiting for mon/osd maps\n");
 		err = wait_event_interruptible_timeout(client->auth_wq,
 			have_mon_and_osd_map(client) || (client->auth_err < 0),
 			ceph_timeout_jiffies(timeout));
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9d9e4e4ea600..8f09df9c3aee 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1179,6 +1179,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
 
 	if (ret < 0) {
 		monc->client->auth_err = ret;
+		pr_err("mon%d session auth failed %d\n", monc->cur_mon, ret);
 	} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
 		dout("authenticated, starting session\n");
 
-- 
2.21.0

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-02-26  0:58 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-25  3:30 [PATCH] ceph: show more detail logs during mount xiubli
2020-02-25 10:39 ` Ilya Dryomov
2020-02-26  0:58   ` Xiubo Li
  -- strict thread matches above, loose matches on Subject: below --
2020-02-23 12:18 xiubli
2020-02-23 19:18 ` Jeff Layton
2020-02-24  3:10   ` Xiubo Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.