* [PATCH] ceph: show more detail logs during mount
@ 2020-02-25 3:30 xiubli
2020-02-25 10:39 ` Ilya Dryomov
0 siblings, 1 reply; 6+ messages in thread
From: xiubli @ 2020-02-25 3:30 UTC (permalink / raw)
To: jlayton, idryomov; +Cc: sage, zyan, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
Print the logs in error level to give a helpful hint to make it
more user-friendly to debug.
URL: https://tracker.ceph.com/issues/44215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/super.c | 8 ++++++--
net/ceph/mon_client.c | 2 ++
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index c7f150686a53..e33c2f86647b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -905,8 +905,10 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
fsc->mount_options->server_path + 1 : "";
err = __ceph_open_session(fsc->client, started);
- if (err < 0)
+ if (err < 0) {
+ pr_err("mount joining the ceph cluster fail %d\n", err);
goto out;
+ }
/* setup fscache */
if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
@@ -922,6 +924,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
root = open_root_dentry(fsc, path, started);
if (IS_ERR(root)) {
err = PTR_ERR(root);
+ pr_err("mount opening the root directory fail %d\n",
+ err);
goto out;
}
fsc->sb->s_root = dget(root);
@@ -1079,7 +1083,7 @@ static int ceph_get_tree(struct fs_context *fc)
out_splat:
if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
- pr_info("No mds server is up or the cluster is laggy\n");
+ pr_err("No mds server is up or the cluster is laggy\n");
err = -EHOSTUNREACH;
}
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9d9e4e4ea600..6f1372f5f2a7 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1179,6 +1179,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
if (ret < 0) {
monc->client->auth_err = ret;
+ pr_err("authenticate fail on mon%d %s\n", monc->cur_mon,
+ ceph_pr_addr(&monc->con.peer_addr));
} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
--
2.21.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] ceph: show more detail logs during mount
2020-02-25 3:30 [PATCH] ceph: show more detail logs during mount xiubli
@ 2020-02-25 10:39 ` Ilya Dryomov
2020-02-26 0:58 ` Xiubo Li
0 siblings, 1 reply; 6+ messages in thread
From: Ilya Dryomov @ 2020-02-25 10:39 UTC (permalink / raw)
To: Xiubo Li
Cc: Jeff Layton, Sage Weil, Yan, Zheng, Patrick Donnelly, Ceph Development
On Tue, Feb 25, 2020 at 4:30 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> Print the logs in error level to give a helpful hint to make it
> more user-friendly to debug.
>
> URL: https://tracker.ceph.com/issues/44215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/super.c | 8 ++++++--
> net/ceph/mon_client.c | 2 ++
> 2 files changed, 8 insertions(+), 2 deletions(-)
>
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index c7f150686a53..e33c2f86647b 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -905,8 +905,10 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
> fsc->mount_options->server_path + 1 : "";
>
> err = __ceph_open_session(fsc->client, started);
> - if (err < 0)
> + if (err < 0) {
> + pr_err("mount joining the ceph cluster fail %d\n", err);
> goto out;
> + }
>
> /* setup fscache */
> if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
> @@ -922,6 +924,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
> root = open_root_dentry(fsc, path, started);
> if (IS_ERR(root)) {
> err = PTR_ERR(root);
> + pr_err("mount opening the root directory fail %d\n",
> + err);
Hi Xiubo,
Given that these are new user-level filesystem log messages, they
should probably go into fs_context log.
> goto out;
> }
> fsc->sb->s_root = dget(root);
> @@ -1079,7 +1083,7 @@ static int ceph_get_tree(struct fs_context *fc)
>
> out_splat:
> if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
> - pr_info("No mds server is up or the cluster is laggy\n");
> + pr_err("No mds server is up or the cluster is laggy\n");
> err = -EHOSTUNREACH;
> }
If you are changing this one, it should be directed to fs_context log
as well.
>
> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
> index 9d9e4e4ea600..6f1372f5f2a7 100644
> --- a/net/ceph/mon_client.c
> +++ b/net/ceph/mon_client.c
> @@ -1179,6 +1179,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
>
> if (ret < 0) {
> monc->client->auth_err = ret;
> + pr_err("authenticate fail on mon%d %s\n", monc->cur_mon,
> + ceph_pr_addr(&monc->con.peer_addr));
I don't think this is needed. Authentication errors are already logged
in ceph_handle_auth_reply().
Thanks,
Ilya
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] ceph: show more detail logs during mount
2020-02-25 10:39 ` Ilya Dryomov
@ 2020-02-26 0:58 ` Xiubo Li
0 siblings, 0 replies; 6+ messages in thread
From: Xiubo Li @ 2020-02-26 0:58 UTC (permalink / raw)
To: Ilya Dryomov
Cc: Jeff Layton, Sage Weil, Yan, Zheng, Patrick Donnelly, Ceph Development
On 2020/2/25 18:39, Ilya Dryomov wrote:
> On Tue, Feb 25, 2020 at 4:30 AM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Print the logs in error level to give a helpful hint to make it
>> more user-friendly to debug.
>>
>> URL: https://tracker.ceph.com/issues/44215
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>> fs/ceph/super.c | 8 ++++++--
>> net/ceph/mon_client.c | 2 ++
>> 2 files changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index c7f150686a53..e33c2f86647b 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -905,8 +905,10 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>> fsc->mount_options->server_path + 1 : "";
>>
>> err = __ceph_open_session(fsc->client, started);
>> - if (err < 0)
>> + if (err < 0) {
>> + pr_err("mount joining the ceph cluster fail %d\n", err);
>> goto out;
>> + }
>>
>> /* setup fscache */
>> if (fsc->mount_options->flags & CEPH_MOUNT_OPT_FSCACHE) {
>> @@ -922,6 +924,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>> root = open_root_dentry(fsc, path, started);
>> if (IS_ERR(root)) {
>> err = PTR_ERR(root);
>> + pr_err("mount opening the root directory fail %d\n",
>> + err);
> Hi Xiubo,
>
> Given that these are new user-level filesystem log messages, they
> should probably go into fs_context log.
Yeah, will fix it.
>
>> goto out;
>> }
>> fsc->sb->s_root = dget(root);
>> @@ -1079,7 +1083,7 @@ static int ceph_get_tree(struct fs_context *fc)
>>
>> out_splat:
>> if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
>> - pr_info("No mds server is up or the cluster is laggy\n");
>> + pr_err("No mds server is up or the cluster is laggy\n");
>> err = -EHOSTUNREACH;
>> }
> If you are changing this one, it should be directed to fs_context log
> as well.
Will do it.
>> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
>> index 9d9e4e4ea600..6f1372f5f2a7 100644
>> --- a/net/ceph/mon_client.c
>> +++ b/net/ceph/mon_client.c
>> @@ -1179,6 +1179,8 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
>>
>> if (ret < 0) {
>> monc->client->auth_err = ret;
>> + pr_err("authenticate fail on mon%d %s\n", monc->cur_mon,
>> + ceph_pr_addr(&monc->con.peer_addr));
> I don't think this is needed. Authentication errors are already logged
> in ceph_handle_auth_reply().
Yeah, it is, here just want to give more messages, such as the monX. But
it is not necessary. So I will delete it.
Thanks Ilya.
BRs
> Thanks,
>
> Ilya
>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] ceph: show more detail logs during mount
2020-02-23 19:18 ` Jeff Layton
@ 2020-02-24 3:10 ` Xiubo Li
0 siblings, 0 replies; 6+ messages in thread
From: Xiubo Li @ 2020-02-24 3:10 UTC (permalink / raw)
To: Jeff Layton, idryomov; +Cc: sage, zyan, pdonnell, ceph-devel
On 2020/2/24 3:18, Jeff Layton wrote:
> On Sun, 2020-02-23 at 07:18 -0500, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Return -ETIMEDOUT when the requests are timed out instead of -EIO
>> to make it cleaner for the userland. And just print the logs in
>> error level to give a helpful hint.
>>
>> URL: https://tracker.ceph.com/issues/44215
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>> fs/ceph/mds_client.c | 4 ++--
>> fs/ceph/super.c | 28 ++++++++++++++++++++--------
>> net/ceph/ceph_common.c | 7 +++++--
>> net/ceph/mon_client.c | 1 +
>> 4 files changed, 28 insertions(+), 12 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 82f63ef2694c..0dfea8cdb50a 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -2578,7 +2578,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
>> if (req->r_timeout &&
>> time_after_eq(jiffies, req->r_started + req->r_timeout)) {
>> dout("do_request timed out\n");
>> - err = -EIO;
>> + err = -ETIMEDOUT;
>> goto finish;
>> }
>> if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
>> @@ -2752,7 +2752,7 @@ static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
>> if (timeleft > 0)
>> err = 0;
>> else if (!timeleft)
>> - err = -EIO; /* timed out */
>> + err = -ETIMEDOUT; /* timed out */
>> else
>> err = timeleft; /* killed */
>> }
> I'm fine with the two hunks above.
>
> Note that AFAICT, r_timeout is only set at mount time, so we should
> never see this in other codepaths. This probably ought to be done in a
> separate patch from the rest.
Sure, will split it.
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 31acb4fe1f2c..6778f2a7d6d4 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -849,11 +849,13 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
>> {
>> struct ceph_mds_client *mdsc = fsc->mdsc;
>> struct ceph_mds_request *req = NULL;
>> + struct ceph_mds_session *session;
>> int err;
>> struct dentry *root;
>> + char buf[32] = {0};
>>
>> /* open dir */
>> - dout("open_root_inode opening '%s'\n", path);
>> + dout("mount open_root_inode opening '%s'\n", path);
>> req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
>> if (IS_ERR(req))
>> return ERR_CAST(req);
>> @@ -873,18 +875,26 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
>> if (err == 0) {
>> struct inode *inode = req->r_target_inode;
>> req->r_target_inode = NULL;
>> - dout("open_root_inode success\n");
>> root = d_make_root(inode);
>> if (!root) {
>> root = ERR_PTR(-ENOMEM);
>> goto out;
>> }
>> - dout("open_root_inode success, root dentry is %p\n", root);
>> + dout(" root dentry is %p\n", root);
>> } else {
>> root = ERR_PTR(err);
>> }
>> out:
>> + session = ceph_get_mds_session(req->r_session);
>> + if (session)
>> + snprintf(buf, 32, " on mds%d", session->s_mds);
>> +
> Where is this new session reference put? I'm not sure I understand this
> change.
Ah okay, forgot to add the put.
Usually the mount request will do in one specified MDS, here will show
in which MDS it was trying to. Will it make sense ?
>> ceph_mdsc_put_request(req);
>> + if (!IS_ERR(root))
>> + dout("mount open_root_inode success%s\n", buf[0] ? buf : "");
>> + else
>> + pr_err("mount open_root_inode fail %ld%s\n", PTR_ERR(root),
>> + buf[0] ? buf : "");
>> return root;
>> }
>>
> This goes to the generic kernel log. This may warrant an error message
> there, but it needs to be something a typical user would understand.
Will fix it.
>> @@ -937,6 +947,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>>
>> out:
>> mutex_unlock(&fsc->client->mount_mutex);
>> + pr_err("mount fail\n");
> This is not something we'd want to add here.
Okay, will delete it.
>> return ERR_PTR(err);
>> }
>>
>> @@ -1028,7 +1039,7 @@ static int ceph_get_tree(struct fs_context *fc)
>> ceph_compare_super;
>> int err;
>>
>> - dout("ceph_get_tree\n");
>> + dout("ceph_get_tree start\n");
>>
>> if (!fc->source)
>> return invalfc(fc, "No source");
>> @@ -1073,14 +1084,15 @@ static int ceph_get_tree(struct fs_context *fc)
>> err = PTR_ERR(res);
>> goto out_splat;
>> }
>> - dout("root %p inode %p ino %llx.%llx\n", res,
>> - d_inode(res), ceph_vinop(d_inode(res)));
>> + dout(" root %p inode %p ino %llx.%llx\n",
>> + res, d_inode(res), ceph_vinop(d_inode(res)));
>> fc->root = fsc->sb->s_root;
>> + dout("ceph_get_tree success\n");
>> return 0;
>>
>> out_splat:
>> if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
>> - pr_info("No mds server is up or the cluster is laggy\n");
>> + pr_err("No mds server is up or the cluster is laggy\n");
>> err = -EHOSTUNREACH;
> Is pr_err the right infolevel here? It may be, I'm just curious why the
> change after making this pr_info before.
This should be as an error IMO, before we may just want an hint but
ignore the level stuff.
>> }
>>
>> @@ -1091,7 +1103,7 @@ static int ceph_get_tree(struct fs_context *fc)
>> out:
>> destroy_fs_client(fsc);
>> out_final:
>> - dout("ceph_get_tree fail %d\n", err);
>> + pr_err("ceph_get_tree fail %d\n", err);
> This doesn't seem to be something we want to pr_err.
>
>> return err;
>> }
>>
>> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
>> index a0e97f6c1072..5971a815fb8e 100644
>> --- a/net/ceph/ceph_common.c
>> +++ b/net/ceph/ceph_common.c
>> @@ -700,11 +700,14 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
>> return err;
>>
>> while (!have_mon_and_osd_map(client)) {
>> - if (timeout && time_after_eq(jiffies, started + timeout))
>> + if (timeout && time_after_eq(jiffies, started + timeout)) {
>> + pr_err("mount wating for mon/osd maps timed out on mon%d\n",
>> + client->monc.cur_mon);
>> return -ETIMEDOUT;
>> + }
>>
> This code is also called from RBD codepaths and those don't necessarily
> involve a "mount". You should consider how to make the new message more
> generic, or maybe handle it at a higher level, so that rbd isn't
> affected.
Okay, actually the RBD is also in the 'mount' codepath here.
>> /* wait */
>> - dout("mount waiting for mon_map\n");
>> + dout("mount waiting for mon/osd maps\n");
>> err = wait_event_interruptible_timeout(client->auth_wq,
>> have_mon_and_osd_map(client) || (client->auth_err < 0),
>> ceph_timeout_jiffies(timeout));
>
>> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
>> index 9d9e4e4ea600..8f09df9c3aee 100644
>> --- a/net/ceph/mon_client.c
>> +++ b/net/ceph/mon_client.c
>> @@ -1179,6 +1179,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
>>
>> if (ret < 0) {
>> monc->client->auth_err = ret;
>> + pr_err("mon%d session auth failed %d\n", monc->cur_mon, ret);
>> } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
>> dout("authenticated, starting session\n");
>>
> Again, these need to be human-readable, and not just debugging messages
> that are hard to disable.
Okay, will fix it.
Thanks.
BRs
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] ceph: show more detail logs during mount
2020-02-23 12:18 xiubli
@ 2020-02-23 19:18 ` Jeff Layton
2020-02-24 3:10 ` Xiubo Li
0 siblings, 1 reply; 6+ messages in thread
From: Jeff Layton @ 2020-02-23 19:18 UTC (permalink / raw)
To: xiubli, idryomov; +Cc: sage, zyan, pdonnell, ceph-devel
On Sun, 2020-02-23 at 07:18 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> Return -ETIMEDOUT when the requests are timed out instead of -EIO
> to make it cleaner for the userland. And just print the logs in
> error level to give a helpful hint.
>
> URL: https://tracker.ceph.com/issues/44215
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/mds_client.c | 4 ++--
> fs/ceph/super.c | 28 ++++++++++++++++++++--------
> net/ceph/ceph_common.c | 7 +++++--
> net/ceph/mon_client.c | 1 +
> 4 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 82f63ef2694c..0dfea8cdb50a 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2578,7 +2578,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
> if (req->r_timeout &&
> time_after_eq(jiffies, req->r_started + req->r_timeout)) {
> dout("do_request timed out\n");
> - err = -EIO;
> + err = -ETIMEDOUT;
> goto finish;
> }
> if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
> @@ -2752,7 +2752,7 @@ static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
> if (timeleft > 0)
> err = 0;
> else if (!timeleft)
> - err = -EIO; /* timed out */
> + err = -ETIMEDOUT; /* timed out */
> else
> err = timeleft; /* killed */
> }
I'm fine with the two hunks above.
Note that AFAICT, r_timeout is only set at mount time, so we should
never see this in other codepaths. This probably ought to be done in a
separate patch from the rest.
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 31acb4fe1f2c..6778f2a7d6d4 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -849,11 +849,13 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
> {
> struct ceph_mds_client *mdsc = fsc->mdsc;
> struct ceph_mds_request *req = NULL;
> + struct ceph_mds_session *session;
> int err;
> struct dentry *root;
> + char buf[32] = {0};
>
> /* open dir */
> - dout("open_root_inode opening '%s'\n", path);
> + dout("mount open_root_inode opening '%s'\n", path);
> req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
> if (IS_ERR(req))
> return ERR_CAST(req);
> @@ -873,18 +875,26 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
> if (err == 0) {
> struct inode *inode = req->r_target_inode;
> req->r_target_inode = NULL;
> - dout("open_root_inode success\n");
> root = d_make_root(inode);
> if (!root) {
> root = ERR_PTR(-ENOMEM);
> goto out;
> }
> - dout("open_root_inode success, root dentry is %p\n", root);
> + dout(" root dentry is %p\n", root);
> } else {
> root = ERR_PTR(err);
> }
> out:
> + session = ceph_get_mds_session(req->r_session);
> + if (session)
> + snprintf(buf, 32, " on mds%d", session->s_mds);
> +
Where is this new session reference put? I'm not sure I understand this
change.
> ceph_mdsc_put_request(req);
> + if (!IS_ERR(root))
> + dout("mount open_root_inode success%s\n", buf[0] ? buf : "");
> + else
> + pr_err("mount open_root_inode fail %ld%s\n", PTR_ERR(root),
> + buf[0] ? buf : "");
> return root;
> }
>
This goes to the generic kernel log. This may warrant an error message
there, but it needs to be something a typical user would understand.
> @@ -937,6 +947,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
>
> out:
> mutex_unlock(&fsc->client->mount_mutex);
> + pr_err("mount fail\n");
This is not something we'd want to add here.
> return ERR_PTR(err);
> }
>
> @@ -1028,7 +1039,7 @@ static int ceph_get_tree(struct fs_context *fc)
> ceph_compare_super;
> int err;
>
> - dout("ceph_get_tree\n");
> + dout("ceph_get_tree start\n");
>
> if (!fc->source)
> return invalfc(fc, "No source");
> @@ -1073,14 +1084,15 @@ static int ceph_get_tree(struct fs_context *fc)
> err = PTR_ERR(res);
> goto out_splat;
> }
> - dout("root %p inode %p ino %llx.%llx\n", res,
> - d_inode(res), ceph_vinop(d_inode(res)));
> + dout(" root %p inode %p ino %llx.%llx\n",
> + res, d_inode(res), ceph_vinop(d_inode(res)));
> fc->root = fsc->sb->s_root;
> + dout("ceph_get_tree success\n");
> return 0;
>
> out_splat:
> if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
> - pr_info("No mds server is up or the cluster is laggy\n");
> + pr_err("No mds server is up or the cluster is laggy\n");
> err = -EHOSTUNREACH;
Is pr_err the right infolevel here? It may be, I'm just curious why the
change after making this pr_info before.
> }
>
> @@ -1091,7 +1103,7 @@ static int ceph_get_tree(struct fs_context *fc)
> out:
> destroy_fs_client(fsc);
> out_final:
> - dout("ceph_get_tree fail %d\n", err);
> + pr_err("ceph_get_tree fail %d\n", err);
This doesn't seem to be something we want to pr_err.
> return err;
> }
>
> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> index a0e97f6c1072..5971a815fb8e 100644
> --- a/net/ceph/ceph_common.c
> +++ b/net/ceph/ceph_common.c
> @@ -700,11 +700,14 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
> return err;
>
> while (!have_mon_and_osd_map(client)) {
> - if (timeout && time_after_eq(jiffies, started + timeout))
> + if (timeout && time_after_eq(jiffies, started + timeout)) {
> + pr_err("mount wating for mon/osd maps timed out on mon%d\n",
> + client->monc.cur_mon);
> return -ETIMEDOUT;
> + }
>
This code is also called from RBD codepaths and those don't necessarily
involve a "mount". You should consider how to make the new message more
generic, or maybe handle it at a higher level, so that rbd isn't
affected.
> /* wait */
> - dout("mount waiting for mon_map\n");
> + dout("mount waiting for mon/osd maps\n");
> err = wait_event_interruptible_timeout(client->auth_wq,
> have_mon_and_osd_map(client) || (client->auth_err < 0),
> ceph_timeout_jiffies(timeout));
> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
> index 9d9e4e4ea600..8f09df9c3aee 100644
> --- a/net/ceph/mon_client.c
> +++ b/net/ceph/mon_client.c
> @@ -1179,6 +1179,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
>
> if (ret < 0) {
> monc->client->auth_err = ret;
> + pr_err("mon%d session auth failed %d\n", monc->cur_mon, ret);
> } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
> dout("authenticated, starting session\n");
>
Again, these need to be human-readable, and not just debugging messages
that are hard to disable.
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH] ceph: show more detail logs during mount
@ 2020-02-23 12:18 xiubli
2020-02-23 19:18 ` Jeff Layton
0 siblings, 1 reply; 6+ messages in thread
From: xiubli @ 2020-02-23 12:18 UTC (permalink / raw)
To: jlayton, idryomov; +Cc: sage, zyan, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
Return -ETIMEDOUT when the requests are timed out instead of -EIO
to make it cleaner for the userland. And just print the logs in
error level to give a helpful hint.
URL: https://tracker.ceph.com/issues/44215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/mds_client.c | 4 ++--
fs/ceph/super.c | 28 ++++++++++++++++++++--------
net/ceph/ceph_common.c | 7 +++++--
net/ceph/mon_client.c | 1 +
4 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 82f63ef2694c..0dfea8cdb50a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2578,7 +2578,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
dout("do_request timed out\n");
- err = -EIO;
+ err = -ETIMEDOUT;
goto finish;
}
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
@@ -2752,7 +2752,7 @@ static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
if (timeleft > 0)
err = 0;
else if (!timeleft)
- err = -EIO; /* timed out */
+ err = -ETIMEDOUT; /* timed out */
else
err = timeleft; /* killed */
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 31acb4fe1f2c..6778f2a7d6d4 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -849,11 +849,13 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
{
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req = NULL;
+ struct ceph_mds_session *session;
int err;
struct dentry *root;
+ char buf[32] = {0};
/* open dir */
- dout("open_root_inode opening '%s'\n", path);
+ dout("mount open_root_inode opening '%s'\n", path);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
if (IS_ERR(req))
return ERR_CAST(req);
@@ -873,18 +875,26 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
if (err == 0) {
struct inode *inode = req->r_target_inode;
req->r_target_inode = NULL;
- dout("open_root_inode success\n");
root = d_make_root(inode);
if (!root) {
root = ERR_PTR(-ENOMEM);
goto out;
}
- dout("open_root_inode success, root dentry is %p\n", root);
+ dout(" root dentry is %p\n", root);
} else {
root = ERR_PTR(err);
}
out:
+ session = ceph_get_mds_session(req->r_session);
+ if (session)
+ snprintf(buf, 32, " on mds%d", session->s_mds);
+
ceph_mdsc_put_request(req);
+ if (!IS_ERR(root))
+ dout("mount open_root_inode success%s\n", buf[0] ? buf : "");
+ else
+ pr_err("mount open_root_inode fail %ld%s\n", PTR_ERR(root),
+ buf[0] ? buf : "");
return root;
}
@@ -937,6 +947,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
out:
mutex_unlock(&fsc->client->mount_mutex);
+ pr_err("mount fail\n");
return ERR_PTR(err);
}
@@ -1028,7 +1039,7 @@ static int ceph_get_tree(struct fs_context *fc)
ceph_compare_super;
int err;
- dout("ceph_get_tree\n");
+ dout("ceph_get_tree start\n");
if (!fc->source)
return invalfc(fc, "No source");
@@ -1073,14 +1084,15 @@ static int ceph_get_tree(struct fs_context *fc)
err = PTR_ERR(res);
goto out_splat;
}
- dout("root %p inode %p ino %llx.%llx\n", res,
- d_inode(res), ceph_vinop(d_inode(res)));
+ dout(" root %p inode %p ino %llx.%llx\n",
+ res, d_inode(res), ceph_vinop(d_inode(res)));
fc->root = fsc->sb->s_root;
+ dout("ceph_get_tree success\n");
return 0;
out_splat:
if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) {
- pr_info("No mds server is up or the cluster is laggy\n");
+ pr_err("No mds server is up or the cluster is laggy\n");
err = -EHOSTUNREACH;
}
@@ -1091,7 +1103,7 @@ static int ceph_get_tree(struct fs_context *fc)
out:
destroy_fs_client(fsc);
out_final:
- dout("ceph_get_tree fail %d\n", err);
+ pr_err("ceph_get_tree fail %d\n", err);
return err;
}
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index a0e97f6c1072..5971a815fb8e 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -700,11 +700,14 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
return err;
while (!have_mon_and_osd_map(client)) {
- if (timeout && time_after_eq(jiffies, started + timeout))
+ if (timeout && time_after_eq(jiffies, started + timeout)) {
+ pr_err("mount wating for mon/osd maps timed out on mon%d\n",
+ client->monc.cur_mon);
return -ETIMEDOUT;
+ }
/* wait */
- dout("mount waiting for mon_map\n");
+ dout("mount waiting for mon/osd maps\n");
err = wait_event_interruptible_timeout(client->auth_wq,
have_mon_and_osd_map(client) || (client->auth_err < 0),
ceph_timeout_jiffies(timeout));
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 9d9e4e4ea600..8f09df9c3aee 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -1179,6 +1179,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
if (ret < 0) {
monc->client->auth_err = ret;
+ pr_err("mon%d session auth failed %d\n", monc->cur_mon, ret);
} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
--
2.21.0
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2020-02-26 0:58 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-25 3:30 [PATCH] ceph: show more detail logs during mount xiubli
2020-02-25 10:39 ` Ilya Dryomov
2020-02-26 0:58 ` Xiubo Li
-- strict thread matches above, loose matches on Subject: below --
2020-02-23 12:18 xiubli
2020-02-23 19:18 ` Jeff Layton
2020-02-24 3:10 ` Xiubo Li
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.