All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Ondřej Jirman" <megous@megous.com>
To: Frank Lee <tiny.windzz@gmail.com>
Cc: linux-sunxi@googlegroups.com,
	Vasily Khoruzhick <anarsoul@gmail.com>,
	Zhang Rui <rui.zhang@intel.com>,
	Daniel Lezcano <daniel.lezcano@linaro.org>,
	Amit Kucheria <amit.kucheria@verdurent.com>,
	Maxime Ripard <mripard@kernel.org>, Chen-Yu Tsai <wens@csie.org>,
	"open list:ALLWINNER THERMAL DRIVER" <linux-pm@vger.kernel.org>,
	"moderated list:ARM/Allwinner sunXi SoC support" 
	<linux-arm-kernel@lists.infradead.org>,
	open list <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH] thermal: sun8i: Be loud when probe fails
Date: Wed, 8 Jul 2020 15:21:24 +0200	[thread overview]
Message-ID: <20200708132124.3b3iaavms43o622g@core.my.home> (raw)
In-Reply-To: <CAEExFWvR4QnAQsXBnxk3V776P+YVJzs4PU-HWJ7dfo4B6cdtkg@mail.gmail.com>

On Wed, Jul 08, 2020 at 07:55:40PM +0800, Frank Lee wrote:
> Hi Ondrej,
> On Wed, Jul 8, 2020 at 6:55 PM Ondrej Jirman <megous@megous.com> wrote:
> >
> > I noticed several mobile Linux distributions failing to enable
> > thermal regulation correctly, because the kernel is silent
> > when the thermal driver fails to probe. Add enough error reporting
> > to debug issues and warn users in case the thermal sensor is failing
> > to probe.
> >
> > Failing to notify users means that the SoC can easily overheat under
> > load.
> >
> > Signed-off-by: Ondrej Jirman <megous@megous.com>
> > ---
> >  drivers/thermal/sun8i_thermal.c | 55 ++++++++++++++++++++++++++-------
> >  1 file changed, 43 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
> > index 74d73be16496..9065e79ae743 100644
> > --- a/drivers/thermal/sun8i_thermal.c
> > +++ b/drivers/thermal/sun8i_thermal.c
> > @@ -287,8 +287,12 @@ static int sun8i_ths_calibrate(struct ths_device *tmdev)
> >
> >         calcell = devm_nvmem_cell_get(dev, "calibration");
> >         if (IS_ERR(calcell)) {
> > +               dev_err(dev, "Failed to get calibration nvmem cell (%ld)\n",
> > +                       PTR_ERR(calcell));
> > +
> >                 if (PTR_ERR(calcell) == -EPROBE_DEFER)
> >                         return -EPROBE_DEFER;
> > +
> >                 /*
> >                  * Even if the external calibration data stored in sid is
> >                  * not accessible, the THS hardware can still work, although
> > @@ -308,6 +312,8 @@ static int sun8i_ths_calibrate(struct ths_device *tmdev)
> >         caldata = nvmem_cell_read(calcell, &callen);
> >         if (IS_ERR(caldata)) {
> >                 ret = PTR_ERR(caldata);
> > +               dev_err(dev, "Failed to read calibration data (%d)\n",
> > +                       ret);
> >                 goto out;
> >         }
> >
> > @@ -330,23 +336,35 @@ static int sun8i_ths_resource_init(struct ths_device *tmdev)
> >                 return PTR_ERR(base);
> >
> >         tmdev->regmap = devm_regmap_init_mmio(dev, base, &config);
> > -       if (IS_ERR(tmdev->regmap))
> > +       if (IS_ERR(tmdev->regmap)) {
> > +               dev_err(dev, "Failed to init regmap (%ld)\n",
> > +                       PTR_ERR(tmdev->regmap));
> >                 return PTR_ERR(tmdev->regmap);
> > +       }
> >
> >         if (tmdev->chip->has_bus_clk_reset) {
> >                 tmdev->reset = devm_reset_control_get(dev, NULL);
> > -               if (IS_ERR(tmdev->reset))
> > +               if (IS_ERR(tmdev->reset)) {
> > +                       dev_err(dev, "Failed to get reset (%ld)\n",
> > +                               PTR_ERR(tmdev->reset));
> >                         return PTR_ERR(tmdev->reset);
> > +               }
> >
> >                 tmdev->bus_clk = devm_clk_get(&pdev->dev, "bus");
> > -               if (IS_ERR(tmdev->bus_clk))
> > +               if (IS_ERR(tmdev->bus_clk)) {
> > +                       dev_err(dev, "Failed to get bus clock (%ld)\n",
> > +                               PTR_ERR(tmdev->bus_clk));
> >                         return PTR_ERR(tmdev->bus_clk);
> > +               }
> >         }
> >
> >         if (tmdev->chip->has_mod_clk) {
> >                 tmdev->mod_clk = devm_clk_get(&pdev->dev, "mod");
> > -               if (IS_ERR(tmdev->mod_clk))
> > +               if (IS_ERR(tmdev->mod_clk)) {
> > +                       dev_err(dev, "Failed to get mod clock (%ld)\n",
> > +                               PTR_ERR(tmdev->mod_clk));
> >                         return PTR_ERR(tmdev->mod_clk);
> > +               }
> >         }
> >
> >         ret = reset_control_deassert(tmdev->reset);
> > @@ -471,8 +489,12 @@ static int sun8i_ths_register(struct ths_device *tmdev)
> >                                                              i,
> >                                                              &tmdev->sensor[i],
> >                                                              &ths_ops);
> > -               if (IS_ERR(tmdev->sensor[i].tzd))
> > +               if (IS_ERR(tmdev->sensor[i].tzd)) {
> > +                       dev_err(tmdev->dev,
> > +                               "Failed to register sensor %d (%ld)\n",
> > +                               i, PTR_ERR(tmdev->sensor[i].tzd));
> >                         return PTR_ERR(tmdev->sensor[i].tzd);
> > +               }
> >
> >                 if (devm_thermal_add_hwmon_sysfs(tmdev->sensor[i].tzd))
> >                         dev_warn(tmdev->dev,
> > @@ -501,19 +523,21 @@ static int sun8i_ths_probe(struct platform_device *pdev)
> >
> >         ret = sun8i_ths_resource_init(tmdev);
> >         if (ret)
> > -               return ret;
> > +               goto err_out;
> >
> >         irq = platform_get_irq(pdev, 0);
> > -       if (irq < 0)
> > -               return irq;
> > +       if (irq < 0) {
> > +               ret = irq;
> > +               goto err_out;
> > +       }
> >
> >         ret = tmdev->chip->init(tmdev);
> >         if (ret)
> > -               return ret;
> > +               goto err_out;
> >
> >         ret = sun8i_ths_register(tmdev);
> >         if (ret)
> > -               return ret;
> > +               goto err_out;
> >
> >         /*
> >          * Avoid entering the interrupt handler, the thermal device is not
> > @@ -523,10 +547,17 @@ static int sun8i_ths_probe(struct platform_device *pdev)
> >         ret = devm_request_threaded_irq(dev, irq, NULL,
> >                                         sun8i_irq_thread,
> >                                         IRQF_ONESHOT, "ths", tmdev);
> > -       if (ret)
> > -               return ret;
> > +       if (ret) {
> > +               dev_err(dev, "Failed to request irq (%d)\n", ret);
> > +               goto err_out;
> > +       }
> >
> > +       dev_info(dev, "Thermal sensor ready!\n");
> >         return 0;
> > +
> > +err_out:
> > +       dev_err(dev, "Failed to probe thermal sensor (%d)\n", ret);
> 
> When the driver fails to probe, the message below is already printed.
> Isn't it superfluous for you to add these?
> 
> sun8i-thermal: probe of 5070400.thermal-sensor failed with error

There's no such failure message in the case I investigated, which is an
EPROBE_DEFER failure: the probe keeps waiting for an nvmem driver that
never loads, because the user has not configured it to be built.

regards,
	o.

> 
> Yangtao

WARNING: multiple messages have this Message-ID (diff)
From: "Ondřej Jirman" <megous@megous.com>
To: Frank Lee <tiny.windzz@gmail.com>
Cc: Amit Kucheria <amit.kucheria@verdurent.com>,
	"open list:ALLWINNER THERMAL DRIVER" <linux-pm@vger.kernel.org>,
	Daniel Lezcano <daniel.lezcano@linaro.org>,
	Chen-Yu Tsai <wens@csie.org>, Maxime Ripard <mripard@kernel.org>,
	open list <linux-kernel@vger.kernel.org>,
	Vasily Khoruzhick <anarsoul@gmail.com>,
	linux-sunxi@googlegroups.com, Zhang Rui <rui.zhang@intel.com>,
	"moderated list:ARM/Allwinner sunXi SoC support"
	<linux-arm-kernel@lists.infradead.org>
Subject: Re: [PATCH] thermal: sun8i: Be loud when probe fails
Date: Wed, 8 Jul 2020 15:21:24 +0200	[thread overview]
Message-ID: <20200708132124.3b3iaavms43o622g@core.my.home> (raw)
In-Reply-To: <CAEExFWvR4QnAQsXBnxk3V776P+YVJzs4PU-HWJ7dfo4B6cdtkg@mail.gmail.com>

On Wed, Jul 08, 2020 at 07:55:40PM +0800, Frank Lee wrote:
> Hi Ondrej,
> On Wed, Jul 8, 2020 at 6:55 PM Ondrej Jirman <megous@megous.com> wrote:
> >
> > I noticed several mobile Linux distributions failing to enable
> > thermal regulation correctly, because the kernel is silent
> > when the thermal driver fails to probe. Add enough error reporting
> > to debug issues and warn users in case the thermal sensor is failing
> > to probe.
> >
> > Failing to notify users means that the SoC can easily overheat under
> > load.
> >
> > Signed-off-by: Ondrej Jirman <megous@megous.com>
> > ---
> >  drivers/thermal/sun8i_thermal.c | 55 ++++++++++++++++++++++++++-------
> >  1 file changed, 43 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c
> > index 74d73be16496..9065e79ae743 100644
> > --- a/drivers/thermal/sun8i_thermal.c
> > +++ b/drivers/thermal/sun8i_thermal.c
> > @@ -287,8 +287,12 @@ static int sun8i_ths_calibrate(struct ths_device *tmdev)
> >
> >         calcell = devm_nvmem_cell_get(dev, "calibration");
> >         if (IS_ERR(calcell)) {
> > +               dev_err(dev, "Failed to get calibration nvmem cell (%ld)\n",
> > +                       PTR_ERR(calcell));
> > +
> >                 if (PTR_ERR(calcell) == -EPROBE_DEFER)
> >                         return -EPROBE_DEFER;
> > +
> >                 /*
> >                  * Even if the external calibration data stored in sid is
> >                  * not accessible, the THS hardware can still work, although
> > @@ -308,6 +312,8 @@ static int sun8i_ths_calibrate(struct ths_device *tmdev)
> >         caldata = nvmem_cell_read(calcell, &callen);
> >         if (IS_ERR(caldata)) {
> >                 ret = PTR_ERR(caldata);
> > +               dev_err(dev, "Failed to read calibration data (%d)\n",
> > +                       ret);
> >                 goto out;
> >         }
> >
> > @@ -330,23 +336,35 @@ static int sun8i_ths_resource_init(struct ths_device *tmdev)
> >                 return PTR_ERR(base);
> >
> >         tmdev->regmap = devm_regmap_init_mmio(dev, base, &config);
> > -       if (IS_ERR(tmdev->regmap))
> > +       if (IS_ERR(tmdev->regmap)) {
> > +               dev_err(dev, "Failed to init regmap (%ld)\n",
> > +                       PTR_ERR(tmdev->regmap));
> >                 return PTR_ERR(tmdev->regmap);
> > +       }
> >
> >         if (tmdev->chip->has_bus_clk_reset) {
> >                 tmdev->reset = devm_reset_control_get(dev, NULL);
> > -               if (IS_ERR(tmdev->reset))
> > +               if (IS_ERR(tmdev->reset)) {
> > +                       dev_err(dev, "Failed to get reset (%ld)\n",
> > +                               PTR_ERR(tmdev->reset));
> >                         return PTR_ERR(tmdev->reset);
> > +               }
> >
> >                 tmdev->bus_clk = devm_clk_get(&pdev->dev, "bus");
> > -               if (IS_ERR(tmdev->bus_clk))
> > +               if (IS_ERR(tmdev->bus_clk)) {
> > +                       dev_err(dev, "Failed to get bus clock (%ld)\n",
> > +                               PTR_ERR(tmdev->bus_clk));
> >                         return PTR_ERR(tmdev->bus_clk);
> > +               }
> >         }
> >
> >         if (tmdev->chip->has_mod_clk) {
> >                 tmdev->mod_clk = devm_clk_get(&pdev->dev, "mod");
> > -               if (IS_ERR(tmdev->mod_clk))
> > +               if (IS_ERR(tmdev->mod_clk)) {
> > +                       dev_err(dev, "Failed to get mod clock (%ld)\n",
> > +                               PTR_ERR(tmdev->mod_clk));
> >                         return PTR_ERR(tmdev->mod_clk);
> > +               }
> >         }
> >
> >         ret = reset_control_deassert(tmdev->reset);
> > @@ -471,8 +489,12 @@ static int sun8i_ths_register(struct ths_device *tmdev)
> >                                                              i,
> >                                                              &tmdev->sensor[i],
> >                                                              &ths_ops);
> > -               if (IS_ERR(tmdev->sensor[i].tzd))
> > +               if (IS_ERR(tmdev->sensor[i].tzd)) {
> > +                       dev_err(tmdev->dev,
> > +                               "Failed to register sensor %d (%ld)\n",
> > +                               i, PTR_ERR(tmdev->sensor[i].tzd));
> >                         return PTR_ERR(tmdev->sensor[i].tzd);
> > +               }
> >
> >                 if (devm_thermal_add_hwmon_sysfs(tmdev->sensor[i].tzd))
> >                         dev_warn(tmdev->dev,
> > @@ -501,19 +523,21 @@ static int sun8i_ths_probe(struct platform_device *pdev)
> >
> >         ret = sun8i_ths_resource_init(tmdev);
> >         if (ret)
> > -               return ret;
> > +               goto err_out;
> >
> >         irq = platform_get_irq(pdev, 0);
> > -       if (irq < 0)
> > -               return irq;
> > +       if (irq < 0) {
> > +               ret = irq;
> > +               goto err_out;
> > +       }
> >
> >         ret = tmdev->chip->init(tmdev);
> >         if (ret)
> > -               return ret;
> > +               goto err_out;
> >
> >         ret = sun8i_ths_register(tmdev);
> >         if (ret)
> > -               return ret;
> > +               goto err_out;
> >
> >         /*
> >          * Avoid entering the interrupt handler, the thermal device is not
> > @@ -523,10 +547,17 @@ static int sun8i_ths_probe(struct platform_device *pdev)
> >         ret = devm_request_threaded_irq(dev, irq, NULL,
> >                                         sun8i_irq_thread,
> >                                         IRQF_ONESHOT, "ths", tmdev);
> > -       if (ret)
> > -               return ret;
> > +       if (ret) {
> > +               dev_err(dev, "Failed to request irq (%d)\n", ret);
> > +               goto err_out;
> > +       }
> >
> > +       dev_info(dev, "Thermal sensor ready!\n");
> >         return 0;
> > +
> > +err_out:
> > +       dev_err(dev, "Failed to probe thermal sensor (%d)\n", ret);
> 
> When the driver fails to probe, the message below is already printed.
> Isn't it superfluous for you to add these?
> 
> sun8i-thermal: probe of 5070400.thermal-sensor failed with error

There's no such failure message in the case I investigated, which is an
EPROBE_DEFER failure: the probe keeps waiting for an nvmem driver that
never loads, because the user has not configured it to be built.

regards,
	o.

> 
> Yangtao

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  reply	other threads:[~2020-07-08 13:21 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-08 10:55 [PATCH] thermal: sun8i: Be loud when probe fails Ondrej Jirman
2020-07-08 10:55 ` Ondrej Jirman
2020-07-08 11:03 ` Russell King - ARM Linux admin
2020-07-08 11:03   ` Russell King - ARM Linux admin
2020-07-08 11:10   ` Ondřej Jirman
2020-07-08 11:10     ` Ondřej Jirman
2020-07-20  7:55   ` Icenowy Zheng
2020-07-20  7:55     ` Icenowy Zheng
2020-07-20  8:28     ` Russell King - ARM Linux admin
2020-07-20  8:28       ` Russell King - ARM Linux admin
2020-07-08 11:55 ` Frank Lee
2020-07-08 11:55   ` Frank Lee
2020-07-08 13:21   ` Ondřej Jirman [this message]
2020-07-08 13:21     ` Ondřej Jirman
2020-07-08 13:42     ` Robin Murphy
2020-07-08 13:42       ` Robin Murphy
2020-07-08 13:33   ` Ondřej Jirman
2020-07-08 13:33     ` Ondřej Jirman
2020-07-08 12:25 ` Maxime Ripard
2020-07-08 12:25   ` Maxime Ripard
2020-07-08 13:29   ` Ondřej Jirman
2020-07-08 13:29     ` Ondřej Jirman
2020-07-08 13:36     ` Maxime Ripard
2020-07-08 13:36       ` Maxime Ripard
2020-07-08 13:44       ` Ondřej Jirman
2020-07-08 13:44         ` Ondřej Jirman
2020-07-08 13:57         ` Maxime Ripard
2020-07-08 13:57           ` Maxime Ripard
2020-07-12 23:29           ` Ondřej Jirman
2020-07-12 23:29             ` Ondřej Jirman
2020-07-23 15:20             ` Maxime Ripard
2020-07-23 15:20               ` Maxime Ripard
2020-07-08 13:29 ` Maxime Ripard
2020-07-08 13:29   ` Maxime Ripard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200708132124.3b3iaavms43o622g@core.my.home \
    --to=megous@megous.com \
    --cc=amit.kucheria@verdurent.com \
    --cc=anarsoul@gmail.com \
    --cc=daniel.lezcano@linaro.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=linux-sunxi@googlegroups.com \
    --cc=mripard@kernel.org \
    --cc=rui.zhang@intel.com \
    --cc=tiny.windzz@gmail.com \
    --cc=wens@csie.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.