From mboxrd@z Thu Jan 1 00:00:00 1970 Return-path: Received: from eusmtp01.atmel.com ([212.144.249.243]:38352 "EHLO eusmtp01.atmel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753236AbbDMJ3l (ORCPT ); Mon, 13 Apr 2015 05:29:41 -0400 Message-ID: <552B8C7F.5070003@atmel.com> Date: Mon, 13 Apr 2015 17:29:35 +0800 From: "Yang, Wenyou" MIME-Version: 1.0 To: Timo Kokkonen , , , , , Subject: Re: [PATCHv5 1/4] watchdog: Extend kernel API to know about HW limitations References: <1428671199-5562-1-git-send-email-timo.kokkonen@offcode.fi> <1428671199-5562-2-git-send-email-timo.kokkonen@offcode.fi> In-Reply-To: <1428671199-5562-2-git-send-email-timo.kokkonen@offcode.fi> Content-Type: text/plain; charset="windows-1252"; format=flowed Content-Transfer-Encoding: 7bit Sender: linux-watchdog-owner@vger.kernel.org List-Id: linux-watchdog@vger.kernel.org Hi Timo, If the /dev/watchdog device file doesn't open by the user space application, the system will reboot by the watchdog. I think it is missing update of the wdd->expires value. added it as following. On 2015/4/10 21:06, Timo Kokkonen wrote: > There is a great deal of diversity in the watchdog hardware found on > different devices. Differen hardware have different contstraints on > them, many of the constraints that are excessively difficult for the > user space to satisfy. > > One such constraint is the length of the timeout value, which in many > cases can be just a few seconds. Drivers are creating ad hoc solutions > with timers and workqueues to extend the timeout in order to give user > space more time between updates. Looking at the drivers it is clear > that this has resulted to a lot of duplicate code. > > Add an extension to the watchdog kernel API that allows the driver to > describe tis HW constraints to the watchdog code. A kernel worker in > the core is then used to extend the watchdog timeout on behalf of the > user space. This allows the drivers to be simplified as core takes > over the timer extending. > > Signed-off-by: Timo Kokkonen > --- > drivers/watchdog/watchdog_core.c | 87 ++++++++++++++++++++++++++++++++++++++-- > drivers/watchdog/watchdog_dev.c | 12 ++++++ > include/linux/watchdog.h | 23 +++++++++++ > 3 files changed, 119 insertions(+), 3 deletions(-) > > diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c > index cec9b55..8e7d08d 100644 > --- a/drivers/watchdog/watchdog_core.c > +++ b/drivers/watchdog/watchdog_core.c > @@ -99,6 +99,75 @@ int watchdog_init_timeout(struct watchdog_device *wdd, > EXPORT_SYMBOL_GPL(watchdog_init_timeout); > > /** > + * watchdog_init_timeout() - initialize generic watchdog parameters > + * @wdd: Watchdog device to operate > + * @dev: Device that stores the device tree properties > + * > + * Initialize the generic timeout parameters. The driver needs to set > + * hw_features bitmask from @wdd prior calling this function in order > + * to ensure the core knows how to handle the HW. > + * > + * A zero is returned on success and -EINVAL for failure. > + */ > +int watchdog_init_params(struct watchdog_device *wdd, struct device *dev) > +{ > + int ret = 0; > + > + ret = watchdog_init_timeout(wdd, wdd->timeout, dev); > + if (ret < 0) > + return ret; > + > + /* > + * Max HW timeout needs to be set so that core knows when to > + * use a kernel worker to support longer watchdog timeouts > + */ > + if (!wdd->hw_max_timeout) > + return -EINVAL; > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(watchdog_init_params); > + > +static void watchdog_worker(struct work_struct *work) > +{ > + struct watchdog_device *wdd = container_of(to_delayed_work(work), > + struct watchdog_device, work); > + > + if (time_after(jiffies, wdd->expires)) { > + pr_crit("I will reset your machine !\n"); > + return; > + } > + > + if ((!watchdog_active(wdd) && !watchdog_is_stoppable(wdd)) || > + (watchdog_active(wdd) && > + wdd->hw_max_timeout < wdd->timeout * HZ)) { > + if (wdd->ops->ping) > + wdd->ops->ping(wdd); > + else > + wdd->ops->start(wdd); > + > + schedule_delayed_work(&wdd->work, wdd->hw_heartbeat); I think you missed the update of wdd->expires, as following. wdd->expires = jiffies + wdd->timeout * HZ; > + } > +} > + > +static int prepare_watchdog(struct watchdog_device *wdd) > +{ > + /* Stop the watchdog now before user space opens the device */ > + if (wdd->hw_features & WDOG_HW_IS_STOPPABLE && > + wdd->hw_features & WDOG_HW_RUNNING_AT_BOOT) { > + wdd->ops->stop(wdd); > + > + } else if (!(wdd->hw_features & WDOG_HW_IS_STOPPABLE)) { > + /* > + * Can't stop it, use a kernel timer to tick > + * it until it's open by user space > + */ > + schedule_delayed_work(&wdd->work, wdd->hw_heartbeat); > + } ditto wdd->expires = jiffies + wdd->timeout * HZ; > + return 0; > +} > + > +/** > * watchdog_register_device() - register a watchdog device > * @wdd: watchdog device > * > @@ -156,13 +225,24 @@ int watchdog_register_device(struct watchdog_device *wdd) > wdd->dev = device_create(watchdog_class, wdd->parent, devno, > NULL, "watchdog%d", wdd->id); > if (IS_ERR(wdd->dev)) { > - watchdog_dev_unregister(wdd); > - ida_simple_remove(&watchdog_ida, id); > ret = PTR_ERR(wdd->dev); > - return ret; > + goto dev_create_fail; > + } > + > + INIT_DELAYED_WORK(&wdd->work, watchdog_worker); > + > + if (wdd->hw_max_timeout) { > + ret = prepare_watchdog(wdd); > + if (ret) > + goto dev_create_fail; > } > > return 0; > + > +dev_create_fail: > + watchdog_dev_unregister(wdd); > + ida_simple_remove(&watchdog_ida, id); > + return ret; > } > EXPORT_SYMBOL_GPL(watchdog_register_device); > > @@ -181,6 +261,7 @@ void watchdog_unregister_device(struct watchdog_device *wdd) > if (wdd == NULL) > return; > > + cancel_delayed_work_sync(&wdd->work); > devno = wdd->cdev.dev; > ret = watchdog_dev_unregister(wdd); > if (ret) > diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c > index 6aaefba..99eb363 100644 > --- a/drivers/watchdog/watchdog_dev.c > +++ b/drivers/watchdog/watchdog_dev.c > @@ -78,6 +78,12 @@ static int watchdog_ping(struct watchdog_device *wddev) > else > err = wddev->ops->start(wddev); /* restart watchdog */ > > + if (wddev->hw_max_timeout && > + wddev->timeout * HZ > wddev->hw_max_timeout) { > + wddev->expires = jiffies + wddev->timeout * HZ; > + schedule_delayed_work(&wddev->work, wddev->hw_heartbeat); > + } > + > out_ping: > mutex_unlock(&wddev->lock); > return err; > @@ -110,6 +116,12 @@ static int watchdog_start(struct watchdog_device *wddev) > if (err == 0) > set_bit(WDOG_ACTIVE, &wddev->status); > > + if (wddev->hw_max_timeout && > + wddev->timeout * HZ > wddev->hw_max_timeout) { > + wddev->expires = jiffies + wddev->timeout * HZ; > + schedule_delayed_work(&wddev->work, wddev->hw_heartbeat); > + } > + > out_start: > mutex_unlock(&wddev->lock); > return err; > diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h > index 395b70e..c16c921 100644 > --- a/include/linux/watchdog.h > +++ b/include/linux/watchdog.h > @@ -12,6 +12,7 @@ > #include > #include > #include > +#include > #include > > struct watchdog_ops; > @@ -62,9 +63,13 @@ struct watchdog_ops { > * @timeout: The watchdog devices timeout value. > * @min_timeout:The watchdog devices minimum timeout value. > * @max_timeout:The watchdog devices maximum timeout value. > + * @hw_max_timeout:The watchdog hardware maximum timeout value. > + * @hw_margin: Time interval between timer pings. > * @driver-data:Pointer to the drivers private data. > * @lock: Lock for watchdog core internal use only. > + * @work: Worker used to provide longer timeouts than hardware supports. > * @status: Field that contains the devices internal status bits. > + * @hw_features:Feature bits describing how the watchdog HW works. > * > * The watchdog_device structure contains all information about a > * watchdog timer device. > @@ -86,8 +91,12 @@ struct watchdog_device { > unsigned int timeout; > unsigned int min_timeout; > unsigned int max_timeout; > + unsigned int hw_max_timeout; /* in jiffies */ > + unsigned int hw_heartbeat; /* in jiffies */ > + unsigned long int expires; /* for soft timer */ > void *driver_data; > struct mutex lock; > + struct delayed_work work; > unsigned long status; > /* Bit numbers for status flags */ > #define WDOG_ACTIVE 0 /* Is the watchdog running/active */ > @@ -95,6 +104,14 @@ struct watchdog_device { > #define WDOG_ALLOW_RELEASE 2 /* Did we receive the magic char ? */ > #define WDOG_NO_WAY_OUT 3 /* Is 'nowayout' feature set ? */ > #define WDOG_UNREGISTERED 4 /* Has the device been unregistered */ > + > +/* Bits describing features supported by the HW */ > + unsigned long hw_features; > + > +/* Can the watchdog be stopped and started */ > +#define WDOG_HW_IS_STOPPABLE BIT(0) > +/* Is the watchdog already running when the driver starts up */ > +#define WDOG_HW_RUNNING_AT_BOOT BIT(1) > }; > > #define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT) > @@ -120,6 +137,11 @@ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigne > (t < wdd->min_timeout || t > wdd->max_timeout)); > } > > +static inline bool watchdog_is_stoppable(struct watchdog_device *wdd) > +{ > + return wdd->hw_features & WDOG_HW_IS_STOPPABLE; > +} > + > /* Use the following functions to manipulate watchdog driver specific data */ > static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data) > { > @@ -134,6 +156,7 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) > /* drivers/watchdog/watchdog_core.c */ > extern int watchdog_init_timeout(struct watchdog_device *wdd, > unsigned int timeout_parm, struct device *dev); > +int watchdog_init_params(struct watchdog_device *wdd, struct device *dev); > extern int watchdog_register_device(struct watchdog_device *); > extern void watchdog_unregister_device(struct watchdog_device *); > Best Regards, Wenyou Yang From mboxrd@z Thu Jan 1 00:00:00 1970 From: wenyou.yang@atmel.com (Yang, Wenyou) Date: Mon, 13 Apr 2015 17:29:35 +0800 Subject: [PATCHv5 1/4] watchdog: Extend kernel API to know about HW limitations In-Reply-To: <1428671199-5562-2-git-send-email-timo.kokkonen@offcode.fi> References: <1428671199-5562-1-git-send-email-timo.kokkonen@offcode.fi> <1428671199-5562-2-git-send-email-timo.kokkonen@offcode.fi> Message-ID: <552B8C7F.5070003@atmel.com> To: linux-arm-kernel@lists.infradead.org List-Id: linux-arm-kernel.lists.infradead.org Hi Timo, If the /dev/watchdog device file doesn't open by the user space application, the system will reboot by the watchdog. I think it is missing update of the wdd->expires value. added it as following. On 2015/4/10 21:06, Timo Kokkonen wrote: > There is a great deal of diversity in the watchdog hardware found on > different devices. Differen hardware have different contstraints on > them, many of the constraints that are excessively difficult for the > user space to satisfy. > > One such constraint is the length of the timeout value, which in many > cases can be just a few seconds. Drivers are creating ad hoc solutions > with timers and workqueues to extend the timeout in order to give user > space more time between updates. Looking at the drivers it is clear > that this has resulted to a lot of duplicate code. > > Add an extension to the watchdog kernel API that allows the driver to > describe tis HW constraints to the watchdog code. A kernel worker in > the core is then used to extend the watchdog timeout on behalf of the > user space. This allows the drivers to be simplified as core takes > over the timer extending. > > Signed-off-by: Timo Kokkonen > --- > drivers/watchdog/watchdog_core.c | 87 ++++++++++++++++++++++++++++++++++++++-- > drivers/watchdog/watchdog_dev.c | 12 ++++++ > include/linux/watchdog.h | 23 +++++++++++ > 3 files changed, 119 insertions(+), 3 deletions(-) > > diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c > index cec9b55..8e7d08d 100644 > --- a/drivers/watchdog/watchdog_core.c > +++ b/drivers/watchdog/watchdog_core.c > @@ -99,6 +99,75 @@ int watchdog_init_timeout(struct watchdog_device *wdd, > EXPORT_SYMBOL_GPL(watchdog_init_timeout); > > /** > + * watchdog_init_timeout() - initialize generic watchdog parameters > + * @wdd: Watchdog device to operate > + * @dev: Device that stores the device tree properties > + * > + * Initialize the generic timeout parameters. The driver needs to set > + * hw_features bitmask from @wdd prior calling this function in order > + * to ensure the core knows how to handle the HW. > + * > + * A zero is returned on success and -EINVAL for failure. > + */ > +int watchdog_init_params(struct watchdog_device *wdd, struct device *dev) > +{ > + int ret = 0; > + > + ret = watchdog_init_timeout(wdd, wdd->timeout, dev); > + if (ret < 0) > + return ret; > + > + /* > + * Max HW timeout needs to be set so that core knows when to > + * use a kernel worker to support longer watchdog timeouts > + */ > + if (!wdd->hw_max_timeout) > + return -EINVAL; > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(watchdog_init_params); > + > +static void watchdog_worker(struct work_struct *work) > +{ > + struct watchdog_device *wdd = container_of(to_delayed_work(work), > + struct watchdog_device, work); > + > + if (time_after(jiffies, wdd->expires)) { > + pr_crit("I will reset your machine !\n"); > + return; > + } > + > + if ((!watchdog_active(wdd) && !watchdog_is_stoppable(wdd)) || > + (watchdog_active(wdd) && > + wdd->hw_max_timeout < wdd->timeout * HZ)) { > + if (wdd->ops->ping) > + wdd->ops->ping(wdd); > + else > + wdd->ops->start(wdd); > + > + schedule_delayed_work(&wdd->work, wdd->hw_heartbeat); I think you missed the update of wdd->expires, as following. wdd->expires = jiffies + wdd->timeout * HZ; > + } > +} > + > +static int prepare_watchdog(struct watchdog_device *wdd) > +{ > + /* Stop the watchdog now before user space opens the device */ > + if (wdd->hw_features & WDOG_HW_IS_STOPPABLE && > + wdd->hw_features & WDOG_HW_RUNNING_AT_BOOT) { > + wdd->ops->stop(wdd); > + > + } else if (!(wdd->hw_features & WDOG_HW_IS_STOPPABLE)) { > + /* > + * Can't stop it, use a kernel timer to tick > + * it until it's open by user space > + */ > + schedule_delayed_work(&wdd->work, wdd->hw_heartbeat); > + } ditto wdd->expires = jiffies + wdd->timeout * HZ; > + return 0; > +} > + > +/** > * watchdog_register_device() - register a watchdog device > * @wdd: watchdog device > * > @@ -156,13 +225,24 @@ int watchdog_register_device(struct watchdog_device *wdd) > wdd->dev = device_create(watchdog_class, wdd->parent, devno, > NULL, "watchdog%d", wdd->id); > if (IS_ERR(wdd->dev)) { > - watchdog_dev_unregister(wdd); > - ida_simple_remove(&watchdog_ida, id); > ret = PTR_ERR(wdd->dev); > - return ret; > + goto dev_create_fail; > + } > + > + INIT_DELAYED_WORK(&wdd->work, watchdog_worker); > + > + if (wdd->hw_max_timeout) { > + ret = prepare_watchdog(wdd); > + if (ret) > + goto dev_create_fail; > } > > return 0; > + > +dev_create_fail: > + watchdog_dev_unregister(wdd); > + ida_simple_remove(&watchdog_ida, id); > + return ret; > } > EXPORT_SYMBOL_GPL(watchdog_register_device); > > @@ -181,6 +261,7 @@ void watchdog_unregister_device(struct watchdog_device *wdd) > if (wdd == NULL) > return; > > + cancel_delayed_work_sync(&wdd->work); > devno = wdd->cdev.dev; > ret = watchdog_dev_unregister(wdd); > if (ret) > diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c > index 6aaefba..99eb363 100644 > --- a/drivers/watchdog/watchdog_dev.c > +++ b/drivers/watchdog/watchdog_dev.c > @@ -78,6 +78,12 @@ static int watchdog_ping(struct watchdog_device *wddev) > else > err = wddev->ops->start(wddev); /* restart watchdog */ > > + if (wddev->hw_max_timeout && > + wddev->timeout * HZ > wddev->hw_max_timeout) { > + wddev->expires = jiffies + wddev->timeout * HZ; > + schedule_delayed_work(&wddev->work, wddev->hw_heartbeat); > + } > + > out_ping: > mutex_unlock(&wddev->lock); > return err; > @@ -110,6 +116,12 @@ static int watchdog_start(struct watchdog_device *wddev) > if (err == 0) > set_bit(WDOG_ACTIVE, &wddev->status); > > + if (wddev->hw_max_timeout && > + wddev->timeout * HZ > wddev->hw_max_timeout) { > + wddev->expires = jiffies + wddev->timeout * HZ; > + schedule_delayed_work(&wddev->work, wddev->hw_heartbeat); > + } > + > out_start: > mutex_unlock(&wddev->lock); > return err; > diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h > index 395b70e..c16c921 100644 > --- a/include/linux/watchdog.h > +++ b/include/linux/watchdog.h > @@ -12,6 +12,7 @@ > #include > #include > #include > +#include > #include > > struct watchdog_ops; > @@ -62,9 +63,13 @@ struct watchdog_ops { > * @timeout: The watchdog devices timeout value. > * @min_timeout:The watchdog devices minimum timeout value. > * @max_timeout:The watchdog devices maximum timeout value. > + * @hw_max_timeout:The watchdog hardware maximum timeout value. > + * @hw_margin: Time interval between timer pings. > * @driver-data:Pointer to the drivers private data. > * @lock: Lock for watchdog core internal use only. > + * @work: Worker used to provide longer timeouts than hardware supports. > * @status: Field that contains the devices internal status bits. > + * @hw_features:Feature bits describing how the watchdog HW works. > * > * The watchdog_device structure contains all information about a > * watchdog timer device. > @@ -86,8 +91,12 @@ struct watchdog_device { > unsigned int timeout; > unsigned int min_timeout; > unsigned int max_timeout; > + unsigned int hw_max_timeout; /* in jiffies */ > + unsigned int hw_heartbeat; /* in jiffies */ > + unsigned long int expires; /* for soft timer */ > void *driver_data; > struct mutex lock; > + struct delayed_work work; > unsigned long status; > /* Bit numbers for status flags */ > #define WDOG_ACTIVE 0 /* Is the watchdog running/active */ > @@ -95,6 +104,14 @@ struct watchdog_device { > #define WDOG_ALLOW_RELEASE 2 /* Did we receive the magic char ? */ > #define WDOG_NO_WAY_OUT 3 /* Is 'nowayout' feature set ? */ > #define WDOG_UNREGISTERED 4 /* Has the device been unregistered */ > + > +/* Bits describing features supported by the HW */ > + unsigned long hw_features; > + > +/* Can the watchdog be stopped and started */ > +#define WDOG_HW_IS_STOPPABLE BIT(0) > +/* Is the watchdog already running when the driver starts up */ > +#define WDOG_HW_RUNNING_AT_BOOT BIT(1) > }; > > #define WATCHDOG_NOWAYOUT IS_BUILTIN(CONFIG_WATCHDOG_NOWAYOUT) > @@ -120,6 +137,11 @@ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigne > (t < wdd->min_timeout || t > wdd->max_timeout)); > } > > +static inline bool watchdog_is_stoppable(struct watchdog_device *wdd) > +{ > + return wdd->hw_features & WDOG_HW_IS_STOPPABLE; > +} > + > /* Use the following functions to manipulate watchdog driver specific data */ > static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data) > { > @@ -134,6 +156,7 @@ static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) > /* drivers/watchdog/watchdog_core.c */ > extern int watchdog_init_timeout(struct watchdog_device *wdd, > unsigned int timeout_parm, struct device *dev); > +int watchdog_init_params(struct watchdog_device *wdd, struct device *dev); > extern int watchdog_register_device(struct watchdog_device *); > extern void watchdog_unregister_device(struct watchdog_device *); > Best Regards, Wenyou Yang