* [PATCH v2] tpm,tpm_tis: Handle interrupt storm
@ 2023-05-30 17:47 Lino Sanfilippo
2023-06-09 14:33 ` Jarkko Sakkinen
0 siblings, 1 reply; 5+ messages in thread
From: Lino Sanfilippo @ 2023-05-30 17:47 UTC (permalink / raw)
To: peterhuewe, jarkko, jgg
Cc: jsnitsel, hdegoede, oe-lkp, lkp, peter.ujfalusi, peterz, linux,
linux-integrity, linux-kernel, l.sanfilippo, LinoSanfilippo,
lukas, p.rosenberger, kernel test robot
From: Lino Sanfilippo <l.sanfilippo@kunbus.com>
After activation of interrupts for TPM TIS drivers 0-day reports an
interrupt storm on an Inspur NF5180M6/NF5180M6 server.
Fix this by detecting the storm and falling back to polling:
Count the number of unhandled interrupts within a 10 ms time interval. In
case that more than 1000 were unhandled deactivate interrupts entirely,
deregister the handler and use polling instead.
The storm detection logic equals the implementation in note_interrupt()
which uses timestamps and counters stored in struct irq_desc. Since this
structure is private to the generic interrupt core the TPM TIS core uses
its own timestamps and counters. Furthermore the TPM interrupt handler
always returns IRQ_HANDLED to prevent the generic interrupt core from
processing the interrupt storm.
Since the interrupt deregistration function devm_free_irq() waits for all
interrupt handlers to finish, only trigger a worker in the interrupt
handler and do the unregistration in the worker to avoid a deadlock.
Reported-by: kernel test robot <yujie.liu@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202305041325.ae8b0c43-yujie.liu@intel.com/
Suggested-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Lino Sanfilippo <l.sanfilippo@kunbus.com>
---
drivers/char/tpm/tpm_tis_core.c | 93 ++++++++++++++++++++++++++++-----
drivers/char/tpm/tpm_tis_core.h | 4 ++
2 files changed, 85 insertions(+), 12 deletions(-)
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 558144fa707a..7ae8228e803f 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -468,25 +468,32 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
return rc;
}
+static void __tpm_tis_disable_interrupts(struct tpm_chip *chip)
+{
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+ u32 intmask = 0;
+
+ tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
+ intmask &= ~TPM_GLOBAL_INT_ENABLE;
+
+ tpm_tis_request_locality(chip, 0);
+ tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
+ tpm_tis_relinquish_locality(chip, 0);
+
+ chip->flags &= ~TPM_CHIP_FLAG_IRQ;
+}
+
static void disable_interrupts(struct tpm_chip *chip)
{
struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
- u32 intmask;
- int rc;
if (priv->irq == 0)
return;
- rc = tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
- if (rc < 0)
- intmask = 0;
-
- intmask &= ~TPM_GLOBAL_INT_ENABLE;
- rc = tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
+ __tpm_tis_disable_interrupts(chip);
devm_free_irq(chip->dev.parent, priv->irq, chip);
priv->irq = 0;
- chip->flags &= ~TPM_CHIP_FLAG_IRQ;
}
/*
@@ -752,6 +759,53 @@ static bool tpm_tis_req_canceled(struct tpm_chip *chip, u8 status)
return status == TPM_STS_COMMAND_READY;
}
+static void tpm_tis_reenable_polling(struct tpm_chip *chip)
+{
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+
+ dev_warn(&chip->dev, FW_BUG
+ "TPM interrupt storm detected, polling instead\n");
+
+ __tpm_tis_disable_interrupts(chip);
+
+ /*
+ * devm_free_irq() must not be called from within the interrupt handler,
+ * since this function waits for running handlers to finish and thus it
+ * would deadlock. Instead trigger a worker that takes care of the
+ * unregistration.
+ */
+ schedule_work(&priv->free_irq_work);
+}
+
+static irqreturn_t tpm_tis_check_for_interrupt_storm(struct tpm_chip *chip)
+{
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+ const unsigned int MAX_UNHANDLED_IRQS = 1000;
+
+ /*
+ * The worker to free the TPM interrupt (free_irq_work) may already
+ * be scheduled, so make sure it is not scheduled again.
+ */
+ if (!(chip->flags & TPM_CHIP_FLAG_IRQ))
+ return IRQ_HANDLED;
+
+ if (time_after(jiffies, priv->last_unhandled_irq + HZ/10))
+ priv->unhandled_irqs = 1;
+ else
+ priv->unhandled_irqs++;
+
+ priv->last_unhandled_irq = jiffies;
+
+ if (priv->unhandled_irqs > MAX_UNHANDLED_IRQS)
+ tpm_tis_reenable_polling(chip);
+
+ /*
+ * Prevent the genirq code from starting its own interrupt storm
+ * handling by always reporting that the interrupt was handled.
+ */
+ return IRQ_HANDLED;
+}
+
static irqreturn_t tis_int_handler(int dummy, void *dev_id)
{
struct tpm_chip *chip = dev_id;
@@ -761,10 +815,10 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
rc = tpm_tis_read32(priv, TPM_INT_STATUS(priv->locality), &interrupt);
if (rc < 0)
- return IRQ_NONE;
+ goto unhandled;
if (interrupt == 0)
- return IRQ_NONE;
+ goto unhandled;
set_bit(TPM_TIS_IRQ_TESTED, &priv->flags);
if (interrupt & TPM_INTF_DATA_AVAIL_INT)
@@ -780,10 +834,13 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
rc = tpm_tis_write32(priv, TPM_INT_STATUS(priv->locality), interrupt);
tpm_tis_relinquish_locality(chip, 0);
if (rc < 0)
- return IRQ_NONE;
+ goto unhandled;
tpm_tis_read32(priv, TPM_INT_STATUS(priv->locality), &interrupt);
return IRQ_HANDLED;
+
+unhandled:
+ return tpm_tis_check_for_interrupt_storm(chip);
}
static void tpm_tis_gen_interrupt(struct tpm_chip *chip)
@@ -804,6 +861,15 @@ static void tpm_tis_gen_interrupt(struct tpm_chip *chip)
chip->flags &= ~TPM_CHIP_FLAG_IRQ;
}
+static void tpm_tis_free_irq_func(struct work_struct *work)
+{
+ struct tpm_tis_data *priv = container_of(work, typeof(*priv), free_irq_work);
+ struct tpm_chip *chip = priv->chip;
+
+ devm_free_irq(chip->dev.parent, priv->irq, chip);
+ priv->irq = 0;
+}
+
/* Register the IRQ and issue a command that will cause an interrupt. If an
* irq is seen then leave the chip setup for IRQ operation, otherwise reverse
* everything and leave in polling mode. Returns 0 on success.
@@ -816,6 +882,7 @@ static int tpm_tis_probe_irq_single(struct tpm_chip *chip, u32 intmask,
int rc;
u32 int_status;
+ INIT_WORK(&priv->free_irq_work, tpm_tis_free_irq_func);
rc = devm_request_threaded_irq(chip->dev.parent, irq, NULL,
tis_int_handler, IRQF_ONESHOT | flags,
@@ -918,6 +985,7 @@ void tpm_tis_remove(struct tpm_chip *chip)
interrupt = 0;
tpm_tis_write32(priv, reg, ~TPM_GLOBAL_INT_ENABLE & interrupt);
+ flush_work(&priv->free_irq_work);
tpm_tis_clkrun_enable(chip, false);
@@ -1021,6 +1089,7 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
chip->timeout_b = msecs_to_jiffies(TIS_TIMEOUT_B_MAX);
chip->timeout_c = msecs_to_jiffies(TIS_TIMEOUT_C_MAX);
chip->timeout_d = msecs_to_jiffies(TIS_TIMEOUT_D_MAX);
+ priv->chip = chip;
priv->timeout_min = TPM_TIMEOUT_USECS_MIN;
priv->timeout_max = TPM_TIMEOUT_USECS_MAX;
priv->phy_ops = phy_ops;
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index e978f457fd4d..b1fa42367052 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -91,11 +91,15 @@ enum tpm_tis_flags {
};
struct tpm_tis_data {
+ struct tpm_chip *chip;
u16 manufacturer_id;
struct mutex locality_count_mutex;
unsigned int locality_count;
int locality;
int irq;
+ struct work_struct free_irq_work;
+ unsigned long last_unhandled_irq;
+ unsigned int unhandled_irqs;
unsigned int int_mask;
unsigned long flags;
void __iomem *ilb_base_addr;
base-commit: 7877cb91f1081754a1487c144d85dc0d2e2e7fc4
--
2.40.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v2] tpm,tpm_tis: Handle interrupt storm
2023-05-30 17:47 [PATCH v2] tpm,tpm_tis: Handle interrupt storm Lino Sanfilippo
@ 2023-06-09 14:33 ` Jarkko Sakkinen
2023-06-09 14:34 ` Jarkko Sakkinen
2023-06-09 16:03 ` Lino Sanfilippo
0 siblings, 2 replies; 5+ messages in thread
From: Jarkko Sakkinen @ 2023-06-09 14:33 UTC (permalink / raw)
To: Lino Sanfilippo, peterhuewe, jgg
Cc: jsnitsel, hdegoede, oe-lkp, lkp, peter.ujfalusi, peterz, linux,
linux-integrity, linux-kernel, l.sanfilippo, lukas,
p.rosenberger, kernel test robot
On Tue May 30, 2023 at 8:47 PM EEST, Lino Sanfilippo wrote:
> From: Lino Sanfilippo <l.sanfilippo@kunbus.com>
>
> After activation of interrupts for TPM TIS drivers 0-day reports an
> interrupt storm on an Inspur NF5180M6/NF5180M6 server.
>
> Fix this by detecting the storm and falling back to polling:
> Count the number of unhandled interrupts within a 10 ms time interval. In
> case that more than 1000 were unhandled deactivate interrupts entirely,
> deregister the handler and use polling instead.
>
> The storm detection logic equals the implementation in note_interrupt()
> which uses timestamps and counters stored in struct irq_desc. Since this
> structure is private to the generic interrupt core the TPM TIS core uses
> its own timestamps and counters. Furthermore the TPM interrupt handler
> always returns IRQ_HANDLED to prevent the generic interrupt core from
> processing the interrupt storm.
>
> Since the interrupt deregistration function devm_free_irq() waits for all
> interrupt handlers to finish, only trigger a worker in the interrupt
> handler and do the unregistration in the worker to avoid a deadlock.
>
> Reported-by: kernel test robot <yujie.liu@intel.com>
> Closes: https://lore.kernel.org/oe-lkp/202305041325.ae8b0c43-yujie.liu@intel.com/
> Suggested-by: Lukas Wunner <lukas@wunner.de>
> Signed-off-by: Lino Sanfilippo <l.sanfilippo@kunbus.com>
> ---
Sorry for the latency. I've moved home office to a new location,
which has caused ~2 week lag. Unfortunate timing.
> drivers/char/tpm/tpm_tis_core.c | 93 ++++++++++++++++++++++++++++-----
> drivers/char/tpm/tpm_tis_core.h | 4 ++
> 2 files changed, 85 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
> index 558144fa707a..7ae8228e803f 100644
> --- a/drivers/char/tpm/tpm_tis_core.c
> +++ b/drivers/char/tpm/tpm_tis_core.c
> @@ -468,25 +468,32 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
> return rc;
> }
>
> +static void __tpm_tis_disable_interrupts(struct tpm_chip *chip)
> +{
> + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
> + u32 intmask = 0;
> +
> + tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
> + intmask &= ~TPM_GLOBAL_INT_ENABLE;
> +
> + tpm_tis_request_locality(chip, 0);
> + tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
> + tpm_tis_relinquish_locality(chip, 0);
> +
> + chip->flags &= ~TPM_CHIP_FLAG_IRQ;
> +}
> +
> static void disable_interrupts(struct tpm_chip *chip)
Add tpm_ prefix here too. It makes tracing/grepping/etc so much nicer.
> {
> struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
> - u32 intmask;
int_mask is more readable
> - int rc;
>
> if (priv->irq == 0)
> return;
>
> - rc = tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
> - if (rc < 0)
> - intmask = 0;
> -
> - intmask &= ~TPM_GLOBAL_INT_ENABLE;
> - rc = tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
> + __tpm_tis_disable_interrupts(chip);
>
> devm_free_irq(chip->dev.parent, priv->irq, chip);
> priv->irq = 0;
> - chip->flags &= ~TPM_CHIP_FLAG_IRQ;
> }
>
> /*
> @@ -752,6 +759,53 @@ static bool tpm_tis_req_canceled(struct tpm_chip *chip, u8 status)
> return status == TPM_STS_COMMAND_READY;
> }
>
> +static void tpm_tis_reenable_polling(struct tpm_chip *chip)
> +{
> + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
> +
> + dev_warn(&chip->dev, FW_BUG
> + "TPM interrupt storm detected, polling instead\n");
> +
> + __tpm_tis_disable_interrupts(chip);
> +
> + /*
> + * devm_free_irq() must not be called from within the interrupt handler,
> + * since this function waits for running handlers to finish and thus it
> + * would deadlock. Instead trigger a worker that takes care of the
> + * unregistration.
> + */
> + schedule_work(&priv->free_irq_work);
> +}
> +
> +static irqreturn_t tpm_tis_check_for_interrupt_storm(struct tpm_chip *chip)
> +{
> + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
> + const unsigned int MAX_UNHANDLED_IRQS = 1000;
Please declare this in the beginning of file because it is non-empirical
tuning parameter. I do not want it to be buried here. It is now as good
as a magic number.
Or perhaps even tpm_tis_core.h?
Why MAX_UNHANDLED_IRQS is exactly 1000 and not 1? I would rollback eagerly.
> +
> + /*
> + * The worker to free the TPM interrupt (free_irq_work) may already
> + * be scheduled, so make sure it is not scheduled again.
> + */
> + if (!(chip->flags & TPM_CHIP_FLAG_IRQ))
> + return IRQ_HANDLED;
> +
> + if (time_after(jiffies, priv->last_unhandled_irq + HZ/10))
> + priv->unhandled_irqs = 1;
> + else
> + priv->unhandled_irqs++;
> +
> + priv->last_unhandled_irq = jiffies;
> +
> + if (priv->unhandled_irqs > MAX_UNHANDLED_IRQS)
> + tpm_tis_reenable_polling(chip);
> +
> + /*
> + * Prevent the genirq code from starting its own interrupt storm
> + * handling by always reporting that the interrupt was handled.
> + */
> + return IRQ_HANDLED;
> +}
> +
> static irqreturn_t tis_int_handler(int dummy, void *dev_id)
> {
> struct tpm_chip *chip = dev_id;
> @@ -761,10 +815,10 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
>
> rc = tpm_tis_read32(priv, TPM_INT_STATUS(priv->locality), &interrupt);
> if (rc < 0)
> - return IRQ_NONE;
> + goto unhandled;
>
> if (interrupt == 0)
> - return IRQ_NONE;
> + goto unhandled;
>
> set_bit(TPM_TIS_IRQ_TESTED, &priv->flags);
> if (interrupt & TPM_INTF_DATA_AVAIL_INT)
> @@ -780,10 +834,13 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
> rc = tpm_tis_write32(priv, TPM_INT_STATUS(priv->locality), interrupt);
> tpm_tis_relinquish_locality(chip, 0);
> if (rc < 0)
> - return IRQ_NONE;
> + goto unhandled;
>
> tpm_tis_read32(priv, TPM_INT_STATUS(priv->locality), &interrupt);
> return IRQ_HANDLED;
> +
> +unhandled:
> + return tpm_tis_check_for_interrupt_storm(chip);
> }
>
> static void tpm_tis_gen_interrupt(struct tpm_chip *chip)
> @@ -804,6 +861,15 @@ static void tpm_tis_gen_interrupt(struct tpm_chip *chip)
> chip->flags &= ~TPM_CHIP_FLAG_IRQ;
> }
>
> +static void tpm_tis_free_irq_func(struct work_struct *work)
> +{
> + struct tpm_tis_data *priv = container_of(work, typeof(*priv), free_irq_work);
> + struct tpm_chip *chip = priv->chip;
> +
> + devm_free_irq(chip->dev.parent, priv->irq, chip);
> + priv->irq = 0;
> +}
> +
> /* Register the IRQ and issue a command that will cause an interrupt. If an
> * irq is seen then leave the chip setup for IRQ operation, otherwise reverse
> * everything and leave in polling mode. Returns 0 on success.
> @@ -816,6 +882,7 @@ static int tpm_tis_probe_irq_single(struct tpm_chip *chip, u32 intmask,
> int rc;
> u32 int_status;
>
> + INIT_WORK(&priv->free_irq_work, tpm_tis_free_irq_func);
>
> rc = devm_request_threaded_irq(chip->dev.parent, irq, NULL,
> tis_int_handler, IRQF_ONESHOT | flags,
> @@ -918,6 +985,7 @@ void tpm_tis_remove(struct tpm_chip *chip)
> interrupt = 0;
>
> tpm_tis_write32(priv, reg, ~TPM_GLOBAL_INT_ENABLE & interrupt);
> + flush_work(&priv->free_irq_work);
>
> tpm_tis_clkrun_enable(chip, false);
>
> @@ -1021,6 +1089,7 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
> chip->timeout_b = msecs_to_jiffies(TIS_TIMEOUT_B_MAX);
> chip->timeout_c = msecs_to_jiffies(TIS_TIMEOUT_C_MAX);
> chip->timeout_d = msecs_to_jiffies(TIS_TIMEOUT_D_MAX);
> + priv->chip = chip;
> priv->timeout_min = TPM_TIMEOUT_USECS_MIN;
> priv->timeout_max = TPM_TIMEOUT_USECS_MAX;
> priv->phy_ops = phy_ops;
> diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
> index e978f457fd4d..b1fa42367052 100644
> --- a/drivers/char/tpm/tpm_tis_core.h
> +++ b/drivers/char/tpm/tpm_tis_core.h
> @@ -91,11 +91,15 @@ enum tpm_tis_flags {
> };
>
> struct tpm_tis_data {
> + struct tpm_chip *chip;
> u16 manufacturer_id;
> struct mutex locality_count_mutex;
> unsigned int locality_count;
> int locality;
> int irq;
> + struct work_struct free_irq_work;
> + unsigned long last_unhandled_irq;
> + unsigned int unhandled_irqs;
> unsigned int int_mask;
> unsigned long flags;
> void __iomem *ilb_base_addr;
>
> base-commit: 7877cb91f1081754a1487c144d85dc0d2e2e7fc4
> --
> 2.40.1
BR, Jarkko
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2] tpm,tpm_tis: Handle interrupt storm
2023-06-09 14:33 ` Jarkko Sakkinen
@ 2023-06-09 14:34 ` Jarkko Sakkinen
2023-06-09 16:03 ` Lino Sanfilippo
1 sibling, 0 replies; 5+ messages in thread
From: Jarkko Sakkinen @ 2023-06-09 14:34 UTC (permalink / raw)
To: Jarkko Sakkinen, Lino Sanfilippo, peterhuewe, jgg
Cc: jsnitsel, hdegoede, oe-lkp, lkp, peter.ujfalusi, peterz, linux,
linux-integrity, linux-kernel, l.sanfilippo, lukas,
p.rosenberger, kernel test robot
Short summary: tpm_tis: Disable interrupts after 1000 unhandled IRQs
I.e. the exact thing the commit changes. Handle means absolutely
nothing.
BR, Jarkko
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2] tpm,tpm_tis: Handle interrupt storm
2023-06-09 14:33 ` Jarkko Sakkinen
2023-06-09 14:34 ` Jarkko Sakkinen
@ 2023-06-09 16:03 ` Lino Sanfilippo
2023-06-09 16:10 ` Jarkko Sakkinen
1 sibling, 1 reply; 5+ messages in thread
From: Lino Sanfilippo @ 2023-06-09 16:03 UTC (permalink / raw)
To: Jarkko Sakkinen, Lino Sanfilippo, peterhuewe, jgg
Cc: jsnitsel, hdegoede, oe-lkp, lkp, peter.ujfalusi, peterz, linux,
linux-integrity, linux-kernel, lukas, p.rosenberger,
kernel test robot
Hi,
On 09.06.23 16:33, Jarkko Sakkinen wrote:
>
> On Tue May 30, 2023 at 8:47 PM EEST, Lino Sanfilippo wrote:
>> From: Lino Sanfilippo <l.sanfilippo@kunbus.com>
>>
>> After activation of interrupts for TPM TIS drivers 0-day reports an
>> interrupt storm on an Inspur NF5180M6/NF5180M6 server.
>>
>> Fix this by detecting the storm and falling back to polling:
>> Count the number of unhandled interrupts within a 10 ms time interval. In
>> case that more than 1000 were unhandled deactivate interrupts entirely,
>> deregister the handler and use polling instead.
>>
>> The storm detection logic equals the implementation in note_interrupt()
>> which uses timestamps and counters stored in struct irq_desc. Since this
>> structure is private to the generic interrupt core the TPM TIS core uses
>> its own timestamps and counters. Furthermore the TPM interrupt handler
>> always returns IRQ_HANDLED to prevent the generic interrupt core from
>> processing the interrupt storm.
>>
>> Since the interrupt deregistration function devm_free_irq() waits for all
>> interrupt handlers to finish, only trigger a worker in the interrupt
>> handler and do the unregistration in the worker to avoid a deadlock.
>>
>> Reported-by: kernel test robot <yujie.liu@intel.com>
>> Closes: https://lore.kernel.org/oe-lkp/202305041325.ae8b0c43-yujie.liu@intel.com/
>> Suggested-by: Lukas Wunner <lukas@wunner.de>
>> Signed-off-by: Lino Sanfilippo <l.sanfilippo@kunbus.com>
>> ---
>
> Sorry for the latency. I've moved home office to a new location,
> which has caused ~2 week lag. Unfortunate timing.
>
No prob :)
>> drivers/char/tpm/tpm_tis_core.c | 93 ++++++++++++++++++++++++++++-----
>> drivers/char/tpm/tpm_tis_core.h | 4 ++
>> 2 files changed, 85 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
>> index 558144fa707a..7ae8228e803f 100644
>> --- a/drivers/char/tpm/tpm_tis_core.c
>> +++ b/drivers/char/tpm/tpm_tis_core.c
>> @@ -468,25 +468,32 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
>> return rc;
>> }
>>
>> +static void __tpm_tis_disable_interrupts(struct tpm_chip *chip)
>> +{
>> + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
>> + u32 intmask = 0;
>> +
>> + tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
>> + intmask &= ~TPM_GLOBAL_INT_ENABLE;
>> +
>> + tpm_tis_request_locality(chip, 0);
>> + tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
>> + tpm_tis_relinquish_locality(chip, 0);
>> +
>> + chip->flags &= ~TPM_CHIP_FLAG_IRQ;
>> +}
>> +
>> static void disable_interrupts(struct tpm_chip *chip)
>
> Add tpm_ prefix here too. It makes tracing/grepping/etc so much nicer.
Ok.
>
>> {
>> struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
>> - u32 intmask;
>
> int_mask is more readable
Ok.
>
>> - int rc;
>>
>> if (priv->irq == 0)
>> return;
>>
>> - rc = tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
>> - if (rc < 0)
>> - intmask = 0;
>> -
>> - intmask &= ~TPM_GLOBAL_INT_ENABLE;
>> - rc = tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
>> + __tpm_tis_disable_interrupts(chip);
>>
>> devm_free_irq(chip->dev.parent, priv->irq, chip);
>> priv->irq = 0;
>> - chip->flags &= ~TPM_CHIP_FLAG_IRQ;
>> }
>>
>> /*
>> @@ -752,6 +759,53 @@ static bool tpm_tis_req_canceled(struct tpm_chip *chip, u8 status)
>> return status == TPM_STS_COMMAND_READY;
>> }
>>
>> +static void tpm_tis_reenable_polling(struct tpm_chip *chip)
>> +{
>> + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
>> +
>> + dev_warn(&chip->dev, FW_BUG
>> + "TPM interrupt storm detected, polling instead\n");
>> +
>> + __tpm_tis_disable_interrupts(chip);
>> +
>> + /*
>> + * devm_free_irq() must not be called from within the interrupt handler,
>> + * since this function waits for running handlers to finish and thus it
>> + * would deadlock. Instead trigger a worker that takes care of the
>> + * unregistration.
>> + */
>> + schedule_work(&priv->free_irq_work);
>> +}
>> +
>> +static irqreturn_t tpm_tis_check_for_interrupt_storm(struct tpm_chip *chip)
>> +{
>> + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
>> + const unsigned int MAX_UNHANDLED_IRQS = 1000;
>
> Please declare this in the beginning of file because it is non-empirical
> tuning parameter. I do not want it to be buried here. It is now as good
> as a magic number.
>
> Or perhaps even tpm_tis_core.h?
>
For now that constant is only used in tpm_tis_core.c. So I would favor to define it there.
> Why MAX_UNHANDLED_IRQS is exactly 1000 and not 1? I would rollback eagerly.
Because the IRQ line may be shared with another device which has raised the
interrupt instead of the TPM. So unhandled interrupts may be legit.
Regards,
Lino
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v2] tpm,tpm_tis: Handle interrupt storm
2023-06-09 16:03 ` Lino Sanfilippo
@ 2023-06-09 16:10 ` Jarkko Sakkinen
0 siblings, 0 replies; 5+ messages in thread
From: Jarkko Sakkinen @ 2023-06-09 16:10 UTC (permalink / raw)
To: Lino Sanfilippo, Lino Sanfilippo, peterhuewe, jgg
Cc: jsnitsel, hdegoede, oe-lkp, lkp, peter.ujfalusi, peterz, linux,
linux-integrity, linux-kernel, lukas, p.rosenberger,
kernel test robot
On Fri Jun 9, 2023 at 7:03 PM EEST, Lino Sanfilippo wrote:
>
> Hi,
>
> On 09.06.23 16:33, Jarkko Sakkinen wrote:
>
> >
> > On Tue May 30, 2023 at 8:47 PM EEST, Lino Sanfilippo wrote:
> >> From: Lino Sanfilippo <l.sanfilippo@kunbus.com>
> >>
> >> After activation of interrupts for TPM TIS drivers 0-day reports an
> >> interrupt storm on an Inspur NF5180M6/NF5180M6 server.
> >>
> >> Fix this by detecting the storm and falling back to polling:
> >> Count the number of unhandled interrupts within a 10 ms time interval. In
> >> case that more than 1000 were unhandled deactivate interrupts entirely,
> >> deregister the handler and use polling instead.
> >>
> >> The storm detection logic equals the implementation in note_interrupt()
> >> which uses timestamps and counters stored in struct irq_desc. Since this
> >> structure is private to the generic interrupt core the TPM TIS core uses
> >> its own timestamps and counters. Furthermore the TPM interrupt handler
> >> always returns IRQ_HANDLED to prevent the generic interrupt core from
> >> processing the interrupt storm.
> >>
> >> Since the interrupt deregistration function devm_free_irq() waits for all
> >> interrupt handlers to finish, only trigger a worker in the interrupt
> >> handler and do the unregistration in the worker to avoid a deadlock.
> >>
> >> Reported-by: kernel test robot <yujie.liu@intel.com>
> >> Closes: https://lore.kernel.org/oe-lkp/202305041325.ae8b0c43-yujie.liu@intel.com/
> >> Suggested-by: Lukas Wunner <lukas@wunner.de>
> >> Signed-off-by: Lino Sanfilippo <l.sanfilippo@kunbus.com>
> >> ---
> >
> > Sorry for the latency. I've moved home office to a new location,
> > which has caused ~2 week lag. Unfortunate timing.
> >
>
>
> No prob :)
>
>
> >> drivers/char/tpm/tpm_tis_core.c | 93 ++++++++++++++++++++++++++++-----
> >> drivers/char/tpm/tpm_tis_core.h | 4 ++
> >> 2 files changed, 85 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
> >> index 558144fa707a..7ae8228e803f 100644
> >> --- a/drivers/char/tpm/tpm_tis_core.c
> >> +++ b/drivers/char/tpm/tpm_tis_core.c
> >> @@ -468,25 +468,32 @@ static int tpm_tis_send_data(struct tpm_chip *chip, const u8 *buf, size_t len)
> >> return rc;
> >> }
> >>
> >> +static void __tpm_tis_disable_interrupts(struct tpm_chip *chip)
> >> +{
> >> + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
> >> + u32 intmask = 0;
> >> +
> >> + tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
> >> + intmask &= ~TPM_GLOBAL_INT_ENABLE;
> >> +
> >> + tpm_tis_request_locality(chip, 0);
> >> + tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
> >> + tpm_tis_relinquish_locality(chip, 0);
> >> +
> >> + chip->flags &= ~TPM_CHIP_FLAG_IRQ;
> >> +}
> >> +
> >> static void disable_interrupts(struct tpm_chip *chip)
> >
> > Add tpm_ prefix here too. It makes tracing/grepping/etc so much nicer.
>
> Ok.
>
> >
> >> {
> >> struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
> >> - u32 intmask;
> >
> > int_mask is more readable
>
> Ok.
>
> >
> >> - int rc;
> >>
> >> if (priv->irq == 0)
> >> return;
> >>
> >> - rc = tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
> >> - if (rc < 0)
> >> - intmask = 0;
> >> -
> >> - intmask &= ~TPM_GLOBAL_INT_ENABLE;
> >> - rc = tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
> >> + __tpm_tis_disable_interrupts(chip);
> >>
> >> devm_free_irq(chip->dev.parent, priv->irq, chip);
> >> priv->irq = 0;
> >> - chip->flags &= ~TPM_CHIP_FLAG_IRQ;
> >> }
> >>
> >> /*
> >> @@ -752,6 +759,53 @@ static bool tpm_tis_req_canceled(struct tpm_chip *chip, u8 status)
> >> return status == TPM_STS_COMMAND_READY;
> >> }
> >>
> >> +static void tpm_tis_reenable_polling(struct tpm_chip *chip)
> >> +{
> >> + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
> >> +
> >> + dev_warn(&chip->dev, FW_BUG
> >> + "TPM interrupt storm detected, polling instead\n");
> >> +
> >> + __tpm_tis_disable_interrupts(chip);
> >> +
> >> + /*
> >> + * devm_free_irq() must not be called from within the interrupt handler,
> >> + * since this function waits for running handlers to finish and thus it
> >> + * would deadlock. Instead trigger a worker that takes care of the
> >> + * unregistration.
> >> + */
> >> + schedule_work(&priv->free_irq_work);
> >> +}
> >> +
> >> +static irqreturn_t tpm_tis_check_for_interrupt_storm(struct tpm_chip *chip)
> >> +{
> >> + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
> >> + const unsigned int MAX_UNHANDLED_IRQS = 1000;
> >
> > Please declare this in the beginning of file because it is non-empirical
> > tuning parameter. I do not want it to be buried here. It is now as good
> > as a magic number.
> >
> > Or perhaps even tpm_tis_core.h?
> >
>
> For now that constant is only used in tpm_tis_core.c. So I would favor to define it there.
>
> > Why MAX_UNHANDLED_IRQS is exactly 1000 and not 1? I would rollback eagerly.
>
>
> Because the IRQ line may be shared with another device which has raised the
> interrupt instead of the TPM. So unhandled interrupts may be legit.
I understand that being exact here is impossible. So let's stick to this
but please move the constant to the tpm_tis_core.c with the TPM_ prefix
because it is an essential tuning parameter.
BR, Jarkko
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2023-06-09 16:10 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-30 17:47 [PATCH v2] tpm,tpm_tis: Handle interrupt storm Lino Sanfilippo
2023-06-09 14:33 ` Jarkko Sakkinen
2023-06-09 14:34 ` Jarkko Sakkinen
2023-06-09 16:03 ` Lino Sanfilippo
2023-06-09 16:10 ` Jarkko Sakkinen
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.