linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH 16/39] irqdomain: Make normal and nomap irqdomains exclusive
       [not found] ` <20210520163751.27325-17-maz@kernel.org>
@ 2021-11-15 19:05   ` Cédric Le Goater
  2021-11-16  8:33     ` Marc Zyngier
  0 siblings, 1 reply; 3+ messages in thread
From: Cédric Le Goater @ 2021-11-15 19:05 UTC (permalink / raw)
  To: Marc Zyngier, linux-kernel; +Cc: Thomas Gleixner, PowerPC, Greg Kurz

Hello Mark,

On 5/20/21 18:37, Marc Zyngier wrote:
> Direct mappings are completely exclusive of normal mappings, meaning
> that we can refactor the code slightly so that we can get rid of
> the revmap_direct_max_irq field and use the revmap_size field
> instead, reducing the size of the irqdomain structure.
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>


This patch is breaking the POWER9/POWER10 XIVE driver (these are not
old PPC systems :) on machines sharing the same LSI HW IRQ. For instance,
a linux KVM guest with a virtio-rng and a virtio-balloon device. In that
case, Linux creates two distinct IRQ mappings which can lead to some
unexpected behavior.

A fix to go forward would be to change the XIVE IRQ domain to use a
'Tree' domain for reverse mapping and not the 'No Map' domain mapping.
I will keep you updated for XIVE.

Thanks,

C.


> ---
>   include/linux/irqdomain.h |  6 +++---
>   kernel/irq/irqdomain.c    | 45 ++++++++++++++++++++++++++++++---------
>   2 files changed, 38 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
> index 723495ec5a2f..0916cf9c6e20 100644
> --- a/include/linux/irqdomain.h
> +++ b/include/linux/irqdomain.h
> @@ -149,8 +149,6 @@ struct irq_domain_chip_generic;
>    * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
>    *
>    * Revmap data, used internally by irq_domain
> - * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that
> - *                         support direct mapping
>    * @revmap_size: Size of the linear map table @revmap[]
>    * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
>    * @revmap: Linear table of hwirq->virq reverse mappings
> @@ -173,7 +171,6 @@ struct irq_domain {
>   
>   	/* reverse map data. The linear map gets appended to the irq_domain */
>   	irq_hw_number_t hwirq_max;
> -	unsigned int revmap_direct_max_irq;
>   	unsigned int revmap_size;
>   	struct radix_tree_root revmap_tree;
>   	struct mutex revmap_tree_mutex;
> @@ -207,6 +204,9 @@ enum {
>   	 */
>   	IRQ_DOMAIN_MSI_NOMASK_QUIRK	= (1 << 6),
>   
> +	/* Irq domain doesn't translate anything */
> +	IRQ_DOMAIN_FLAG_NO_MAP		= (1 << 7),
> +
>   	/*
>   	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
>   	 * for implementation specific purposes and ignored by the
> diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
> index e0143e640683..fa94c86e47d4 100644
> --- a/kernel/irq/irqdomain.c
> +++ b/kernel/irq/irqdomain.c
> @@ -146,6 +146,10 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
>   
>   	static atomic_t unknown_domains;
>   
> +	if (WARN_ON((size && direct_max) ||
> +		    (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max)))
> +		return NULL;
> +
>   	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
>   			      GFP_KERNEL, of_node_to_nid(to_of_node(fwnode)));
>   	if (!domain)
> @@ -213,8 +217,14 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
>   	domain->ops = ops;
>   	domain->host_data = host_data;
>   	domain->hwirq_max = hwirq_max;
> +
> +	if (direct_max) {
> +		size = direct_max;
> +		domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP;
> +	}
> +
>   	domain->revmap_size = size;
> -	domain->revmap_direct_max_irq = direct_max;
> +
>   	irq_domain_check_hierarchy(domain);
>   
>   	mutex_lock(&irq_domain_mutex);
> @@ -482,9 +492,18 @@ struct irq_domain *irq_get_default_host(void)
>   	return irq_default_domain;
>   }
>   
> +static bool irq_domain_is_nomap(struct irq_domain *domain)
> +{
> +	return IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) &&
> +	       (domain->flags & IRQ_DOMAIN_FLAG_NO_MAP);
> +}
> +
>   static void irq_domain_clear_mapping(struct irq_domain *domain,
>   				     irq_hw_number_t hwirq)
>   {
> +	if (irq_domain_is_nomap(domain))
> +		return;
> +
>   	if (hwirq < domain->revmap_size) {
>   		domain->revmap[hwirq] = 0;
>   	} else {
> @@ -498,6 +517,9 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
>   				   irq_hw_number_t hwirq,
>   				   struct irq_data *irq_data)
>   {
> +	if (irq_domain_is_nomap(domain))
> +		return;
> +
>   	if (hwirq < domain->revmap_size) {
>   		domain->revmap[hwirq] = irq_data->irq;
>   	} else {
> @@ -629,9 +651,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
>   		pr_debug("create_direct virq allocation failed\n");
>   		return 0;
>   	}
> -	if (virq >= domain->revmap_direct_max_irq) {
> +	if (virq >= domain->revmap_size) {
>   		pr_err("ERROR: no free irqs available below %i maximum\n",
> -			domain->revmap_direct_max_irq);
> +			domain->revmap_size);
>   		irq_free_desc(virq);
>   		return 0;
>   	}
> @@ -879,10 +901,14 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
>   	if (domain == NULL)
>   		return 0;
>   
> -	if (hwirq < domain->revmap_direct_max_irq) {
> -		data = irq_domain_get_irq_data(domain, hwirq);
> -		if (data && data->hwirq == hwirq)
> -			return hwirq;
> +	if (irq_domain_is_nomap(domain)) {
> +		if (hwirq < domain->revmap_size) {
> +			data = irq_domain_get_irq_data(domain, hwirq);
> +			if (data && data->hwirq == hwirq)
> +				return hwirq;
> +		}
> +
> +		return 0;
>   	}
>   
>   	/* Check if the hwirq is in the linear revmap. */
> @@ -1470,7 +1496,7 @@ static void irq_domain_fix_revmap(struct irq_data *d)
>   {
>   	void __rcu **slot;
>   
> -	if (d->hwirq < d->domain->revmap_size)
> +	if (irq_domain_is_nomap(d->domain) || d->hwirq < d->domain->revmap_size)
>   		return; /* Not using radix tree. */
>   
>   	/* Fix up the revmap. */
> @@ -1830,8 +1856,7 @@ static void
>   irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
>   {
>   	seq_printf(m, "%*sname:   %s\n", ind, "", d->name);
> -	seq_printf(m, "%*ssize:   %u\n", ind + 1, "",
> -		   d->revmap_size + d->revmap_direct_max_irq);
> +	seq_printf(m, "%*ssize:   %u\n", ind + 1, "", d->revmap_size);
>   	seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount);
>   	seq_printf(m, "%*sflags:  0x%08x\n", ind +1 , "", d->flags);
>   	if (d->ops && d->ops->debug_show)
> 


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 16/39] irqdomain: Make normal and nomap irqdomains exclusive
  2021-11-15 19:05   ` [PATCH 16/39] irqdomain: Make normal and nomap irqdomains exclusive Cédric Le Goater
@ 2021-11-16  8:33     ` Marc Zyngier
  2021-11-16 10:27       ` Cédric Le Goater
  0 siblings, 1 reply; 3+ messages in thread
From: Marc Zyngier @ 2021-11-16  8:33 UTC (permalink / raw)
  To: Cédric Le Goater; +Cc: Thomas Gleixner, PowerPC, linux-kernel, Greg Kurz

On Mon, 15 Nov 2021 19:05:17 +0000,
Cédric Le Goater <clg@kaod.org> wrote:
> 
> Hello Mark,

s/k/c/

> 
> On 5/20/21 18:37, Marc Zyngier wrote:
> > Direct mappings are completely exclusive of normal mappings, meaning
> > that we can refactor the code slightly so that we can get rid of
> > the revmap_direct_max_irq field and use the revmap_size field
> > instead, reducing the size of the irqdomain structure.
> > 
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> 
> 
> This patch is breaking the POWER9/POWER10 XIVE driver (these are not
> old PPC systems :) on machines sharing the same LSI HW IRQ. For instance,
> a linux KVM guest with a virtio-rng and a virtio-balloon device. In that
> case, Linux creates two distinct IRQ mappings which can lead to some
> unexpected behavior.

Either the irq domain translates, or it doesn't. If the driver creates
a nomap domain, and yet expects some sort of translation to happen,
then the driver is fundamentally broken. And even without that: how do
you end-up with a single HW interrupt having two mappings?

> A fix to go forward would be to change the XIVE IRQ domain to use a
> 'Tree' domain for reverse mapping and not the 'No Map' domain mapping.
> I will keep you updated for XIVE.

I bet there is a bit more to it. From what you are saying above,
something rather ungodly is happening in the XIVE code.

	M.

-- 
Without deviation from the norm, progress is not possible.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH 16/39] irqdomain: Make normal and nomap irqdomains exclusive
  2021-11-16  8:33     ` Marc Zyngier
@ 2021-11-16 10:27       ` Cédric Le Goater
  0 siblings, 0 replies; 3+ messages in thread
From: Cédric Le Goater @ 2021-11-16 10:27 UTC (permalink / raw)
  To: Marc Zyngier; +Cc: Thomas Gleixner, PowerPC, linux-kernel, Greg Kurz

Hello Marc,

>> This patch is breaking the POWER9/POWER10 XIVE driver (these are not
>> old PPC systems :) on machines sharing the same LSI HW IRQ. For instance,
>> a linux KVM guest with a virtio-rng and a virtio-balloon device. In that
>> case, Linux creates two distinct IRQ mappings which can lead to some
>> unexpected behavior.
> 
> Either the irq domain translates, or it doesn't. If the driver creates
> a nomap domain, and yet expects some sort of translation to happen,
> then the driver is fundamentally broken. And even without that: how do
> you end-up with a single HW interrupt having two mappings?
> 
>> A fix to go forward would be to change the XIVE IRQ domain to use a
>> 'Tree' domain for reverse mapping and not the 'No Map' domain mapping.
>> I will keep you updated for XIVE.
> 
> I bet there is a bit more to it. From what you are saying above,
> something rather ungodly is happening in the XIVE code.

It's making progress.

This change in irq_find_mapping() is what 'breaks' XIVE :

   +       if (irq_domain_is_nomap(domain)) {
   +               if (hwirq < domain->revmap_size) {
   +                      data = irq_domain_get_irq_data(domain, hwirq);
   +                      if (data && data->hwirq == hwirq)
   +                               return hwirq;
   +               }
   +
   +               return 0;


With the introduction of IRQ_DOMAIN_FLAG_NO_MAP, the revmap_tree lookup
is skipped and the previously mapped IRQ is not found. XIVE was relying
on a side effect of irq_domain_set_mapping() which is not true anymore.

I guess the easiest fix for 5.14 and 5.15 (in which was introduced MSI
domains) is to change the XIVE IRQ domain to a domain tree. Since the HW
can handle 1MB interrupts, this looks like a better choice for the driver.

Thanks,

C.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-11-16 10:27 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20210520163751.27325-1-maz@kernel.org>
     [not found] ` <20210520163751.27325-17-maz@kernel.org>
2021-11-15 19:05   ` [PATCH 16/39] irqdomain: Make normal and nomap irqdomains exclusive Cédric Le Goater
2021-11-16  8:33     ` Marc Zyngier
2021-11-16 10:27       ` Cédric Le Goater

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).