All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
@ 2016-08-11 14:02 Alexander Popov
  2016-08-11 16:44 ` Paolo Bonzini
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Popov @ 2016-08-11 14:02 UTC (permalink / raw)
  To: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Paolo Bonzini, Radim Krcmar,
	Joerg Roedel, Alexander Popov, linux-kernel, x86, kvm

Introduce paravirq irq_domain for interrupts injected by the hypervisor
using Intel VT-x technology. Paravirq irq_domain allows registering
IRQ handlers for these interrupts and avoid emulating an MSI-capable
PCI device by the hypervisor.

Signed-off-by: Alexander Popov <alex.popov@linux.com>
---
 arch/x86/Kconfig                 |   8 +++
 arch/x86/include/asm/irqdomain.h |   6 ++
 arch/x86/include/asm/paravirq.h  |   9 +++
 arch/x86/kernel/apic/Makefile    |   2 +
 arch/x86/kernel/apic/paravirq.c  | 128 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/apic/vector.c    |   1 +
 6 files changed, 154 insertions(+)
 create mode 100644 arch/x86/include/asm/paravirq.h
 create mode 100644 arch/x86/kernel/apic/paravirq.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5c6e747..209bd88 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -760,6 +760,14 @@ config PARAVIRT_TIME_ACCOUNTING
 
 	  If in doubt, say N here.
 
+config X86_PARAVIRQ
+	bool "Enable paravirq irq_domain"
+	depends on PARAVIRT && X86_LOCAL_APIC
+	default n
+	---help---
+	  This option enables paravirq irq_domain for interrupts injected
+	  by the hypervisor using Intel VT-x technology.
+
 config PARAVIRT_CLOCK
 	bool
 
diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h
index d26075b..e3192f6 100644
--- a/arch/x86/include/asm/irqdomain.h
+++ b/arch/x86/include/asm/irqdomain.h
@@ -60,4 +60,10 @@ extern void arch_init_htirq_domain(struct irq_domain *domain);
 static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
 #endif
 
+#ifdef CONFIG_X86_PARAVIRQ
+extern void arch_init_paravirq_domain(struct irq_domain *domain);
+#else
+static inline void arch_init_paravirq_domain(struct irq_domain *domain) { }
+#endif
+
 #endif
diff --git a/arch/x86/include/asm/paravirq.h b/arch/x86/include/asm/paravirq.h
new file mode 100644
index 0000000..a137de2
--- /dev/null
+++ b/arch/x86/include/asm/paravirq.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_X86_PARAVIRQ_H
+#define _ASM_X86_PARAVIRQ_H
+
+int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
+				void (*irq_unmask)(struct irq_data *data));
+int paravirq_alloc_irq(void);
+void paravirq_free_irq(unsigned int irq);
+
+#endif /* _ASM_X86_PARAVIRQ_H */
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 8e63ebd..84f9ce0 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -28,3 +28,5 @@ obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o
 
 # For 32bit, probe_32 need to be listed last
 obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o
+
+obj-$(CONFIG_X86_PARAVIRQ)	+= paravirq.o
diff --git a/arch/x86/kernel/apic/paravirq.c b/arch/x86/kernel/apic/paravirq.c
new file mode 100644
index 0000000..430b819
--- /dev/null
+++ b/arch/x86/kernel/apic/paravirq.c
@@ -0,0 +1,128 @@
+/*
+ * An irq_domain for interrupts injected by the hypervisor using
+ * Intel VT-x technology.
+ *
+ * Copyright (C) 2016 Alexander Popov <alex.popov@linux.com>.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <asm/irqdomain.h>
+#include <asm/paravirq.h>
+
+static struct irq_domain *paravirq_domain;
+
+static struct irq_chip paravirq_chip = {
+	.name			= "PARAVIRQ",
+	.irq_ack		= irq_chip_ack_parent,
+};
+
+static int paravirq_domain_alloc(struct irq_domain *domain,
+			unsigned int virq, unsigned int nr_irqs, void *arg)
+{
+	int ret = 0;
+
+	BUG_ON(domain != paravirq_domain);
+
+	if (nr_irqs != 1)
+		return -EINVAL;
+
+	ret = irq_domain_set_hwirq_and_chip(paravirq_domain,
+					virq, virq, &paravirq_chip, NULL);
+	if (ret) {
+		pr_warn("setting chip, hwirq for irq %u failed\n", virq);
+		return ret;
+	}
+
+	__irq_set_handler(virq, handle_edge_irq, 0, "edge");
+
+	return 0;
+}
+
+static void paravirq_domain_free(struct irq_domain *domain,
+					unsigned int virq, unsigned int nr_irqs)
+{
+	struct irq_data *irq_data;
+
+	BUG_ON(domain != paravirq_domain);
+	BUG_ON(nr_irqs != 1);
+
+	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
+	if (irq_data)
+		irq_domain_reset_irq_data(irq_data);
+	else
+		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
+}
+
+static const struct irq_domain_ops paravirq_domain_ops = {
+	.alloc	= paravirq_domain_alloc,
+	.free	= paravirq_domain_free,
+};
+
+int paravirq_alloc_irq(void)
+{
+	struct irq_alloc_info info;
+
+	if (!paravirq_domain)
+		return -ENODEV;
+
+	if (!paravirq_chip.irq_mask || !paravirq_chip.irq_unmask)
+		return -EINVAL;
+
+	init_irq_alloc_info(&info, NULL);
+
+	return irq_domain_alloc_irqs(paravirq_domain, 1, NUMA_NO_NODE, &info);
+}
+EXPORT_SYMBOL(paravirq_alloc_irq);
+
+void paravirq_free_irq(unsigned int virq)
+{
+	struct irq_data *irq_data;
+
+	if (!paravirq_domain) {
+		pr_warn("paravirq irq_domain is not initialized\n");
+		return;
+	}
+
+	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
+	if (irq_data)
+		irq_domain_free_irqs(virq, 1);
+	else
+		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
+}
+EXPORT_SYMBOL(paravirq_free_irq);
+
+int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
+				void (*irq_unmask)(struct irq_data *data))
+{
+	if (!paravirq_domain)
+		return -ENODEV;
+
+	if (paravirq_chip.irq_mask || paravirq_chip.irq_unmask)
+		return -EEXIST;
+
+	if (!irq_mask || !irq_unmask)
+		return -EINVAL;
+
+	paravirq_chip.irq_mask = irq_mask;
+	paravirq_chip.irq_unmask = irq_unmask;
+
+	return 0;
+}
+EXPORT_SYMBOL(paravirq_init_chip);
+
+void arch_init_paravirq_domain(struct irq_domain *parent)
+{
+	paravirq_domain = irq_domain_add_tree(NULL, &paravirq_domain_ops, NULL);
+	if (!paravirq_domain) {
+		pr_warn("failed to initialize paravirq irq_domain\n");
+		return;
+	}
+
+	paravirq_domain->name = paravirq_chip.name;
+	paravirq_domain->parent = parent;
+	paravirq_domain->flags |= IRQ_DOMAIN_FLAG_AUTO_RECURSIVE;
+}
+
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6066d94..878b440 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -438,6 +438,7 @@ int __init arch_early_irq_init(void)
 
 	arch_init_msi_domain(x86_vector_domain);
 	arch_init_htirq_domain(x86_vector_domain);
+	arch_init_paravirq_domain(x86_vector_domain);
 
 	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
 	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
-- 
2.5.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-11 14:02 [PATCH 1/1] x86/apic: Introduce paravirq irq_domain Alexander Popov
@ 2016-08-11 16:44 ` Paolo Bonzini
  2016-08-12 10:56   ` Alexander Popov
  0 siblings, 1 reply; 12+ messages in thread
From: Paolo Bonzini @ 2016-08-11 16:44 UTC (permalink / raw)
  To: Alexander Popov
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm


> Introduce paravirq irq_domain for interrupts injected by the hypervisor
> using Intel VT-x technology. Paravirq irq_domain allows registering
> IRQ handlers for these interrupts and avoid emulating an MSI-capable
> PCI device by the hypervisor.

What hypervisor are you implementing this on? (And why is this useful?
That is, what kind of device would register an interrupt in this domain?)

This isn't really paravirt since it's just your usual LAPIC interrupt; it's
either some paravirt *bus* that you're implementing, or some MSI-capable
platform device (in which case I'm not sure what the irq_mask and
irq_unmask function arguments to paravirq_init_chip would do).  It's
not clear, and without seeing a user of paravirq_init_chip it's not
really possible to review what you are doing.

Thanks,

Paolo

> Signed-off-by: Alexander Popov <alex.popov@linux.com>
> ---
>  arch/x86/Kconfig                 |   8 +++
>  arch/x86/include/asm/irqdomain.h |   6 ++
>  arch/x86/include/asm/paravirq.h  |   9 +++
>  arch/x86/kernel/apic/Makefile    |   2 +
>  arch/x86/kernel/apic/paravirq.c  | 128
>  +++++++++++++++++++++++++++++++++++++++
>  arch/x86/kernel/apic/vector.c    |   1 +
>  6 files changed, 154 insertions(+)
>  create mode 100644 arch/x86/include/asm/paravirq.h
>  create mode 100644 arch/x86/kernel/apic/paravirq.c
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 5c6e747..209bd88 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -760,6 +760,14 @@ config PARAVIRT_TIME_ACCOUNTING
>  
>  	  If in doubt, say N here.
>  
> +config X86_PARAVIRQ
> +	bool "Enable paravirq irq_domain"
> +	depends on PARAVIRT && X86_LOCAL_APIC
> +	default n
> +	---help---
> +	  This option enables paravirq irq_domain for interrupts injected
> +	  by the hypervisor using Intel VT-x technology.
> +
>  config PARAVIRT_CLOCK
>  	bool
>  
> diff --git a/arch/x86/include/asm/irqdomain.h
> b/arch/x86/include/asm/irqdomain.h
> index d26075b..e3192f6 100644
> --- a/arch/x86/include/asm/irqdomain.h
> +++ b/arch/x86/include/asm/irqdomain.h
> @@ -60,4 +60,10 @@ extern void arch_init_htirq_domain(struct irq_domain
> *domain);
>  static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
>  #endif
>  
> +#ifdef CONFIG_X86_PARAVIRQ
> +extern void arch_init_paravirq_domain(struct irq_domain *domain);
> +#else
> +static inline void arch_init_paravirq_domain(struct irq_domain *domain) { }
> +#endif
> +
>  #endif
> diff --git a/arch/x86/include/asm/paravirq.h
> b/arch/x86/include/asm/paravirq.h
> new file mode 100644
> index 0000000..a137de2
> --- /dev/null
> +++ b/arch/x86/include/asm/paravirq.h
> @@ -0,0 +1,9 @@
> +#ifndef _ASM_X86_PARAVIRQ_H
> +#define _ASM_X86_PARAVIRQ_H
> +
> +int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
> +				void (*irq_unmask)(struct irq_data *data));
> +int paravirq_alloc_irq(void);
> +void paravirq_free_irq(unsigned int irq);
> +
> +#endif /* _ASM_X86_PARAVIRQ_H */
> diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
> index 8e63ebd..84f9ce0 100644
> --- a/arch/x86/kernel/apic/Makefile
> +++ b/arch/x86/kernel/apic/Makefile
> @@ -28,3 +28,5 @@ obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o
>  
>  # For 32bit, probe_32 need to be listed last
>  obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o
> +
> +obj-$(CONFIG_X86_PARAVIRQ)	+= paravirq.o
> diff --git a/arch/x86/kernel/apic/paravirq.c
> b/arch/x86/kernel/apic/paravirq.c
> new file mode 100644
> index 0000000..430b819
> --- /dev/null
> +++ b/arch/x86/kernel/apic/paravirq.c
> @@ -0,0 +1,128 @@
> +/*
> + * An irq_domain for interrupts injected by the hypervisor using
> + * Intel VT-x technology.
> + *
> + * Copyright (C) 2016 Alexander Popov <alex.popov@linux.com>.
> + *
> + * This file is released under the GPLv2.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/irq.h>
> +#include <asm/irqdomain.h>
> +#include <asm/paravirq.h>
> +
> +static struct irq_domain *paravirq_domain;
> +
> +static struct irq_chip paravirq_chip = {
> +	.name			= "PARAVIRQ",
> +	.irq_ack		= irq_chip_ack_parent,
> +};
> +
> +static int paravirq_domain_alloc(struct irq_domain *domain,
> +			unsigned int virq, unsigned int nr_irqs, void *arg)
> +{
> +	int ret = 0;
> +
> +	BUG_ON(domain != paravirq_domain);
> +
> +	if (nr_irqs != 1)
> +		return -EINVAL;
> +
> +	ret = irq_domain_set_hwirq_and_chip(paravirq_domain,
> +					virq, virq, &paravirq_chip, NULL);
> +	if (ret) {
> +		pr_warn("setting chip, hwirq for irq %u failed\n", virq);
> +		return ret;
> +	}
> +
> +	__irq_set_handler(virq, handle_edge_irq, 0, "edge");
> +
> +	return 0;
> +}
> +
> +static void paravirq_domain_free(struct irq_domain *domain,
> +					unsigned int virq, unsigned int nr_irqs)
> +{
> +	struct irq_data *irq_data;
> +
> +	BUG_ON(domain != paravirq_domain);
> +	BUG_ON(nr_irqs != 1);
> +
> +	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
> +	if (irq_data)
> +		irq_domain_reset_irq_data(irq_data);
> +	else
> +		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
> +}
> +
> +static const struct irq_domain_ops paravirq_domain_ops = {
> +	.alloc	= paravirq_domain_alloc,
> +	.free	= paravirq_domain_free,
> +};
> +
> +int paravirq_alloc_irq(void)
> +{
> +	struct irq_alloc_info info;
> +
> +	if (!paravirq_domain)
> +		return -ENODEV;
> +
> +	if (!paravirq_chip.irq_mask || !paravirq_chip.irq_unmask)
> +		return -EINVAL;
> +
> +	init_irq_alloc_info(&info, NULL);
> +
> +	return irq_domain_alloc_irqs(paravirq_domain, 1, NUMA_NO_NODE, &info);
> +}
> +EXPORT_SYMBOL(paravirq_alloc_irq);
> +
> +void paravirq_free_irq(unsigned int virq)
> +{
> +	struct irq_data *irq_data;
> +
> +	if (!paravirq_domain) {
> +		pr_warn("paravirq irq_domain is not initialized\n");
> +		return;
> +	}
> +
> +	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
> +	if (irq_data)
> +		irq_domain_free_irqs(virq, 1);
> +	else
> +		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
> +}
> +EXPORT_SYMBOL(paravirq_free_irq);
> +
> +int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
> +				void (*irq_unmask)(struct irq_data *data))
> +{
> +	if (!paravirq_domain)
> +		return -ENODEV;
> +
> +	if (paravirq_chip.irq_mask || paravirq_chip.irq_unmask)
> +		return -EEXIST;
> +
> +	if (!irq_mask || !irq_unmask)
> +		return -EINVAL;
> +
> +	paravirq_chip.irq_mask = irq_mask;
> +	paravirq_chip.irq_unmask = irq_unmask;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(paravirq_init_chip);
> +
> +void arch_init_paravirq_domain(struct irq_domain *parent)
> +{
> +	paravirq_domain = irq_domain_add_tree(NULL, &paravirq_domain_ops, NULL);
> +	if (!paravirq_domain) {
> +		pr_warn("failed to initialize paravirq irq_domain\n");
> +		return;
> +	}
> +
> +	paravirq_domain->name = paravirq_chip.name;
> +	paravirq_domain->parent = parent;
> +	paravirq_domain->flags |= IRQ_DOMAIN_FLAG_AUTO_RECURSIVE;
> +}
> +
> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
> index 6066d94..878b440 100644
> --- a/arch/x86/kernel/apic/vector.c
> +++ b/arch/x86/kernel/apic/vector.c
> @@ -438,6 +438,7 @@ int __init arch_early_irq_init(void)
>  
>  	arch_init_msi_domain(x86_vector_domain);
>  	arch_init_htirq_domain(x86_vector_domain);
> +	arch_init_paravirq_domain(x86_vector_domain);
>  
>  	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
>  	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
> --
> 2.5.5
> 
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-11 16:44 ` Paolo Bonzini
@ 2016-08-12 10:56   ` Alexander Popov
  2016-08-12 11:43     ` Paolo Bonzini
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Popov @ 2016-08-12 10:56 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm

On 11.08.2016 19:44, Paolo Bonzini wrote:
> 
>> Introduce paravirq irq_domain for interrupts injected by the hypervisor
>> using Intel VT-x technology. Paravirq irq_domain allows registering
>> IRQ handlers for these interrupts and avoid emulating an MSI-capable
>> PCI device by the hypervisor.

Hello, Paolo,

Thanks for your answer.

> What hypervisor are you implementing this on? (And why is this useful?
> That is, what kind of device would register an interrupt in this domain?)

I've implemented paravirq for the hypervisor developed by Positive Technologies
(http://www.ptsecurity.com/). It is a lightweight bare-metal hypervisor
based on "Blue Pill" concept (introduced by Joanna Rutkowska). It will be
published as open source. The main idea is to make this hypervisor small
and avoid as much emulation as possible.

> This isn't really paravirt since it's just your usual LAPIC interrupt; it's
> either some paravirt *bus* that you're implementing, or some MSI-capable
> platform device (in which case I'm not sure what the irq_mask and
> irq_unmask function arguments to paravirq_init_chip would do).  It's
> not clear, and without seeing a user of paravirq_init_chip it's not
> really possible to review what you are doing.

Maybe the name "paravirq" is not very good, I'll try to describe the idea.

There is some kernel module for special interactions between guest VMs.
Currently it has to register a MSI-capable PCI device to handle interrupts
injected by the hypervisor. And the bare-metal hypervisor has to emulate
such a device for guest VMs.

So I've implemented paravirq irq_domain to avoid this redundant emulation.
With it we can just call:
 - paravirq_alloc_irq() to allocate a LAPIC irq;
 - request_irq() for it;
 - irqd_cfg(irq_get_irq_data()) to get the corresponding interrupt vector
and inform the hypervisor about it.
Now we happily handle the irq from the hypervisor when it injects this vector.

The irq_mask/irq_unmask parameters of paravirq_init_chip() are the pointers
to the functions from the interaction module which ask the hypervisor to
start/stop injecting interrupts to the guest VM.

Paravirq irq_domain allows to avoid the PCI device emulation in the hypervisor
and provides the ability to run slimmer Linux guests without precompiled
PCI and MSI support.

Did I manage to answer your questions?

Please correct me if the idea is wrong or there's a better way to do that.
Thanks.

>> Signed-off-by: Alexander Popov <alex.popov@linux.com>
>> ---
>>  arch/x86/Kconfig                 |   8 +++
>>  arch/x86/include/asm/irqdomain.h |   6 ++
>>  arch/x86/include/asm/paravirq.h  |   9 +++
>>  arch/x86/kernel/apic/Makefile    |   2 +
>>  arch/x86/kernel/apic/paravirq.c  | 128
>>  +++++++++++++++++++++++++++++++++++++++
>>  arch/x86/kernel/apic/vector.c    |   1 +
>>  6 files changed, 154 insertions(+)
>>  create mode 100644 arch/x86/include/asm/paravirq.h
>>  create mode 100644 arch/x86/kernel/apic/paravirq.c
>>
>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>> index 5c6e747..209bd88 100644
>> --- a/arch/x86/Kconfig
>> +++ b/arch/x86/Kconfig
>> @@ -760,6 +760,14 @@ config PARAVIRT_TIME_ACCOUNTING
>>  
>>  	  If in doubt, say N here.
>>  
>> +config X86_PARAVIRQ
>> +	bool "Enable paravirq irq_domain"
>> +	depends on PARAVIRT && X86_LOCAL_APIC
>> +	default n
>> +	---help---
>> +	  This option enables paravirq irq_domain for interrupts injected
>> +	  by the hypervisor using Intel VT-x technology.
>> +
>>  config PARAVIRT_CLOCK
>>  	bool
>>  
>> diff --git a/arch/x86/include/asm/irqdomain.h
>> b/arch/x86/include/asm/irqdomain.h
>> index d26075b..e3192f6 100644
>> --- a/arch/x86/include/asm/irqdomain.h
>> +++ b/arch/x86/include/asm/irqdomain.h
>> @@ -60,4 +60,10 @@ extern void arch_init_htirq_domain(struct irq_domain
>> *domain);
>>  static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
>>  #endif
>>  
>> +#ifdef CONFIG_X86_PARAVIRQ
>> +extern void arch_init_paravirq_domain(struct irq_domain *domain);
>> +#else
>> +static inline void arch_init_paravirq_domain(struct irq_domain *domain) { }
>> +#endif
>> +
>>  #endif
>> diff --git a/arch/x86/include/asm/paravirq.h
>> b/arch/x86/include/asm/paravirq.h
>> new file mode 100644
>> index 0000000..a137de2
>> --- /dev/null
>> +++ b/arch/x86/include/asm/paravirq.h
>> @@ -0,0 +1,9 @@
>> +#ifndef _ASM_X86_PARAVIRQ_H
>> +#define _ASM_X86_PARAVIRQ_H
>> +
>> +int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
>> +				void (*irq_unmask)(struct irq_data *data));
>> +int paravirq_alloc_irq(void);
>> +void paravirq_free_irq(unsigned int irq);
>> +
>> +#endif /* _ASM_X86_PARAVIRQ_H */
>> diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
>> index 8e63ebd..84f9ce0 100644
>> --- a/arch/x86/kernel/apic/Makefile
>> +++ b/arch/x86/kernel/apic/Makefile
>> @@ -28,3 +28,5 @@ obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o
>>  
>>  # For 32bit, probe_32 need to be listed last
>>  obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o
>> +
>> +obj-$(CONFIG_X86_PARAVIRQ)	+= paravirq.o
>> diff --git a/arch/x86/kernel/apic/paravirq.c
>> b/arch/x86/kernel/apic/paravirq.c
>> new file mode 100644
>> index 0000000..430b819
>> --- /dev/null
>> +++ b/arch/x86/kernel/apic/paravirq.c
>> @@ -0,0 +1,128 @@
>> +/*
>> + * An irq_domain for interrupts injected by the hypervisor using
>> + * Intel VT-x technology.
>> + *
>> + * Copyright (C) 2016 Alexander Popov <alex.popov@linux.com>.
>> + *
>> + * This file is released under the GPLv2.
>> + */
>> +
>> +#include <linux/init.h>
>> +#include <linux/irq.h>
>> +#include <asm/irqdomain.h>
>> +#include <asm/paravirq.h>
>> +
>> +static struct irq_domain *paravirq_domain;
>> +
>> +static struct irq_chip paravirq_chip = {
>> +	.name			= "PARAVIRQ",
>> +	.irq_ack		= irq_chip_ack_parent,
>> +};
>> +
>> +static int paravirq_domain_alloc(struct irq_domain *domain,
>> +			unsigned int virq, unsigned int nr_irqs, void *arg)
>> +{
>> +	int ret = 0;
>> +
>> +	BUG_ON(domain != paravirq_domain);
>> +
>> +	if (nr_irqs != 1)
>> +		return -EINVAL;
>> +
>> +	ret = irq_domain_set_hwirq_and_chip(paravirq_domain,
>> +					virq, virq, &paravirq_chip, NULL);
>> +	if (ret) {
>> +		pr_warn("setting chip, hwirq for irq %u failed\n", virq);
>> +		return ret;
>> +	}
>> +
>> +	__irq_set_handler(virq, handle_edge_irq, 0, "edge");
>> +
>> +	return 0;
>> +}
>> +
>> +static void paravirq_domain_free(struct irq_domain *domain,
>> +					unsigned int virq, unsigned int nr_irqs)
>> +{
>> +	struct irq_data *irq_data;
>> +
>> +	BUG_ON(domain != paravirq_domain);
>> +	BUG_ON(nr_irqs != 1);
>> +
>> +	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
>> +	if (irq_data)
>> +		irq_domain_reset_irq_data(irq_data);
>> +	else
>> +		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
>> +}
>> +
>> +static const struct irq_domain_ops paravirq_domain_ops = {
>> +	.alloc	= paravirq_domain_alloc,
>> +	.free	= paravirq_domain_free,
>> +};
>> +
>> +int paravirq_alloc_irq(void)
>> +{
>> +	struct irq_alloc_info info;
>> +
>> +	if (!paravirq_domain)
>> +		return -ENODEV;
>> +
>> +	if (!paravirq_chip.irq_mask || !paravirq_chip.irq_unmask)
>> +		return -EINVAL;
>> +
>> +	init_irq_alloc_info(&info, NULL);
>> +
>> +	return irq_domain_alloc_irqs(paravirq_domain, 1, NUMA_NO_NODE, &info);
>> +}
>> +EXPORT_SYMBOL(paravirq_alloc_irq);
>> +
>> +void paravirq_free_irq(unsigned int virq)
>> +{
>> +	struct irq_data *irq_data;
>> +
>> +	if (!paravirq_domain) {
>> +		pr_warn("paravirq irq_domain is not initialized\n");
>> +		return;
>> +	}
>> +
>> +	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
>> +	if (irq_data)
>> +		irq_domain_free_irqs(virq, 1);
>> +	else
>> +		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
>> +}
>> +EXPORT_SYMBOL(paravirq_free_irq);
>> +
>> +int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
>> +				void (*irq_unmask)(struct irq_data *data))
>> +{
>> +	if (!paravirq_domain)
>> +		return -ENODEV;
>> +
>> +	if (paravirq_chip.irq_mask || paravirq_chip.irq_unmask)
>> +		return -EEXIST;
>> +
>> +	if (!irq_mask || !irq_unmask)
>> +		return -EINVAL;
>> +
>> +	paravirq_chip.irq_mask = irq_mask;
>> +	paravirq_chip.irq_unmask = irq_unmask;
>> +
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL(paravirq_init_chip);
>> +
>> +void arch_init_paravirq_domain(struct irq_domain *parent)
>> +{
>> +	paravirq_domain = irq_domain_add_tree(NULL, &paravirq_domain_ops, NULL);
>> +	if (!paravirq_domain) {
>> +		pr_warn("failed to initialize paravirq irq_domain\n");
>> +		return;
>> +	}
>> +
>> +	paravirq_domain->name = paravirq_chip.name;
>> +	paravirq_domain->parent = parent;
>> +	paravirq_domain->flags |= IRQ_DOMAIN_FLAG_AUTO_RECURSIVE;
>> +}
>> +
>> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
>> index 6066d94..878b440 100644
>> --- a/arch/x86/kernel/apic/vector.c
>> +++ b/arch/x86/kernel/apic/vector.c
>> @@ -438,6 +438,7 @@ int __init arch_early_irq_init(void)
>>  
>>  	arch_init_msi_domain(x86_vector_domain);
>>  	arch_init_htirq_domain(x86_vector_domain);
>> +	arch_init_paravirq_domain(x86_vector_domain);
>>  
>>  	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
>>  	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
>> --
>> 2.5.5
>>
>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-12 10:56   ` Alexander Popov
@ 2016-08-12 11:43     ` Paolo Bonzini
  2016-08-12 22:07       ` Alexander Popov
  0 siblings, 1 reply; 12+ messages in thread
From: Paolo Bonzini @ 2016-08-12 11:43 UTC (permalink / raw)
  To: alex.popov
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm



On 12/08/2016 12:56, Alexander Popov wrote:
> Maybe the name "paravirq" is not very good, I'll try to describe the idea.
> 
> There is some kernel module for special interactions between guest VMs.
> Currently it has to register a MSI-capable PCI device to handle interrupts
> injected by the hypervisor. And the bare-metal hypervisor has to emulate
> such a device for guest VMs.
> 
> So I've implemented paravirq irq_domain to avoid this redundant emulation.
> With it we can just call:
>  - paravirq_alloc_irq() to allocate a LAPIC irq;
>  - request_irq() for it;
>  - irqd_cfg(irq_get_irq_data()) to get the corresponding interrupt vector
> and inform the hypervisor about it.
> Now we happily handle the irq from the hypervisor when it injects this vector.
> 
> The irq_mask/irq_unmask parameters of paravirq_init_chip() are the pointers
> to the functions from the interaction module which ask the hypervisor to
> start/stop injecting interrupts to the guest VM.
> 
> Paravirq irq_domain allows to avoid the PCI device emulation in the hypervisor
> and provides the ability to run slimmer Linux guests without precompiled
> PCI and MSI support.
> 
> Did I manage to answer your questions?

It's a bit clearer.  My doubt is that the caller of paravirq_init_chip
has to provide irq_mask and irq_unmask, but it doesn't know who will
call paravirq_alloc_irq.  So there are two cases:

1) there is only one device, and then your solution doesn't scale well
to multiple devices

2) there is some kind of commonality between all devices using
paravirq_alloc_irq, and then it should be abstracted in a bus.

The latter would be similar to what Xen and Hyper-V do, for example.
Using PCI is more similar to the KVM approach.

Paolo

> Please correct me if the idea is wrong or there's a better way to do that.
> Thanks.
> 
>>> Signed-off-by: Alexander Popov <alex.popov@linux.com>
>>> ---
>>>  arch/x86/Kconfig                 |   8 +++
>>>  arch/x86/include/asm/irqdomain.h |   6 ++
>>>  arch/x86/include/asm/paravirq.h  |   9 +++
>>>  arch/x86/kernel/apic/Makefile    |   2 +
>>>  arch/x86/kernel/apic/paravirq.c  | 128
>>>  +++++++++++++++++++++++++++++++++++++++
>>>  arch/x86/kernel/apic/vector.c    |   1 +
>>>  6 files changed, 154 insertions(+)
>>>  create mode 100644 arch/x86/include/asm/paravirq.h
>>>  create mode 100644 arch/x86/kernel/apic/paravirq.c
>>>
>>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>>> index 5c6e747..209bd88 100644
>>> --- a/arch/x86/Kconfig
>>> +++ b/arch/x86/Kconfig
>>> @@ -760,6 +760,14 @@ config PARAVIRT_TIME_ACCOUNTING
>>>  
>>>  	  If in doubt, say N here.
>>>  
>>> +config X86_PARAVIRQ
>>> +	bool "Enable paravirq irq_domain"
>>> +	depends on PARAVIRT && X86_LOCAL_APIC
>>> +	default n
>>> +	---help---
>>> +	  This option enables paravirq irq_domain for interrupts injected
>>> +	  by the hypervisor using Intel VT-x technology.
>>> +
>>>  config PARAVIRT_CLOCK
>>>  	bool
>>>  
>>> diff --git a/arch/x86/include/asm/irqdomain.h
>>> b/arch/x86/include/asm/irqdomain.h
>>> index d26075b..e3192f6 100644
>>> --- a/arch/x86/include/asm/irqdomain.h
>>> +++ b/arch/x86/include/asm/irqdomain.h
>>> @@ -60,4 +60,10 @@ extern void arch_init_htirq_domain(struct irq_domain
>>> *domain);
>>>  static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
>>>  #endif
>>>  
>>> +#ifdef CONFIG_X86_PARAVIRQ
>>> +extern void arch_init_paravirq_domain(struct irq_domain *domain);
>>> +#else
>>> +static inline void arch_init_paravirq_domain(struct irq_domain *domain) { }
>>> +#endif
>>> +
>>>  #endif
>>> diff --git a/arch/x86/include/asm/paravirq.h
>>> b/arch/x86/include/asm/paravirq.h
>>> new file mode 100644
>>> index 0000000..a137de2
>>> --- /dev/null
>>> +++ b/arch/x86/include/asm/paravirq.h
>>> @@ -0,0 +1,9 @@
>>> +#ifndef _ASM_X86_PARAVIRQ_H
>>> +#define _ASM_X86_PARAVIRQ_H
>>> +
>>> +int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
>>> +				void (*irq_unmask)(struct irq_data *data));
>>> +int paravirq_alloc_irq(void);
>>> +void paravirq_free_irq(unsigned int irq);
>>> +
>>> +#endif /* _ASM_X86_PARAVIRQ_H */
>>> diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
>>> index 8e63ebd..84f9ce0 100644
>>> --- a/arch/x86/kernel/apic/Makefile
>>> +++ b/arch/x86/kernel/apic/Makefile
>>> @@ -28,3 +28,5 @@ obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o
>>>  
>>>  # For 32bit, probe_32 need to be listed last
>>>  obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o
>>> +
>>> +obj-$(CONFIG_X86_PARAVIRQ)	+= paravirq.o
>>> diff --git a/arch/x86/kernel/apic/paravirq.c
>>> b/arch/x86/kernel/apic/paravirq.c
>>> new file mode 100644
>>> index 0000000..430b819
>>> --- /dev/null
>>> +++ b/arch/x86/kernel/apic/paravirq.c
>>> @@ -0,0 +1,128 @@
>>> +/*
>>> + * An irq_domain for interrupts injected by the hypervisor using
>>> + * Intel VT-x technology.
>>> + *
>>> + * Copyright (C) 2016 Alexander Popov <alex.popov@linux.com>.
>>> + *
>>> + * This file is released under the GPLv2.
>>> + */
>>> +
>>> +#include <linux/init.h>
>>> +#include <linux/irq.h>
>>> +#include <asm/irqdomain.h>
>>> +#include <asm/paravirq.h>
>>> +
>>> +static struct irq_domain *paravirq_domain;
>>> +
>>> +static struct irq_chip paravirq_chip = {
>>> +	.name			= "PARAVIRQ",
>>> +	.irq_ack		= irq_chip_ack_parent,
>>> +};
>>> +
>>> +static int paravirq_domain_alloc(struct irq_domain *domain,
>>> +			unsigned int virq, unsigned int nr_irqs, void *arg)
>>> +{
>>> +	int ret = 0;
>>> +
>>> +	BUG_ON(domain != paravirq_domain);
>>> +
>>> +	if (nr_irqs != 1)
>>> +		return -EINVAL;
>>> +
>>> +	ret = irq_domain_set_hwirq_and_chip(paravirq_domain,
>>> +					virq, virq, &paravirq_chip, NULL);
>>> +	if (ret) {
>>> +		pr_warn("setting chip, hwirq for irq %u failed\n", virq);
>>> +		return ret;
>>> +	}
>>> +
>>> +	__irq_set_handler(virq, handle_edge_irq, 0, "edge");
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void paravirq_domain_free(struct irq_domain *domain,
>>> +					unsigned int virq, unsigned int nr_irqs)
>>> +{
>>> +	struct irq_data *irq_data;
>>> +
>>> +	BUG_ON(domain != paravirq_domain);
>>> +	BUG_ON(nr_irqs != 1);
>>> +
>>> +	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
>>> +	if (irq_data)
>>> +		irq_domain_reset_irq_data(irq_data);
>>> +	else
>>> +		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
>>> +}
>>> +
>>> +static const struct irq_domain_ops paravirq_domain_ops = {
>>> +	.alloc	= paravirq_domain_alloc,
>>> +	.free	= paravirq_domain_free,
>>> +};
>>> +
>>> +int paravirq_alloc_irq(void)
>>> +{
>>> +	struct irq_alloc_info info;
>>> +
>>> +	if (!paravirq_domain)
>>> +		return -ENODEV;
>>> +
>>> +	if (!paravirq_chip.irq_mask || !paravirq_chip.irq_unmask)
>>> +		return -EINVAL;
>>> +
>>> +	init_irq_alloc_info(&info, NULL);
>>> +
>>> +	return irq_domain_alloc_irqs(paravirq_domain, 1, NUMA_NO_NODE, &info);
>>> +}
>>> +EXPORT_SYMBOL(paravirq_alloc_irq);
>>> +
>>> +void paravirq_free_irq(unsigned int virq)
>>> +{
>>> +	struct irq_data *irq_data;
>>> +
>>> +	if (!paravirq_domain) {
>>> +		pr_warn("paravirq irq_domain is not initialized\n");
>>> +		return;
>>> +	}
>>> +
>>> +	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
>>> +	if (irq_data)
>>> +		irq_domain_free_irqs(virq, 1);
>>> +	else
>>> +		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
>>> +}
>>> +EXPORT_SYMBOL(paravirq_free_irq);
>>> +
>>> +int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
>>> +				void (*irq_unmask)(struct irq_data *data))
>>> +{
>>> +	if (!paravirq_domain)
>>> +		return -ENODEV;
>>> +
>>> +	if (paravirq_chip.irq_mask || paravirq_chip.irq_unmask)
>>> +		return -EEXIST;
>>> +
>>> +	if (!irq_mask || !irq_unmask)
>>> +		return -EINVAL;
>>> +
>>> +	paravirq_chip.irq_mask = irq_mask;
>>> +	paravirq_chip.irq_unmask = irq_unmask;
>>> +
>>> +	return 0;
>>> +}
>>> +EXPORT_SYMBOL(paravirq_init_chip);
>>> +
>>> +void arch_init_paravirq_domain(struct irq_domain *parent)
>>> +{
>>> +	paravirq_domain = irq_domain_add_tree(NULL, &paravirq_domain_ops, NULL);
>>> +	if (!paravirq_domain) {
>>> +		pr_warn("failed to initialize paravirq irq_domain\n");
>>> +		return;
>>> +	}
>>> +
>>> +	paravirq_domain->name = paravirq_chip.name;
>>> +	paravirq_domain->parent = parent;
>>> +	paravirq_domain->flags |= IRQ_DOMAIN_FLAG_AUTO_RECURSIVE;
>>> +}
>>> +
>>> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
>>> index 6066d94..878b440 100644
>>> --- a/arch/x86/kernel/apic/vector.c
>>> +++ b/arch/x86/kernel/apic/vector.c
>>> @@ -438,6 +438,7 @@ int __init arch_early_irq_init(void)
>>>  
>>>  	arch_init_msi_domain(x86_vector_domain);
>>>  	arch_init_htirq_domain(x86_vector_domain);
>>> +	arch_init_paravirq_domain(x86_vector_domain);
>>>  
>>>  	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
>>>  	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
>>> --
>>> 2.5.5
>>>
>>>
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-12 11:43     ` Paolo Bonzini
@ 2016-08-12 22:07       ` Alexander Popov
  2016-08-13  6:20         ` Paolo Bonzini
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Popov @ 2016-08-12 22:07 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm

On 12.08.2016 14:43, Paolo Bonzini wrote:
> On 12/08/2016 12:56, Alexander Popov wrote:
>> Maybe the name "paravirq" is not very good, I'll try to describe the idea.
>>
>> There is some kernel module for special interactions between guest VMs.
>> Currently it has to register a MSI-capable PCI device to handle interrupts
>> injected by the hypervisor. And the bare-metal hypervisor has to emulate
>> such a device for guest VMs.
>>
>> So I've implemented paravirq irq_domain to avoid this redundant emulation.
>> With it we can just call:
>>  - paravirq_alloc_irq() to allocate a LAPIC irq;
>>  - request_irq() for it;
>>  - irqd_cfg(irq_get_irq_data()) to get the corresponding interrupt vector
>> and inform the hypervisor about it.
>> Now we happily handle the irq from the hypervisor when it injects this vector.
>>
>> The irq_mask/irq_unmask parameters of paravirq_init_chip() are the pointers
>> to the functions from the interaction module which ask the hypervisor to
>> start/stop injecting interrupts to the guest VM.
>>
>> Paravirq irq_domain allows to avoid the PCI device emulation in the hypervisor
>> and provides the ability to run slimmer Linux guests without precompiled
>> PCI and MSI support.
>>
>> Did I manage to answer your questions?
> 
> It's a bit clearer.  My doubt is that the caller of paravirq_init_chip
> has to provide irq_mask and irq_unmask, but it doesn't know who will
> call paravirq_alloc_irq.  So there are two cases:
> 
> 1) there is only one device, and then your solution doesn't scale well
> to multiple devices
> 
> 2) there is some kind of commonality between all devices using
> paravirq_alloc_irq, and then it should be abstracted in a bus.
> 
> The latter would be similar to what Xen and Hyper-V do, for example.
> Using PCI is more similar to the KVM approach.

Excuse me, I don't see the problem.

The caller of paravirq_init_chip() provides irq_mask/irq_unmask
function pointers only once, and paravirq_init_chip() saves them in
.irq_mask/.irq_unmask fields of struct irq_chip paravirq_chip.

When later, for example, disable_irq() is called for one of several irqs
allocated in paravirq irq_domain, paravirq_chip->irq_mask() is called
with struct irq_desc *desc argument corresponding to that particular irq.

I.e. our irq_mask()/irq_unmask() callbacks get irq_desc of the interrupt
which should be masked/unmasked and can ask the hypervisor to stop/start
injecting the vector of that particular interrupt.


>>>> Signed-off-by: Alexander Popov <alex.popov@linux.com>
>>>> ---
>>>>  arch/x86/Kconfig                 |   8 +++
>>>>  arch/x86/include/asm/irqdomain.h |   6 ++
>>>>  arch/x86/include/asm/paravirq.h  |   9 +++
>>>>  arch/x86/kernel/apic/Makefile    |   2 +
>>>>  arch/x86/kernel/apic/paravirq.c  | 128
>>>>  +++++++++++++++++++++++++++++++++++++++
>>>>  arch/x86/kernel/apic/vector.c    |   1 +
>>>>  6 files changed, 154 insertions(+)
>>>>  create mode 100644 arch/x86/include/asm/paravirq.h
>>>>  create mode 100644 arch/x86/kernel/apic/paravirq.c
>>>>
>>>> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
>>>> index 5c6e747..209bd88 100644
>>>> --- a/arch/x86/Kconfig
>>>> +++ b/arch/x86/Kconfig
>>>> @@ -760,6 +760,14 @@ config PARAVIRT_TIME_ACCOUNTING
>>>>  
>>>>  	  If in doubt, say N here.
>>>>  
>>>> +config X86_PARAVIRQ
>>>> +	bool "Enable paravirq irq_domain"
>>>> +	depends on PARAVIRT && X86_LOCAL_APIC
>>>> +	default n
>>>> +	---help---
>>>> +	  This option enables paravirq irq_domain for interrupts injected
>>>> +	  by the hypervisor using Intel VT-x technology.
>>>> +
>>>>  config PARAVIRT_CLOCK
>>>>  	bool
>>>>  
>>>> diff --git a/arch/x86/include/asm/irqdomain.h
>>>> b/arch/x86/include/asm/irqdomain.h
>>>> index d26075b..e3192f6 100644
>>>> --- a/arch/x86/include/asm/irqdomain.h
>>>> +++ b/arch/x86/include/asm/irqdomain.h
>>>> @@ -60,4 +60,10 @@ extern void arch_init_htirq_domain(struct irq_domain
>>>> *domain);
>>>>  static inline void arch_init_htirq_domain(struct irq_domain *domain) { }
>>>>  #endif
>>>>  
>>>> +#ifdef CONFIG_X86_PARAVIRQ
>>>> +extern void arch_init_paravirq_domain(struct irq_domain *domain);
>>>> +#else
>>>> +static inline void arch_init_paravirq_domain(struct irq_domain *domain) { }
>>>> +#endif
>>>> +
>>>>  #endif
>>>> diff --git a/arch/x86/include/asm/paravirq.h
>>>> b/arch/x86/include/asm/paravirq.h
>>>> new file mode 100644
>>>> index 0000000..a137de2
>>>> --- /dev/null
>>>> +++ b/arch/x86/include/asm/paravirq.h
>>>> @@ -0,0 +1,9 @@
>>>> +#ifndef _ASM_X86_PARAVIRQ_H
>>>> +#define _ASM_X86_PARAVIRQ_H
>>>> +
>>>> +int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
>>>> +				void (*irq_unmask)(struct irq_data *data));
>>>> +int paravirq_alloc_irq(void);
>>>> +void paravirq_free_irq(unsigned int irq);
>>>> +
>>>> +#endif /* _ASM_X86_PARAVIRQ_H */
>>>> diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
>>>> index 8e63ebd..84f9ce0 100644
>>>> --- a/arch/x86/kernel/apic/Makefile
>>>> +++ b/arch/x86/kernel/apic/Makefile
>>>> @@ -28,3 +28,5 @@ obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o
>>>>  
>>>>  # For 32bit, probe_32 need to be listed last
>>>>  obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o
>>>> +
>>>> +obj-$(CONFIG_X86_PARAVIRQ)	+= paravirq.o
>>>> diff --git a/arch/x86/kernel/apic/paravirq.c
>>>> b/arch/x86/kernel/apic/paravirq.c
>>>> new file mode 100644
>>>> index 0000000..430b819
>>>> --- /dev/null
>>>> +++ b/arch/x86/kernel/apic/paravirq.c
>>>> @@ -0,0 +1,128 @@
>>>> +/*
>>>> + * An irq_domain for interrupts injected by the hypervisor using
>>>> + * Intel VT-x technology.
>>>> + *
>>>> + * Copyright (C) 2016 Alexander Popov <alex.popov@linux.com>.
>>>> + *
>>>> + * This file is released under the GPLv2.
>>>> + */
>>>> +
>>>> +#include <linux/init.h>
>>>> +#include <linux/irq.h>
>>>> +#include <asm/irqdomain.h>
>>>> +#include <asm/paravirq.h>
>>>> +
>>>> +static struct irq_domain *paravirq_domain;
>>>> +
>>>> +static struct irq_chip paravirq_chip = {
>>>> +	.name			= "PARAVIRQ",
>>>> +	.irq_ack		= irq_chip_ack_parent,
>>>> +};
>>>> +
>>>> +static int paravirq_domain_alloc(struct irq_domain *domain,
>>>> +			unsigned int virq, unsigned int nr_irqs, void *arg)
>>>> +{
>>>> +	int ret = 0;
>>>> +
>>>> +	BUG_ON(domain != paravirq_domain);
>>>> +
>>>> +	if (nr_irqs != 1)
>>>> +		return -EINVAL;
>>>> +
>>>> +	ret = irq_domain_set_hwirq_and_chip(paravirq_domain,
>>>> +					virq, virq, &paravirq_chip, NULL);
>>>> +	if (ret) {
>>>> +		pr_warn("setting chip, hwirq for irq %u failed\n", virq);
>>>> +		return ret;
>>>> +	}
>>>> +
>>>> +	__irq_set_handler(virq, handle_edge_irq, 0, "edge");
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void paravirq_domain_free(struct irq_domain *domain,
>>>> +					unsigned int virq, unsigned int nr_irqs)
>>>> +{
>>>> +	struct irq_data *irq_data;
>>>> +
>>>> +	BUG_ON(domain != paravirq_domain);
>>>> +	BUG_ON(nr_irqs != 1);
>>>> +
>>>> +	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
>>>> +	if (irq_data)
>>>> +		irq_domain_reset_irq_data(irq_data);
>>>> +	else
>>>> +		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
>>>> +}
>>>> +
>>>> +static const struct irq_domain_ops paravirq_domain_ops = {
>>>> +	.alloc	= paravirq_domain_alloc,
>>>> +	.free	= paravirq_domain_free,
>>>> +};
>>>> +
>>>> +int paravirq_alloc_irq(void)
>>>> +{
>>>> +	struct irq_alloc_info info;
>>>> +
>>>> +	if (!paravirq_domain)
>>>> +		return -ENODEV;
>>>> +
>>>> +	if (!paravirq_chip.irq_mask || !paravirq_chip.irq_unmask)
>>>> +		return -EINVAL;
>>>> +
>>>> +	init_irq_alloc_info(&info, NULL);
>>>> +
>>>> +	return irq_domain_alloc_irqs(paravirq_domain, 1, NUMA_NO_NODE, &info);
>>>> +}
>>>> +EXPORT_SYMBOL(paravirq_alloc_irq);
>>>> +
>>>> +void paravirq_free_irq(unsigned int virq)
>>>> +{
>>>> +	struct irq_data *irq_data;
>>>> +
>>>> +	if (!paravirq_domain) {
>>>> +		pr_warn("paravirq irq_domain is not initialized\n");
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	irq_data = irq_domain_get_irq_data(paravirq_domain, virq);
>>>> +	if (irq_data)
>>>> +		irq_domain_free_irqs(virq, 1);
>>>> +	else
>>>> +		pr_warn("irq %u is not in paravirq irq_domain\n", virq);
>>>> +}
>>>> +EXPORT_SYMBOL(paravirq_free_irq);
>>>> +
>>>> +int paravirq_init_chip(void (*irq_mask)(struct irq_data *data),
>>>> +				void (*irq_unmask)(struct irq_data *data))
>>>> +{
>>>> +	if (!paravirq_domain)
>>>> +		return -ENODEV;
>>>> +
>>>> +	if (paravirq_chip.irq_mask || paravirq_chip.irq_unmask)
>>>> +		return -EEXIST;
>>>> +
>>>> +	if (!irq_mask || !irq_unmask)
>>>> +		return -EINVAL;
>>>> +
>>>> +	paravirq_chip.irq_mask = irq_mask;
>>>> +	paravirq_chip.irq_unmask = irq_unmask;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +EXPORT_SYMBOL(paravirq_init_chip);
>>>> +
>>>> +void arch_init_paravirq_domain(struct irq_domain *parent)
>>>> +{
>>>> +	paravirq_domain = irq_domain_add_tree(NULL, &paravirq_domain_ops, NULL);
>>>> +	if (!paravirq_domain) {
>>>> +		pr_warn("failed to initialize paravirq irq_domain\n");
>>>> +		return;
>>>> +	}
>>>> +
>>>> +	paravirq_domain->name = paravirq_chip.name;
>>>> +	paravirq_domain->parent = parent;
>>>> +	paravirq_domain->flags |= IRQ_DOMAIN_FLAG_AUTO_RECURSIVE;
>>>> +}
>>>> +
>>>> diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
>>>> index 6066d94..878b440 100644
>>>> --- a/arch/x86/kernel/apic/vector.c
>>>> +++ b/arch/x86/kernel/apic/vector.c
>>>> @@ -438,6 +438,7 @@ int __init arch_early_irq_init(void)
>>>>  
>>>>  	arch_init_msi_domain(x86_vector_domain);
>>>>  	arch_init_htirq_domain(x86_vector_domain);
>>>> +	arch_init_paravirq_domain(x86_vector_domain);
>>>>  
>>>>  	BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL));
>>>>  	BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL));
>>>> --
>>>> 2.5.5
>>>>
>>>>
>>

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-12 22:07       ` Alexander Popov
@ 2016-08-13  6:20         ` Paolo Bonzini
  2016-08-15 11:51           ` Alexander Popov
  0 siblings, 1 reply; 12+ messages in thread
From: Paolo Bonzini @ 2016-08-13  6:20 UTC (permalink / raw)
  To: alex.popov
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm



On 13/08/2016 00:07, Alexander Popov wrote:
> I.e. our irq_mask()/irq_unmask() callbacks get irq_desc of the interrupt
> which should be masked/unmasked and can ask the hypervisor to stop/start
> injecting the vector of that particular interrupt.

So just let the irqdomain know about your hypervisor and avoid the
pointless indirection through function pointers, and only call
arch_init_paravirq_domain in a file specific to your hypervisor.

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-13  6:20         ` Paolo Bonzini
@ 2016-08-15 11:51           ` Alexander Popov
  2016-08-15 12:37             ` Paolo Bonzini
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Popov @ 2016-08-15 11:51 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm

On 13.08.2016 09:20, Paolo Bonzini wrote:
> On 13/08/2016 00:07, Alexander Popov wrote:
>> I.e. our irq_mask()/irq_unmask() callbacks get irq_desc of the interrupt
>> which should be masked/unmasked and can ask the hypervisor to stop/start
>> injecting the vector of that particular interrupt.
> 
> So just let the irqdomain know about your hypervisor and avoid the
> pointless indirection through function pointers, and only call
> arch_init_paravirq_domain in a file specific to your hypervisor.

Paolo, I would like paravirq irq_domain to be useful for many hypervisors,
not only for one developed by Positive Technologies.

It seems to me that the idea of an irq_domain for interrupts injected
by a hypervisor is quite generic.

I don't like such a way of initializing paravirq_chip very much too.
But to my mind, putting some custom hypervisor API to the generic code
in Linux kernel mainline is less attractive. Can we find another solution?

Thank you.
Best regards,
Alexander

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-15 11:51           ` Alexander Popov
@ 2016-08-15 12:37             ` Paolo Bonzini
  2016-08-16 20:00               ` Alexander Popov
  2016-08-17 14:36               ` Jan Kiszka
  0 siblings, 2 replies; 12+ messages in thread
From: Paolo Bonzini @ 2016-08-15 12:37 UTC (permalink / raw)
  To: alex.popov
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm



On 15/08/2016 13:51, Alexander Popov wrote:
> On 13.08.2016 09:20, Paolo Bonzini wrote:
>> On 13/08/2016 00:07, Alexander Popov wrote:
>>> I.e. our irq_mask()/irq_unmask() callbacks get irq_desc of the interrupt
>>> which should be masked/unmasked and can ask the hypervisor to stop/start
>>> injecting the vector of that particular interrupt.
>>
>> So just let the irqdomain know about your hypervisor and avoid the
>> pointless indirection through function pointers, and only call
>> arch_init_paravirq_domain in a file specific to your hypervisor.
> 
> Paolo, I would like paravirq irq_domain to be useful for many hypervisors,
> not only for one developed by Positive Technologies.

If somebody else comes up with similar needs, leave the generalization
of the code to them.  The maintainers will surely remember.  Without two
users, there's a nonzero chance that the abstraction you have is not
good enough, and actually gets in the way of the "second user".

> It seems to me that the idea of an irq_domain for interrupts injected
> by a hypervisor is quite generic.

True, but all of Xen, KVM and VMware use PCI devices for this.

Paolo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-15 12:37             ` Paolo Bonzini
@ 2016-08-16 20:00               ` Alexander Popov
  2016-08-17 14:36               ` Jan Kiszka
  1 sibling, 0 replies; 12+ messages in thread
From: Alexander Popov @ 2016-08-16 20:00 UTC (permalink / raw)
  To: Paolo Bonzini
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm

On 15.08.2016 15:37, Paolo Bonzini wrote:
> On 15/08/2016 13:51, Alexander Popov wrote:
>> On 13.08.2016 09:20, Paolo Bonzini wrote:
>>> So just let the irqdomain know about your hypervisor and avoid the
>>> pointless indirection through function pointers, and only call
>>> arch_init_paravirq_domain in a file specific to your hypervisor.
>>
>> Paolo, I would like paravirq irq_domain to be useful for many hypervisors,
>> not only for one developed by Positive Technologies.
> 
> If somebody else comes up with similar needs, leave the generalization
> of the code to them.  The maintainers will surely remember.  Without two
> users, there's a nonzero chance that the abstraction you have is not
> good enough, and actually gets in the way of the "second user".

Ok. I'll return with v2. Thank you.

Best regards,
Alexander

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-15 12:37             ` Paolo Bonzini
  2016-08-16 20:00               ` Alexander Popov
@ 2016-08-17 14:36               ` Jan Kiszka
  2016-08-17 22:58                 ` Alexander Popov
  1 sibling, 1 reply; 12+ messages in thread
From: Jan Kiszka @ 2016-08-17 14:36 UTC (permalink / raw)
  To: Paolo Bonzini, alex.popov
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm

On 2016-08-15 14:37, Paolo Bonzini wrote:
> On 15/08/2016 13:51, Alexander Popov wrote:
>> On 13.08.2016 09:20, Paolo Bonzini wrote:
>>> On 13/08/2016 00:07, Alexander Popov wrote:
>>>> I.e. our irq_mask()/irq_unmask() callbacks get irq_desc of the interrupt
>>>> which should be masked/unmasked and can ask the hypervisor to stop/start
>>>> injecting the vector of that particular interrupt.
>>>
>>> So just let the irqdomain know about your hypervisor and avoid the
>>> pointless indirection through function pointers, and only call
>>> arch_init_paravirq_domain in a file specific to your hypervisor.
>>
>> Paolo, I would like paravirq irq_domain to be useful for many hypervisors,
>> not only for one developed by Positive Technologies.
> 
> If somebody else comes up with similar needs, leave the generalization
> of the code to them.  The maintainers will surely remember.  Without two
> users, there's a nonzero chance that the abstraction you have is not
> good enough, and actually gets in the way of the "second user".
> 
>> It seems to me that the idea of an irq_domain for interrupts injected
>> by a hypervisor is quite generic.
> 
> True, but all of Xen, KVM and VMware use PCI devices for this.

So does Jailhouse. We have to have the code anyway because we need to
keep Linux alive after taking over control. Thus it is actually easier
to reuse the same logic for para-virtualized domains (non-root cells).

Jan

-- 
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-17 14:36               ` Jan Kiszka
@ 2016-08-17 22:58                 ` Alexander Popov
  2016-08-19 10:47                   ` Jan Kiszka
  0 siblings, 1 reply; 12+ messages in thread
From: Alexander Popov @ 2016-08-17 22:58 UTC (permalink / raw)
  To: Jan Kiszka, Paolo Bonzini
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm

On 17.08.2016 17:36, Jan Kiszka wrote:
> On 2016-08-15 14:37, Paolo Bonzini wrote:
>> On 15/08/2016 13:51, Alexander Popov wrote:
>>> It seems to me that the idea of an irq_domain for interrupts injected
>>> by a hypervisor is quite generic.
>>
>> True, but all of Xen, KVM and VMware use PCI devices for this.
> 
> So does Jailhouse. We have to have the code anyway because we need to
> keep Linux alive after taking over control. Thus it is actually easier
> to reuse the same logic for para-virtualized domains (non-root cells).

Hello, Jan! Yes, I see.

I can only say that Xen, KVM, VMware and Jailhouse happily use hypercalls,
which are a valid interface between a hypervisor and its guests.

Positive Technologies hypervisor called Gvandra (named after a big Caucasus
mountain) tries to use only the hypercalls and avoid PCI device emulation
to become slimmer.

Best regards,
Alexander

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/1] x86/apic: Introduce paravirq irq_domain
  2016-08-17 22:58                 ` Alexander Popov
@ 2016-08-19 10:47                   ` Jan Kiszka
  0 siblings, 0 replies; 12+ messages in thread
From: Jan Kiszka @ 2016-08-19 10:47 UTC (permalink / raw)
  To: alex.popov, Paolo Bonzini
  Cc: Thomas Gleixner, Christoph Hellwig, Ingo Molnar, Marc Zyngier,
	H. Peter Anvin, Andrew Morton, Kees Cook, Dmitry Vyukov,
	Jiang Liu, Jason Cooper, Radim Krcmar, Joerg Roedel,
	linux-kernel, x86, kvm

On 2016-08-17 18:58, Alexander Popov wrote:
> On 17.08.2016 17:36, Jan Kiszka wrote:
>> On 2016-08-15 14:37, Paolo Bonzini wrote:
>>> On 15/08/2016 13:51, Alexander Popov wrote:
>>>> It seems to me that the idea of an irq_domain for interrupts injected
>>>> by a hypervisor is quite generic.
>>>
>>> True, but all of Xen, KVM and VMware use PCI devices for this.
>>
>> So does Jailhouse. We have to have the code anyway because we need to
>> keep Linux alive after taking over control. Thus it is actually easier
>> to reuse the same logic for para-virtualized domains (non-root cells).
> 
> Hello, Jan! Yes, I see.
> 
> I can only say that Xen, KVM, VMware and Jailhouse happily use hypercalls,
> which are a valid interface between a hypervisor and its guests.
> 
> Positive Technologies hypervisor called Gvandra (named after a big Caucasus
> mountain) tries to use only the hypercalls and avoid PCI device emulation
> to become slimmer.

[Hmm, naming something that's supposed to be slim after something that's
rather big...]

BTW, is there a user of this interface already publicly available? You
didn't reference anything in your posting. Generally, infrastructure
extensions without in-tree users aren't well received (in the best case).

Jan

-- 
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2016-08-19 10:49 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-08-11 14:02 [PATCH 1/1] x86/apic: Introduce paravirq irq_domain Alexander Popov
2016-08-11 16:44 ` Paolo Bonzini
2016-08-12 10:56   ` Alexander Popov
2016-08-12 11:43     ` Paolo Bonzini
2016-08-12 22:07       ` Alexander Popov
2016-08-13  6:20         ` Paolo Bonzini
2016-08-15 11:51           ` Alexander Popov
2016-08-15 12:37             ` Paolo Bonzini
2016-08-16 20:00               ` Alexander Popov
2016-08-17 14:36               ` Jan Kiszka
2016-08-17 22:58                 ` Alexander Popov
2016-08-19 10:47                   ` Jan Kiszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.