* [RFC PATCH V5] Implement 3-level event channel ABI in Linux
@ 2013-03-19 15:21 Wei Liu
  2013-03-19 15:21 ` [RFC PATCH V5 01/14] xen: remove typedef in event_channel.h Wei Liu
                   ` (13 more replies)
  0 siblings, 14 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:21 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: ian.campbell, jbeulich, david.vrabel

This is the kernel-side code for the 3-level event channel ABI, corresponding to
the RFC V5 series on the Xen side.

Some notable changes:
* More code is shared between the 2-level and 3-level ABIs.
* cpu_evtchn_mask is allocated dynamically in the CPU hotplug bring-up path.

Diffstat:
 arch/x86/xen/enlighten.c              |   12 +
 drivers/xen/events.c                  |  930 +++++++++++++++++++++++++++------
 drivers/xen/evtchn.c                  |   13 +-
 include/xen/events.h                  |   12 +
 include/xen/interface/event_channel.h |   46 +-
 include/xen/interface/xen.h           |   13 +-
 6 files changed, 847 insertions(+), 179 deletions(-)
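
For the curious, the lookup the 3-level ABI adds looks roughly like the
sketch below. This is only an illustration of the idea (per-cpu accessors,
the resume indices and the nesting count are omitted, and
handle_pending_word() is a made-up helper); the real code is in the
"implement 3-level event channel routines" and documentation patches.

	/* L1: per-cpu selector, one bit per word of the per-cpu L2 selector */
	l1 = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
	while (l1) {
		i = __ffs(l1);
		l1 &= ~(1UL << i);
		/* L2: per-cpu selector word, one bit per word of the bitmap */
		l2 = xchg_xen_ulong(&evtchn_sel_l2[i], 0);
		while (l2) {
			j = __ffs(l2);
			l2 &= ~(1UL << j);
			/* word (i * BITS_PER_EVTCHN_WORD + j) of the shared
			 * event bitmap holds the pending event bits */
			handle_pending_word(i * BITS_PER_EVTCHN_WORD + j);
		}
	}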

* [RFC PATCH V5 01/14] xen: remove typedef in event_channel.h
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
@ 2013-03-19 15:21 ` Wei Liu
  2013-03-19 15:21 ` [RFC PATCH V5 02/14] xen: add KERN_DEBUG in printk Wei Liu
                   ` (12 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:21 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

This typedef slipped into the Linux header file; remove it.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 include/xen/interface/event_channel.h |    1 -
 1 file changed, 1 deletion(-)

diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index f494292..293c3f0 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -188,7 +188,6 @@ struct evtchn_reset {
 	/* IN parameters. */
 	domid_t dom;
 };
-typedef struct evtchn_reset evtchn_reset_t;
 
 struct evtchn_op {
 	uint32_t cmd; /* EVTCHNOP_* */
-- 
1.7.10.4

* [RFC PATCH V5 02/14] xen: add KERN_DEBUG in printk
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
  2013-03-19 15:21 ` [RFC PATCH V5 01/14] xen: remove typedef in event_channel.h Wei Liu
@ 2013-03-19 15:21 ` Wei Liu
  2013-03-19 15:21 ` [RFC PATCH V5 03/14] xen: fix output of xen_debug_interrupt Wei Liu
                   ` (11 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:21 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |   24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 6b78378..90ac37a 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1212,7 +1212,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 
 	spin_lock_irqsave(&debug_lock, flags);
 
-	printk("\nvcpu %d\n  ", cpu);
+	printk(KERN_DEBUG "\nvcpu %d\n  ", cpu);
 
 	for_each_online_cpu(i) {
 		int pending;
@@ -1220,27 +1220,27 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 		pending = (get_irq_regs() && i == cpu)
 			? xen_irqs_disabled(get_irq_regs())
 			: v->evtchn_upcall_mask;
-		printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n  ", i,
+		printk(KERN_DEBUG "%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n  ", i,
 		       pending, v->evtchn_upcall_pending,
 		       (int)(sizeof(v->evtchn_pending_sel)*2),
 		       v->evtchn_pending_sel);
 	}
 	v = per_cpu(xen_vcpu, cpu);
 
-	printk("\npending:\n   ");
+	printk(KERN_DEBUG "\npending:\n   ");
 	for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
-		printk("%0*"PRI_xen_ulong"%s",
+		printk(KERN_DEBUG "%0*"PRI_xen_ulong"%s",
 		       (int)sizeof(sh->evtchn_pending[0])*2,
 		       sh->evtchn_pending[i],
 		       i % 8 == 0 ? "\n   " : " ");
-	printk("\nglobal mask:\n   ");
+	printk(KERN_DEBUG "\nglobal mask:\n   ");
 	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
-		printk("%0*"PRI_xen_ulong"%s",
+		printk(KERN_DEBUG "%0*"PRI_xen_ulong"%s",
 		       (int)(sizeof(sh->evtchn_mask[0])*2),
 		       sh->evtchn_mask[i],
 		       i % 8 == 0 ? "\n   " : " ");
 
-	printk("\nglobally unmasked:\n   ");
+	printk(KERN_DEBUG "\nglobally unmasked:\n   ");
 	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
 		printk("%0*"PRI_xen_ulong"%s",
 		       (int)(sizeof(sh->evtchn_mask[0])*2),
@@ -1249,25 +1249,25 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 
 	printk("\nlocal cpu%d mask:\n   ", cpu);
 	for (i = (NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
-		printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
+		printk(KERN_DEBUG "%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
 		       cpu_evtchn[i],
 		       i % 8 == 0 ? "\n   " : " ");
 
-	printk("\nlocally unmasked:\n   ");
+	printk(KERN_DEBUG "\nlocally unmasked:\n   ");
 	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
 		xen_ulong_t pending = sh->evtchn_pending[i]
 			& ~sh->evtchn_mask[i]
 			& cpu_evtchn[i];
-		printk("%0*"PRI_xen_ulong"%s",
+		printk(KERN_DEBUG "%0*"PRI_xen_ulong"%s",
 		       (int)(sizeof(sh->evtchn_mask[0])*2),
 		       pending, i % 8 == 0 ? "\n   " : " ");
 	}
 
-	printk("\npending list:\n");
+	printk(KERN_DEBUG "\npending list:\n");
 	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
 		if (sync_test_bit(i, BM(sh->evtchn_pending))) {
 			int word_idx = i / BITS_PER_EVTCHN_WORD;
-			printk("  %d: event %d -> irq %d%s%s%s\n",
+			printk(KERN_DEBUG "  %d: event %d -> irq %d%s%s%s\n",
 			       cpu_from_evtchn(i), i,
 			       evtchn_to_irq[i],
 			       sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
-- 
1.7.10.4

* [RFC PATCH V5 03/14] xen: fix output of xen_debug_interrupt
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
  2013-03-19 15:21 ` [RFC PATCH V5 01/14] xen: remove typedef in event_channel.h Wei Liu
  2013-03-19 15:21 ` [RFC PATCH V5 02/14] xen: add KERN_DEBUG in printk Wei Liu
@ 2013-03-19 15:21 ` Wei Liu
  2013-03-19 15:21 ` [RFC PATCH V5 04/14] xen: sync public headers Wei Liu
                   ` (10 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:21 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Refer to the per-cpu selector as L1 in the debug output, consistent with the
description in __xen_evtchn_do_upcall's comment.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 90ac37a..38e30aa 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1271,7 +1271,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 			       cpu_from_evtchn(i), i,
 			       evtchn_to_irq[i],
 			       sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
-					     ? "" : " l2-clear",
+					     ? "" : " l1-clear",
 			       !sync_test_bit(i, BM(sh->evtchn_mask))
 					     ? "" : " globally-masked",
 			       sync_test_bit(i, BM(cpu_evtchn))
-- 
1.7.10.4

* [RFC PATCH V5 04/14] xen: sync public headers
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (2 preceding siblings ...)
  2013-03-19 15:21 ` [RFC PATCH V5 03/14] xen: fix output of xen_debug_interrupt Wei Liu
@ 2013-03-19 15:21 ` Wei Liu
  2013-03-19 15:21 ` [RFC PATCH V5 05/14] xen: introduce test_and_set_mask Wei Liu
                   ` (9 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:21 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Stay in sync with Xen public headers:
* event_channel.h:
  * EVTCHNOP_query_extended_abis
  * EVTCHNOP_register_3level
* xen.h:
  * NR_EVENT_CHANNEL*

EVTCHNOP_query_extended_abis is pretty self-explanatory.

Other structure and macro definitions belong to the 3-level event channel ABI.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
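For reference, the channel counts these macros give (illustrative
arithmetic only, not part of the patch):

	/*
	 * x86, 64-bit (xen_ulong_t is 8 bytes):
	 *   NR_EVENT_CHANNELS_L2 = 8 * 8 * 64   = 4096
	 *   NR_EVENT_CHANNELS_L3 = 4096 * 8 * 8 = 262144 (256k)
	 * x86, 32-bit (xen_ulong_t is 4 bytes):
	 *   NR_EVENT_CHANNELS_L2 = 4 * 4 * 64   = 1024
	 *   NR_EVENT_CHANNELS_L3 = 1024 * 4 * 8 = 32768  (32k)
	 * ARM uses a 64-bit xen_ulong_t for both 32-bit and 64-bit guests,
	 * hence 4096 / 256k in both cases.
	 */
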
 include/xen/interface/event_channel.h |   45 +++++++++++++++++++++++++++++++++
 include/xen/interface/xen.h           |   13 +++++++++-
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index 293c3f0..155454e 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -189,6 +189,51 @@ struct evtchn_reset {
 	domid_t dom;
 };
 
+/*
+ * EVTCHNOP_query_extended_abis: Query the hypervisor for supported extended
+ * event channel ABIs.
+ */
+#define EVTCHNOP_query_extended_abis 11
+#define EVTCHN_EXTENDED_NONE 0
+#define _EVTCHN_EXTENDED_L3  1
+#define EVTCHN_EXTENDED_L3   (1UL << _EVTCHN_EXTENDED_L3)
+struct evtchn_query_extended_abis {
+	/* OUT parameters. */
+	uint64_t abis;
+};
+
+/*
+ * EVTCHNOP_register_3level: Register 3-level event channel.
+ */
+#define EVTCHNOP_register_3level 12
+/*
+ * 64-bit guests need 8 pages each for evtchn_pending and evtchn_mask (256k
+ * event channels), while 32-bit guests only need 1 page each (32k event channels).
+ */
+#define EVTCHN_MAX_L3_PAGES  8
+/*
+ * A guest should register the bitmaps first, then register the L2 selector
+ * for each individual cpu.
+ */
+#define REGISTER_BITMAPS     1
+#define REGISTER_L2_SELECTOR 2
+struct evtchn_register_3level {
+	/* IN parameters. */
+	uint32_t cmd;
+	union {
+		struct {
+			uint32_t nr_pages;
+			GUEST_HANDLE(xen_pfn_t) evtchn_pending;
+			GUEST_HANDLE(xen_pfn_t) evtchn_mask;
+		} bitmaps;
+		struct {
+			uint32_t  cpu_id;
+			xen_pfn_t mfn;    /* mfn for L2 selector */
+			xen_pfn_t offset; /* offset of L2 selector */
+		} l2_selector;
+	} u;
+};
+
 struct evtchn_op {
 	uint32_t cmd; /* EVTCHNOP_* */
 	union {
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 53ec416..9b0248d 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -283,9 +283,20 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
 
 /*
  * Event channel endpoints per domain:
+ * 2-level for x86:
  *  1024 if a long is 32 bits; 4096 if a long is 64 bits.
+ * 3-level for x86:
+ *  32k if a long is 32 bits; 256k if a long is 64 bits.
+ * 2-level for ARM:
+ *  4096 for both 32 bits and 64 bits.
+ * 3-level for ARM:
+ *  256k for both 32 bits and 64 bits.
  */
-#define NR_EVENT_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
+#define NR_EVENT_CHANNELS_L2 (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64)
+#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * sizeof(xen_ulong_t) * 8)
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#define NR_EVENT_CHANNELS NR_EVENT_CHANNELS_L2 /* for compatibility */
+#endif
 
 struct vcpu_time_info {
 	/*
-- 
1.7.10.4

* [RFC PATCH V5 05/14] xen: introduce test_and_set_mask
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (3 preceding siblings ...)
  2013-03-19 15:21 ` [RFC PATCH V5 04/14] xen: sync public headers Wei Liu
@ 2013-03-19 15:21 ` Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 06/14] xen: replace raw bit ops with functions Wei Liu
                   ` (8 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:21 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |   10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 38e30aa..eca6488 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -369,6 +369,12 @@ static inline int test_evtchn(int port)
 	return sync_test_bit(port, BM(&s->evtchn_pending[0]));
 }
 
+static inline int test_and_set_mask(int port)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
+}
+
 
 /**
  * notify_remote_via_irq - send event to remote end of event channel via irq
@@ -1506,7 +1512,7 @@ int resend_irq_on_evtchn(unsigned int irq)
 	if (!VALID_EVTCHN(evtchn))
 		return 1;
 
-	masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask));
+	masked = test_and_set_mask(evtchn);
 	sync_set_bit(evtchn, BM(s->evtchn_pending));
 	if (!masked)
 		unmask_evtchn(evtchn);
@@ -1555,7 +1561,7 @@ static int retrigger_dynirq(struct irq_data *data)
 	if (VALID_EVTCHN(evtchn)) {
 		int masked;
 
-		masked = sync_test_and_set_bit(evtchn, BM(sh->evtchn_mask));
+		masked = test_and_set_mask(evtchn);
 		sync_set_bit(evtchn, BM(sh->evtchn_pending));
 		if (!masked)
 			unmask_evtchn(evtchn);
-- 
1.7.10.4

* [RFC PATCH V5 06/14] xen: replace raw bit ops with functions
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (4 preceding siblings ...)
  2013-03-19 15:21 ` [RFC PATCH V5 05/14] xen: introduce test_and_set_mask Wei Liu
@ 2013-03-19 15:22 ` Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 07/14] xen: generalized event channel operations Wei Liu
                   ` (7 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:22 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

There is already a function called set_evtchn() for that job.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |    6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index eca6488..6e226c3 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1507,13 +1507,12 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
 int resend_irq_on_evtchn(unsigned int irq)
 {
 	int masked, evtchn = evtchn_from_irq(irq);
-	struct shared_info *s = HYPERVISOR_shared_info;
 
 	if (!VALID_EVTCHN(evtchn))
 		return 1;
 
 	masked = test_and_set_mask(evtchn);
-	sync_set_bit(evtchn, BM(s->evtchn_pending));
+	set_evtchn(evtchn);
 	if (!masked)
 		unmask_evtchn(evtchn);
 
@@ -1555,14 +1554,13 @@ static void mask_ack_dynirq(struct irq_data *data)
 static int retrigger_dynirq(struct irq_data *data)
 {
 	int evtchn = evtchn_from_irq(data->irq);
-	struct shared_info *sh = HYPERVISOR_shared_info;
 	int ret = 0;
 
 	if (VALID_EVTCHN(evtchn)) {
 		int masked;
 
 		masked = test_and_set_mask(evtchn);
-		sync_set_bit(evtchn, BM(sh->evtchn_pending));
+		set_evtchn(evtchn);
 		if (!masked)
 			unmask_evtchn(evtchn);
 		ret = 1;
-- 
1.7.10.4

* [RFC PATCH V5 07/14] xen: generalized event channel operations
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (5 preceding siblings ...)
  2013-03-19 15:22 ` [RFC PATCH V5 06/14] xen: replace raw bit ops with functions Wei Liu
@ 2013-03-19 15:22 ` Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 08/14] xen: dynamically allocate cpu_evtchn_mask Wei Liu
                   ` (6 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:22 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Use global pointers in the common operations to allow for better code sharing
between the 2-level and 3-level event channel ABIs.

Function pointers are used for the operations that are not suitable for
sharing.

Also update drivers/xen/evtchn.c to use the exported variable instead of the
macro.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
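To illustrate the indirection this introduces (sketch only; the l3
functions named here are added later in the series by the "implement
3-level event channel routines" patch), an alternative ABI hooks in by
providing its own ops table and switching the global pointers:

	static const struct evtchn_ops evtchn_l3_ops = {
		.unmask          = __unmask_local_port_l3,
		.debug_interrupt = xen_debug_interrupt_l3,
		.do_upcall       = __xen_evtchn_do_upcall_l3,
	};

	/* Selecting an ABI then becomes a pointer assignment ... */
	eops = &evtchn_l3_ops;
	/* ... plus repointing evtchn_pending / evtchn_mask / evtchn_sel
	 * at the 3-level bitmaps and selectors (done in a later patch). */
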
 drivers/xen/events.c |  199 +++++++++++++++++++++++++++++++-------------------
 drivers/xen/evtchn.c |   13 ++--
 include/xen/events.h |    3 +
 3 files changed, 135 insertions(+), 80 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 6e226c3..217efb2 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -56,6 +56,27 @@
 #include <xen/interface/sched.h>
 #include <asm/hw_irq.h>
 
+/* extended event channel ABI in use, default is EVTCHN_EXTENDED_NONE */
+uint64_t xen_evtchn_extended = EVTCHN_EXTENDED_NONE;
+EXPORT_SYMBOL_GPL(xen_evtchn_extended);
+/* number of event channels */
+unsigned int xen_nr_event_channels;
+EXPORT_SYMBOL_GPL(xen_nr_event_channels);
+
+struct evtchn_ops {
+	void (*unmask)(int port);
+	irqreturn_t (*debug_interrupt)(int irq, void *dev_id);
+	void (*do_upcall)(void);
+};
+
+static const struct evtchn_ops *eops;
+
+/* The following pointers point to pending bitmap and mask bitmap. */
+static xen_ulong_t *evtchn_pending;
+static xen_ulong_t *evtchn_mask;
+/* The following per-cpu var points to selector(s). */
+static DEFINE_PER_CPU(xen_ulong_t *[1], evtchn_sel);
+
 /*
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
@@ -135,7 +156,7 @@ static bool (*pirq_needs_eoi)(unsigned irq);
 /* Find the first set bit in a evtchn mask */
 #define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
 
-static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD],
+static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS_L2/BITS_PER_EVTCHN_WORD],
 		      cpu_evtchn_mask);
 
 /* Xen will never allocate port zero for any purpose. */
@@ -310,12 +331,11 @@ static bool pirq_needs_eoi_flag(unsigned irq)
 }
 
 static inline xen_ulong_t active_evtchns(unsigned int cpu,
-					 struct shared_info *sh,
 					 unsigned int idx)
 {
-	return sh->evtchn_pending[idx] &
+	return evtchn_pending[idx] &
 		per_cpu(cpu_evtchn_mask, cpu)[idx] &
-		~sh->evtchn_mask[idx];
+		~evtchn_mask[idx];
 }
 
 static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
@@ -353,26 +373,22 @@ static void init_evtchn_cpu_bindings(void)
 
 static inline void clear_evtchn(int port)
 {
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_clear_bit(port, BM(&s->evtchn_pending[0]));
+	sync_clear_bit(port, BM(&evtchn_pending[0]));
 }
 
 static inline void set_evtchn(int port)
 {
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_set_bit(port, BM(&s->evtchn_pending[0]));
+	sync_set_bit(port, BM(&evtchn_pending[0]));
 }
 
 static inline int test_evtchn(int port)
 {
-	struct shared_info *s = HYPERVISOR_shared_info;
-	return sync_test_bit(port, BM(&s->evtchn_pending[0]));
+	return sync_test_bit(port, BM(&evtchn_pending[0]));
 }
 
 static inline int test_and_set_mask(int port)
 {
-	struct shared_info *s = HYPERVISOR_shared_info;
-	return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
+	return sync_test_and_set_bit(port, BM(&evtchn_mask[0]));
 }
 
 
@@ -395,24 +411,40 @@ EXPORT_SYMBOL_GPL(notify_remote_via_irq);
 
 static void mask_evtchn(int port)
 {
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_set_bit(port, BM(&s->evtchn_mask[0]));
+	sync_set_bit(port, BM(&evtchn_mask[0]));
+}
+
+static inline void __unmask_local_port_l2(int port)
+{
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	int cpu = smp_processor_id();
+
+	sync_clear_bit(port, BM(&evtchn_mask[0]));
+
+	/*
+	 * The following is basically the equivalent of
+	 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+	 * the interrupt edge' if the channel is masked.
+	 */
+	if (sync_test_bit(port, BM(&evtchn_pending[0])) &&
+	    !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
+				   BM(per_cpu(evtchn_sel, cpu)[0])))
+		vcpu_info->evtchn_upcall_pending = 1;
 }
 
 static void unmask_evtchn(int port)
 {
-	struct shared_info *s = HYPERVISOR_shared_info;
 	unsigned int cpu = get_cpu();
-	int do_hypercall = 0, evtchn_pending = 0;
+	int do_hypercall = 0, _evtchn_pending = 0;
 
 	BUG_ON(!irqs_disabled());
 
 	if (unlikely((cpu != cpu_from_evtchn(port))))
 		do_hypercall = 1;
 	else
-		evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
+		_evtchn_pending = sync_test_bit(port, BM(&evtchn_pending[0]));
 
-	if (unlikely(evtchn_pending && xen_hvm_domain()))
+	if (unlikely(_evtchn_pending && xen_hvm_domain()))
 		do_hypercall = 1;
 
 	/* Slow path (hypercall) if this is a non-local port or if this is
@@ -421,21 +453,8 @@ static void unmask_evtchn(int port)
 	if (do_hypercall) {
 		struct evtchn_unmask unmask = { .port = port };
 		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
-	} else {
-		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
-
-		sync_clear_bit(port, BM(&s->evtchn_mask[0]));
-
-		/*
-		 * The following is basically the equivalent of
-		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
-		 * the interrupt edge' if the channel is masked.
-		 */
-		if (evtchn_pending &&
-		    !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
-					   BM(&vcpu_info->evtchn_pending_sel)))
-			vcpu_info->evtchn_upcall_pending = 1;
-	}
+	} else
+		eops->unmask(port);
 
 	put_cpu();
 }
@@ -938,7 +957,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
 	int port, rc = -ENOENT;
 
 	memset(&status, 0, sizeof(status));
-	for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+	for (port = 0; port <= xen_nr_event_channels; port++) {
 		status.dom = DOMID_SELF;
 		status.port = port;
 		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
@@ -1163,7 +1182,7 @@ int evtchn_get(unsigned int evtchn)
 	struct irq_info *info;
 	int err = -ENOENT;
 
-	if (evtchn >= NR_EVENT_CHANNELS)
+	if (evtchn >= xen_nr_event_channels)
 		return -EINVAL;
 
 	mutex_lock(&irq_mapping_update_lock);
@@ -1208,13 +1227,12 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 {
-	struct shared_info *sh = HYPERVISOR_shared_info;
-	int cpu = smp_processor_id();
-	xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
-	int i;
-	unsigned long flags;
+	irqreturn_t rc;
 	static DEFINE_SPINLOCK(debug_lock);
+	unsigned long flags;
+	int cpu = smp_processor_id();
 	struct vcpu_info *v;
+	int i;
 
 	spin_lock_irqsave(&debug_lock, flags);
 
@@ -1228,65 +1246,80 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 			: v->evtchn_upcall_mask;
 		printk(KERN_DEBUG "%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n  ", i,
 		       pending, v->evtchn_upcall_pending,
-		       (int)(sizeof(v->evtchn_pending_sel)*2),
-		       v->evtchn_pending_sel);
+		       (int)(sizeof(*per_cpu(evtchn_sel, cpu)[0])*2),
+		       *per_cpu(evtchn_sel, cpu)[0]);
 	}
+
+	rc = eops->debug_interrupt(irq, dev_id);
+
+	spin_unlock_irqrestore(&debug_lock, flags);
+	return rc;
+}
+
+static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+	xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	int i;
+	unsigned long nr_elems = NR_EVENT_CHANNELS_L2 / BITS_PER_EVTCHN_WORD;
+	struct vcpu_info *v;
+
 	v = per_cpu(xen_vcpu, cpu);
 
 	printk(KERN_DEBUG "\npending:\n   ");
-	for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
+	for (i = nr_elems - 1; i >= 0; i--)
 		printk(KERN_DEBUG "%0*"PRI_xen_ulong"%s",
-		       (int)sizeof(sh->evtchn_pending[0])*2,
-		       sh->evtchn_pending[i],
+		       (int)sizeof(evtchn_pending[0])*2,
+		       evtchn_pending[i],
 		       i % 8 == 0 ? "\n   " : " ");
 	printk(KERN_DEBUG "\nglobal mask:\n   ");
-	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+	for (i = nr_elems - 1; i >= 0; i--)
 		printk(KERN_DEBUG "%0*"PRI_xen_ulong"%s",
-		       (int)(sizeof(sh->evtchn_mask[0])*2),
-		       sh->evtchn_mask[i],
+		       (int)(sizeof(evtchn_mask[0])*2),
+		       evtchn_mask[i],
 		       i % 8 == 0 ? "\n   " : " ");
 
 	printk(KERN_DEBUG "\nglobally unmasked:\n   ");
-	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+	for (i = nr_elems - 1; i >= 0; i--)
 		printk("%0*"PRI_xen_ulong"%s",
-		       (int)(sizeof(sh->evtchn_mask[0])*2),
-		       sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
+		       (int)(sizeof(evtchn_mask[0])*2),
+		       evtchn_pending[i] & ~evtchn_mask[i],
 		       i % 8 == 0 ? "\n   " : " ");
 
 	printk("\nlocal cpu%d mask:\n   ", cpu);
-	for (i = (NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
-		printk(KERN_DEBUG "%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
+	for (i = (NR_EVENT_CHANNELS_L2/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
+		printk(KERN_DEBUG "%0*"PRI_xen_ulong"%s",
+		       (int)(sizeof(cpu_evtchn[0])*2),
 		       cpu_evtchn[i],
 		       i % 8 == 0 ? "\n   " : " ");
 
 	printk(KERN_DEBUG "\nlocally unmasked:\n   ");
-	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
-		xen_ulong_t pending = sh->evtchn_pending[i]
-			& ~sh->evtchn_mask[i]
+	for (i = nr_elems-1; i >= 0; i--) {
+		xen_ulong_t pending = evtchn_pending[i]
+			& ~evtchn_mask[i]
 			& cpu_evtchn[i];
 		printk(KERN_DEBUG "%0*"PRI_xen_ulong"%s",
-		       (int)(sizeof(sh->evtchn_mask[0])*2),
+		       (int)(sizeof(evtchn_mask[0])*2),
 		       pending, i % 8 == 0 ? "\n   " : " ");
 	}
 
 	printk(KERN_DEBUG "\npending list:\n");
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-		if (sync_test_bit(i, BM(sh->evtchn_pending))) {
+	for (i = 0; i < NR_EVENT_CHANNELS_L2; i++) {
+		if (sync_test_bit(i, BM(evtchn_pending))) {
 			int word_idx = i / BITS_PER_EVTCHN_WORD;
 			printk(KERN_DEBUG "  %d: event %d -> irq %d%s%s%s\n",
 			       cpu_from_evtchn(i), i,
 			       evtchn_to_irq[i],
-			       sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
+			       sync_test_bit(word_idx,
+				             BM(per_cpu(evtchn_sel, cpu)[0]))
 					     ? "" : " l1-clear",
-			       !sync_test_bit(i, BM(sh->evtchn_mask))
+			       !sync_test_bit(i, BM(evtchn_mask))
 					     ? "" : " globally-masked",
 			       sync_test_bit(i, BM(cpu_evtchn))
 					     ? "" : " locally-masked");
 		}
 	}
 
-	spin_unlock_irqrestore(&debug_lock, flags);
-
 	return IRQ_HANDLED;
 }
 
@@ -1308,13 +1341,12 @@ static DEFINE_PER_CPU(unsigned int, current_bit_idx);
  * a bitset of words which contain pending event bits.  The second
  * level is a bitset of pending events themselves.
  */
-static void __xen_evtchn_do_upcall(void)
+static void __xen_evtchn_do_upcall_l2(void)
 {
 	int start_word_idx, start_bit_idx;
 	int word_idx, bit_idx;
 	int i;
 	int cpu = get_cpu();
-	struct shared_info *s = HYPERVISOR_shared_info;
 	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
 	unsigned count;
 
@@ -1331,7 +1363,7 @@ static void __xen_evtchn_do_upcall(void)
 		 * selector flag. xchg_xen_ulong must contain an
 		 * appropriate barrier.
 		 */
-		pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
+		pending_words = xchg_xen_ulong(per_cpu(evtchn_sel, cpu)[0], 0);
 
 		start_word_idx = __this_cpu_read(current_word_idx);
 		start_bit_idx = __this_cpu_read(current_bit_idx);
@@ -1354,7 +1386,7 @@ static void __xen_evtchn_do_upcall(void)
 			}
 			word_idx = EVTCHN_FIRST_BIT(words);
 
-			pending_bits = active_evtchns(cpu, s, word_idx);
+			pending_bits = active_evtchns(cpu, word_idx);
 			bit_idx = 0; /* usually scan entire word from start */
 			if (word_idx == start_word_idx) {
 				/* We scan the starting word in two parts */
@@ -1425,7 +1457,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 	exit_idle();
 #endif
 
-	__xen_evtchn_do_upcall();
+	eops->do_upcall();
 
 	irq_exit();
 	set_irq_regs(old_regs);
@@ -1433,7 +1465,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 
 void xen_hvm_evtchn_do_upcall(void)
 {
-	__xen_evtchn_do_upcall();
+	eops->do_upcall();
 }
 EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
 
@@ -1729,14 +1761,14 @@ void xen_irq_resume(void)
 	init_evtchn_cpu_bindings();
 
 	/* New event-channel space is not 'live' yet. */
-	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+	for (evtchn = 0; evtchn < xen_nr_event_channels; evtchn++)
 		mask_evtchn(evtchn);
 
 	/* No IRQ <-> event-channel mappings. */
 	list_for_each_entry(info, &xen_irq_list_head, list)
 		info->evtchn = 0; /* zap event-channel binding */
 
-	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+	for (evtchn = 0; evtchn < xen_nr_event_channels; evtchn++)
 		evtchn_to_irq[evtchn] = -1;
 
 	for_each_possible_cpu(cpu) {
@@ -1829,20 +1861,39 @@ void xen_callback_vector(void)
 void xen_callback_vector(void) {}
 #endif
 
+const struct evtchn_ops evtchn_l2_ops = {
+	.unmask = __unmask_local_port_l2,
+	.debug_interrupt = xen_debug_interrupt_l2,
+	.do_upcall = __xen_evtchn_do_upcall_l2
+};
+
 void __init xen_init_IRQ(void)
 {
 	int i;
+	int cpu;
+	struct shared_info *s = HYPERVISOR_shared_info;
+
+	evtchn_pending = s->evtchn_pending;
+	evtchn_mask = s->evtchn_mask;
+	for_each_possible_cpu(cpu) {
+		struct vcpu_info *vcpu_info = per_cpu(xen_vcpu, cpu);
+		per_cpu(evtchn_sel, cpu)[0] = &vcpu_info->evtchn_pending_sel;
+	}
+
+	xen_evtchn_extended = EVTCHN_EXTENDED_NONE;
+	xen_nr_event_channels = NR_EVENT_CHANNELS_L2;
+	eops = &evtchn_l2_ops;
 
-	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
+	evtchn_to_irq = kcalloc(xen_nr_event_channels, sizeof(*evtchn_to_irq),
 				    GFP_KERNEL);
 	BUG_ON(!evtchn_to_irq);
-	for (i = 0; i < NR_EVENT_CHANNELS; i++)
+	for (i = 0; i < xen_nr_event_channels; i++)
 		evtchn_to_irq[i] = -1;
 
 	init_evtchn_cpu_bindings();
 
 	/* No event channels are 'live' right now. */
-	for (i = 0; i < NR_EVENT_CHANNELS; i++)
+	for (i = 0; i < xen_nr_event_channels; i++)
 		mask_evtchn(i);
 
 	pirq_needs_eoi = pirq_needs_eoi_flag;
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b2db77e..ac7a96e 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -232,7 +232,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
 		unsigned port = kbuf[i];
 
-		if (port < NR_EVENT_CHANNELS &&
+		if (port < xen_nr_event_channels &&
 		    get_port_user(port) == u &&
 		    !get_port_enabled(port)) {
 			set_port_enabled(port, true);
@@ -374,7 +374,7 @@ static long evtchn_ioctl(struct file *file,
 			break;
 
 		rc = -EINVAL;
-		if (unbind.port >= NR_EVENT_CHANNELS)
+		if (unbind.port >= xen_nr_event_channels)
 			break;
 
 		spin_lock_irq(&port_user_lock);
@@ -402,7 +402,7 @@ static long evtchn_ioctl(struct file *file,
 		if (copy_from_user(&notify, uarg, sizeof(notify)))
 			break;
 
-		if (notify.port >= NR_EVENT_CHANNELS) {
+		if (notify.port >= xen_nr_event_channels) {
 			rc = -EINVAL;
 		} else if (get_port_user(notify.port) != u) {
 			rc = -ENOTCONN;
@@ -492,7 +492,7 @@ static int evtchn_release(struct inode *inode, struct file *filp)
 
 	free_page((unsigned long)u->ring);
 
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+	for (i = 0; i < xen_nr_event_channels; i++) {
 		if (get_port_user(i) != u)
 			continue;
 
@@ -501,7 +501,7 @@ static int evtchn_release(struct inode *inode, struct file *filp)
 
 	spin_unlock_irq(&port_user_lock);
 
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+	for (i = 0; i < xen_nr_event_channels; i++) {
 		if (get_port_user(i) != u)
 			continue;
 
@@ -538,7 +538,8 @@ static int __init evtchn_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
-	port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
+	port_user = kcalloc(xen_nr_event_channels,
+			    sizeof(*port_user), GFP_KERNEL);
 	if (port_user == NULL)
 		return -ENOMEM;
 
diff --git a/include/xen/events.h b/include/xen/events.h
index c6bfe01..24cf421 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -111,4 +111,7 @@ int xen_test_irq_shared(int irq);
 
 /* initialize Xen IRQ subsystem */
 void xen_init_IRQ(void);
+extern unsigned int xen_nr_event_channels;
+extern uint64_t xen_evtchn_extended;
+
 #endif	/* _XEN_EVENTS_H */
-- 
1.7.10.4

* [RFC PATCH V5 08/14] xen: dynamically allocate cpu_evtchn_mask
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (6 preceding siblings ...)
  2013-03-19 15:22 ` [RFC PATCH V5 07/14] xen: generalized event channel operations Wei Liu
@ 2013-03-19 15:22 ` Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 09/14] xen: implement 3-level event channel routines Wei Liu
                   ` (5 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:22 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

The size of cpu_evtchn_mask can change, so allocate it dynamically to cope with
this. To save space, cpu_evtchn_mask is not allocated for offline cpus; it is
allocated as soon as a cpu comes online.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
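Size check (illustrative only) showing why a static per-cpu array stops
being attractive once the 3-level ABI is in use:

	/*
	 * 2-level:   4096 events /  64 bits per word =   64 words = 512 bytes
	 *            (1024 / 32 =   32 words = 128 bytes on 32-bit)
	 * 3-level: 262144 events /  64 bits per word = 4096 words =  32 KiB
	 *            (32768 / 32 = 1024 words =   4 KiB on 32-bit)
	 * per possible cpu, hence allocating per online cpu instead.
	 */
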
 drivers/xen/events.c |   57 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 217efb2..ee35ff9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/irqnr.h>
 #include <linux/pci.h>
+#include <linux/cpu.h>
 
 #ifdef CONFIG_X86
 #include <asm/desc.h>
@@ -156,8 +157,7 @@ static bool (*pirq_needs_eoi)(unsigned irq);
 /* Find the first set bit in a evtchn mask */
 #define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
 
-static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS_L2/BITS_PER_EVTCHN_WORD],
-		      cpu_evtchn_mask);
+static DEFINE_PER_CPU(xen_ulong_t *, cpu_evtchn_mask);
 
 /* Xen will never allocate port zero for any purpose. */
 #define VALID_EVTCHN(chn)	((chn) != 0)
@@ -356,6 +356,9 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 static void init_evtchn_cpu_bindings(void)
 {
 	int i;
+	unsigned int nr = xen_nr_event_channels / BITS_PER_EVTCHN_WORD;
+	unsigned int nr_bytes = nr * sizeof(xen_ulong_t);
+
 #ifdef CONFIG_SMP
 	struct irq_info *info;
 
@@ -366,9 +369,9 @@ static void init_evtchn_cpu_bindings(void)
 	}
 #endif
 
-	for_each_possible_cpu(i)
+	for_each_online_cpu(i)
 		memset(per_cpu(cpu_evtchn_mask, i),
-		       (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
+		       (i == 0) ? ~0 : 0, nr_bytes);
 }
 
 static inline void clear_evtchn(int port)
@@ -1867,6 +1870,41 @@ const struct evtchn_ops evtchn_l2_ops = {
 	.do_upcall = __xen_evtchn_do_upcall_l2
 };
 
+static int __cpuinit xen_events_notifier_cb(struct notifier_block *self,
+					    unsigned long action,
+					    void *hcpu)
+{
+	int cpu = (long)hcpu;
+	int rc = NOTIFY_OK;
+	void *p;
+	unsigned int nr = xen_nr_event_channels / BITS_PER_EVTCHN_WORD;
+	unsigned int nr_bytes = nr * sizeof(xen_ulong_t);
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (!per_cpu(cpu_evtchn_mask, cpu)) {
+			p = kzalloc_node(sizeof(xen_ulong_t) * nr,
+					 GFP_KERNEL, cpu_to_node(cpu));
+			if (!p)
+				rc = NOTIFY_BAD;
+			else {
+				per_cpu(cpu_evtchn_mask, cpu) = p;
+				memset(per_cpu(cpu_evtchn_mask, cpu),
+				       (cpu == 0) ? ~0 : 0, nr_bytes);
+				rc = NOTIFY_OK;
+			}
+		}
+		break;
+	default:
+		break;
+	}
+	return rc;
+}
+
+static struct notifier_block xen_events_notifier __cpuinitdata = {
+	.notifier_call = xen_events_notifier_cb,
+};
+
 void __init xen_init_IRQ(void)
 {
 	int i;
@@ -1890,6 +1928,17 @@ void __init xen_init_IRQ(void)
 	for (i = 0; i < xen_nr_event_channels; i++)
 		evtchn_to_irq[i] = -1;
 
+	for_each_online_cpu(cpu) {
+		void *p;
+		unsigned int nr = xen_nr_event_channels / BITS_PER_EVTCHN_WORD;
+
+		p = kzalloc_node(sizeof(xen_ulong_t) * nr,
+				 GFP_KERNEL, cpu_to_node(cpu));
+		BUG_ON(!p);
+		per_cpu(cpu_evtchn_mask, cpu) = p;
+	}
+	register_cpu_notifier(&xen_events_notifier);
+
 	init_evtchn_cpu_bindings();
 
 	/* No event channels are 'live' right now. */
-- 
1.7.10.4

* [RFC PATCH V5 09/14] xen: implement 3-level event channel routines
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (7 preceding siblings ...)
  2013-03-19 15:22 ` [RFC PATCH V5 08/14] xen: dynamically allocate cpu_evtchn_mask Wei Liu
@ 2013-03-19 15:22 ` Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 10/14] xen: document 2/3-level event channel ABI Wei Liu
                   ` (4 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:22 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Implement several routines for the 3-level event channel ABI. Some routines are
shared between the 2-level and 3-level ABIs.

For an N-level (currently only 2 or 3) event channel ABI, active events are
processed top-down, i.e. L1 -> L2 -> ... -> L(N-1) -> bitmap. The selectors are
processed recursively; the event bitmap is processed by a dedicated function
called process_port.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
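A worked example of the selector arithmetic used below (illustrative
only, 64-bit case where EVTCHN_WORD_BITORDER == 6):

	/*
	 * port = 200005:
	 *   bit within its event-bitmap word:      200005 % 64   = 5
	 *   L2 selector bit (word of the bitmap):  200005 >> 6   = 3125
	 *   L1 selector bit (word of the L2 sel.): 200005 >> 12  = 48
	 * which matches the l1bit/l2bit computation in
	 * __unmask_local_port_l3() below.
	 */
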
 drivers/xen/events.c |  376 +++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 293 insertions(+), 83 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ee35ff9..fe1831b 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -76,7 +76,12 @@ static const struct evtchn_ops *eops;
 static xen_ulong_t *evtchn_pending;
 static xen_ulong_t *evtchn_mask;
 /* The following per-cpu var points to selector(s). */
-static DEFINE_PER_CPU(xen_ulong_t *[1], evtchn_sel);
+static DEFINE_PER_CPU(xen_ulong_t *[2], evtchn_sel);
+/*
+ * 2nd level selector for 3-level event channel, '8' stands for 8 bits
+ * per byte.
+ */
+static DEFINE_PER_CPU(xen_ulong_t [sizeof(xen_ulong_t) * 8], evtchn_sel_l2);
 
 /*
  * This lock protects updates to the following mapping and reference-count
@@ -150,6 +155,11 @@ static bool (*pirq_needs_eoi)(unsigned irq);
  */
 #define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
 /*
+ * If xen_ulong_t is 8 bytes it is 64 bits wide (2^6 == 64); otherwise
+ * it is 32 bits wide (2^5 == 32).
+ */
+#define EVTCHN_WORD_BITORDER (sizeof(xen_ulong_t) == 8 ? 6 : 5)
+/*
  * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
  * array. Primarily to avoid long lines (hence the terse name).
  */
@@ -435,6 +445,29 @@ static inline void __unmask_local_port_l2(int port)
 		vcpu_info->evtchn_upcall_pending = 1;
 }
 
+static inline void __unmask_local_port_l3(int port)
+{
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	int cpu = smp_processor_id();
+	unsigned int l1bit = port >> (EVTCHN_WORD_BITORDER << 1);
+	unsigned int l2bit = port >> EVTCHN_WORD_BITORDER;
+
+	sync_clear_bit(port, BM(&evtchn_mask[0]));
+
+	/*
+	 * The following is basically the equivalent of
+	 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+	 * the interrupt edge' if the channel is masked.
+	 */
+	if (sync_test_bit(port, BM(&evtchn_pending[0])) &&
+	    !sync_test_and_set_bit(l2bit,
+				   BM(per_cpu(evtchn_sel, cpu)[1])) &&
+	    !sync_test_and_set_bit(l1bit,
+				   BM(per_cpu(evtchn_sel, cpu)[0])))
+		vcpu_info->evtchn_upcall_pending = 1;
+
+}
+
 static void unmask_evtchn(int port)
 {
 	unsigned int cpu = get_cpu();
@@ -1326,119 +1359,254 @@ static irqreturn_t xen_debug_interrupt_l2(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t xen_debug_interrupt_l3(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+	xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	unsigned long nr_elems = NR_EVENT_CHANNELS_L3 / BITS_PER_EVTCHN_WORD;
+	int i;
+	struct vcpu_info *v;
+
+	v = per_cpu(xen_vcpu, cpu);
+
+	printk(KERN_DEBUG "\npending (only show words which have bits set to 1):\n   ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if (evtchn_pending[i] != 0UL) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_pending[0])*2),
+			       evtchn_pending[i]);
+		}
+
+	printk(KERN_DEBUG "\nglobal mask (only show words which have bits set to 0):\n   ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if (evtchn_mask[i] != ~0UL) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_mask[i]);
+		}
+
+	printk(KERN_DEBUG "\nglobally unmasked (only show result words which have bits set to 1):\n   ");
+	for (i = nr_elems-1; i >= 0; i--)
+		if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_pending[i] & ~evtchn_mask[i]);
+		}
+
+	printk(KERN_DEBUG "\nlocal cpu%d mask (only show words which have bits set to 1):\n   ", cpu);
+	for (i = (NR_EVENT_CHANNELS_L3/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
+		if (cpu_evtchn[i] != 0UL) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(cpu_evtchn[0])*2),
+			       cpu_evtchn[i]);
+		}
+
+	printk(KERN_DEBUG "\nlocally unmasked (only show result words which have bits set to 1):\n   ");
+	for (i = nr_elems-1; i >= 0; i--) {
+		xen_ulong_t pending = evtchn_pending[i]
+			& ~evtchn_mask[i]
+			& cpu_evtchn[i];
+		if (pending != 0UL) {
+			printk(KERN_DEBUG " word index %d %0*"PRI_xen_ulong"\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       pending);
+		}
+	}
+
+	printk(KERN_DEBUG "\npending list:\n");
+	for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
+		if (sync_test_bit(i, BM(evtchn_pending))) {
+			int word_idx = i / (BITS_PER_EVTCHN_WORD * BITS_PER_EVTCHN_WORD);
+			int word_idx_l2 = i / BITS_PER_EVTCHN_WORD;
+			printk(KERN_DEBUG "  %d: event %d -> irq %d%s%s%s%s\n",
+			       cpu_from_evtchn(i), i,
+			       evtchn_to_irq[i],
+			       sync_test_bit(word_idx, BM(per_cpu(evtchn_sel, cpu)[0]))
+			       ? "" : " l1-clear",
+			       sync_test_bit(word_idx_l2, BM(per_cpu(evtchn_sel, cpu)[1]))
+			       ? "" : " l2-clear",
+			       !sync_test_bit(i, BM(evtchn_mask))
+			       ? "" : " globally-masked",
+			       sync_test_bit(i, BM(cpu_evtchn))
+			       ? "" : " locally-masked");
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
-static DEFINE_PER_CPU(unsigned int, current_word_idx);
-static DEFINE_PER_CPU(unsigned int, current_bit_idx);
+static DEFINE_PER_CPU(unsigned int[3], current_idx);
 
 /*
  * Mask out the i least significant bits of w
  */
 #define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
 
+static __always_inline void process_port(int cpu,
+					 unsigned int base,
+					 unsigned int *idx,
+					 unsigned int *idx_array)
+{
+	xen_ulong_t pending_bits, bits;
+	int port, irq;
+	struct irq_desc *desc;
+
+	pending_bits = active_evtchns(cpu, base >> EVTCHN_WORD_BITORDER);
+
+	do {
+		bits = MASK_LSBS(pending_bits, *idx);
+
+		/* If we masked out all events, move on. */
+		if (bits == 0)
+			break;
+
+		*idx = EVTCHN_FIRST_BIT(bits);
+
+		/* Process port. */
+		port = base + *idx;
+		irq = evtchn_to_irq[port];
+
+		if (irq != -1) {
+			desc = irq_to_desc(irq);
+			if (desc)
+				generic_handle_irq_desc(irq, desc);
+		}
+
+		*idx = (*idx + 1) % BITS_PER_EVTCHN_WORD;
+
+		/* Next caller starts at last processed + 1 */
+		/*
+		 * As this routine is shared by 2/3-level event
+		 * channel, we need to write all three current_idx
+		 * elements. In the 2-level case, the caller /should/
+		 * always set idx_array[2] to ~0U, so in practice the
+		 * write to current_idx[1] is equivalent to writing
+		 * idx_array[1].
+		 */
+		__this_cpu_write(current_idx[0],
+				 idx_array[1] ? idx_array[0] :
+				 (idx_array[0]+1) % BITS_PER_EVTCHN_WORD);
+		__this_cpu_write(current_idx[1],
+				 idx_array[2] ? idx_array[1] :
+				 (idx_array[1]+1) % BITS_PER_EVTCHN_WORD);
+		__this_cpu_write(current_idx[2], idx_array[2]);
+	} while (*idx != 0);
+}
+
 /*
- * Search the CPUs pending events bitmasks.  For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
+ * This function processes active event channels top-down, L1 -> L2 ->
+ * ... -> L(n-1) -> bitmap. The selectors are processed recursively;
+ * the event bitmap is processed by process_port.
  *
- * Xen uses a two-level bitmap to speed searching.  The first level is
- * a bitset of words which contain pending event bits.  The second
- * level is a bitset of pending events themselves.
+ * @cpu: current cpu id
+ * @base: accumulated offsets along selector processing
+ * @start_idx: array used to resume index
+ * @idx: array of current processing index
+ * @sel_idx: selector word index
+ * @level: current processing level, from 0 to highest_level
+ * @highest_level: highest recursion level
+ *
+ * If level == highest_level, we have reached the event bitmap.  The level
+ * variable starts from 0, so highest_level for the 2-level ABI is 1,
+ * while for the 3-level ABI it is 2.
  */
-static void __xen_evtchn_do_upcall_l2(void)
+static void process(int cpu,
+		    unsigned int base,
+		    unsigned int *start_idx,
+		    unsigned int *idx,
+		    unsigned int sel_idx,
+		    unsigned short level,
+		    unsigned short highest_level)
 {
-	int start_word_idx, start_bit_idx;
-	int word_idx, bit_idx;
 	int i;
-	int cpu = get_cpu();
-	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
-	unsigned count;
+	xen_ulong_t pending_words;
 
-	do {
-		xen_ulong_t pending_words;
+	if (level == highest_level) {
+		process_port(cpu, base, &idx[level], idx);
+		return;
+	}
 
-		vcpu_info->evtchn_upcall_pending = 0;
+	pending_words =
+		xchg_xen_ulong(&per_cpu(evtchn_sel, cpu)[level][sel_idx], 0);
 
-		if (__this_cpu_inc_return(xed_nesting_count) - 1)
-			goto out;
+	/* This loop is used to process selectors. */
+	for (i = 0; pending_words != 0; i++) {
+		xen_ulong_t words;
+		unsigned int saved_base;
+
+		words = MASK_LSBS(pending_words, idx[level]);
 
 		/*
-		 * Master flag must be cleared /before/ clearing
-		 * selector flag. xchg_xen_ulong must contain an
-		 * appropriate barrier.
+		 * If we masked out all events, wrap to beginning.
 		 */
-		pending_words = xchg_xen_ulong(per_cpu(evtchn_sel, cpu)[0], 0);
-
-		start_word_idx = __this_cpu_read(current_word_idx);
-		start_bit_idx = __this_cpu_read(current_bit_idx);
-
-		word_idx = start_word_idx;
+		if (words == 0) {
+			idx[level] = 0;
+			start_idx[level+1] = 0;
+			continue;
+		}
 
-		for (i = 0; pending_words != 0; i++) {
-			xen_ulong_t pending_bits;
-			xen_ulong_t words;
+		idx[level] = EVTCHN_FIRST_BIT(words);
+
+		idx[level+1] = 0; /* usually scan entire word from start */
+		if (idx[level] == start_idx[level]) {
+			/* We scan the starting word in two parts */
+			if (i == 0)
+				/* 1st time: start in the middle */
+				idx[level+1] = start_idx[level+1];
+			else
+				/* 2nd time: mask bits done already */
+				idx[level+1] &= (1UL << start_idx[level+1]) - 1;
+		}
 
-			words = MASK_LSBS(pending_words, word_idx);
+		saved_base = base;
+		base += (idx[level] <<
+			 (EVTCHN_WORD_BITORDER * (highest_level-level)));
 
-			/*
-			 * If we masked out all events, wrap to beginning.
-			 */
-			if (words == 0) {
-				word_idx = 0;
-				bit_idx = 0;
-				continue;
-			}
-			word_idx = EVTCHN_FIRST_BIT(words);
-
-			pending_bits = active_evtchns(cpu, word_idx);
-			bit_idx = 0; /* usually scan entire word from start */
-			if (word_idx == start_word_idx) {
-				/* We scan the starting word in two parts */
-				if (i == 0)
-					/* 1st time: start in the middle */
-					bit_idx = start_bit_idx;
-				else
-					/* 2nd time: mask bits done already */
-					bit_idx &= (1UL << start_bit_idx) - 1;
-			}
+		process(cpu, base, start_idx, idx, idx[level],
+			level+1, highest_level);
 
-			do {
-				xen_ulong_t bits;
-				int port, irq;
-				struct irq_desc *desc;
+		base = saved_base;
 
-				bits = MASK_LSBS(pending_bits, bit_idx);
+		/* Scan start_l1i twice; all others once. */
+		if ((idx[level] != start_idx[level]) || (i != 0))
+			pending_words &= ~(1UL << idx[level]);
 
-				/* If we masked out all events, move on. */
-				if (bits == 0)
-					break;
+		idx[level] = (idx[level] + 1) % BITS_PER_EVTCHN_WORD;
+	}
+}
 
-				bit_idx = EVTCHN_FIRST_BIT(bits);
 
-				/* Process port. */
-				port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
-				irq = evtchn_to_irq[port];
+/* This routine is shared between 2/3-level ABI */
+static void ___xen_evtchn_do_upcall(unsigned int *start_idx,
+				    unsigned int *idx,
+				    unsigned short highest_level)
+{
+	int cpu = get_cpu();
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	unsigned count;
 
-				if (irq != -1) {
-					desc = irq_to_desc(irq);
-					if (desc)
-						generic_handle_irq_desc(irq, desc);
-				}
+	do {
+		vcpu_info->evtchn_upcall_pending = 0;
 
-				bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
+		if (__this_cpu_inc_return(xed_nesting_count) - 1)
+			goto out;
 
-				/* Next caller starts at last processed + 1 */
-				__this_cpu_write(current_word_idx,
-						 bit_idx ? word_idx :
-						 (word_idx+1) % BITS_PER_EVTCHN_WORD);
-				__this_cpu_write(current_bit_idx, bit_idx);
-			} while (bit_idx != 0);
+		start_idx[0] = __this_cpu_read(current_idx[0]);
+		start_idx[1] = __this_cpu_read(current_idx[1]);
+		start_idx[2] = __this_cpu_read(current_idx[2]);
 
-			/* Scan start_l1i twice; all others once. */
-			if ((word_idx != start_word_idx) || (i != 0))
-				pending_words &= ~(1UL << word_idx);
+		idx[0] = start_idx[0];
 
-			word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
-		}
+		process(cpu, 0 /* base */, start_idx, idx,
+			0 /* selector index */,
+			0 /* starting from L1 (1-1=0) */,
+			highest_level);
 
 		BUG_ON(!irqs_disabled());
 
@@ -1451,6 +1619,42 @@ out:
 	put_cpu();
 }
 
+/*
+ * Search the CPUs pending events bitmasks.  For each one found, map
+ * the event number to an irq, and feed it into do_IRQ() for
+ * handling.
+ *
+ * Xen uses a two-level bitmap to speed searching.  The first level is
+ * a bitset of words which contain pending event bits.  The second
+ * level is a bitset of pending events themselves.
+ */
+static void __xen_evtchn_do_upcall_l2(void)
+{
+	/*
+	 * Need three elements to feed into process_port(), but the
+	 * third element is never used for the 2-level ABI and should
+	 * always be set to ~0U.
+	 */
+	unsigned int start_idx[3] = { 0, 0, ~0U };
+	unsigned int idx[3] = { 0, 0, ~0U };
+
+	___xen_evtchn_do_upcall(start_idx, idx, 1);
+}
+
+static void __xen_evtchn_do_upcall_l3(void)
+{
+	/*
+	 * The 3-level ABI uses all three index elements (L1 selector,
+	 * L2 selector and event bitmap), so all of them start from 0.
+	 */
+	unsigned int start_idx[3] = { 0, 0, 0 };
+	unsigned int idx[3] = { 0, 0, 0 };
+
+	___xen_evtchn_do_upcall(start_idx, idx, 2);
+
+}
+
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1870,6 +2074,12 @@ const struct evtchn_ops evtchn_l2_ops = {
 	.do_upcall = __xen_evtchn_do_upcall_l2
 };
 
+const struct evtchn_ops evtchn_l3_ops = {
+	.unmask = __unmask_local_port_l3,
+	.debug_interrupt = xen_debug_interrupt_l3,
+	.do_upcall = __xen_evtchn_do_upcall_l3
+};
+
 static int __cpuinit xen_events_notifier_cb(struct notifier_block *self,
 					    unsigned long action,
 					    void *hcpu)
-- 
1.7.10.4

* [RFC PATCH V5 10/14] xen: document 2/3-level event channel ABI
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (8 preceding siblings ...)
  2013-03-19 15:22 ` [RFC PATCH V5 09/14] xen: implement 3-level event channel routines Wei Liu
@ 2013-03-19 15:22 ` Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 11/14] xen: introduce xen_event_channel_query_extended_abis Wei Liu
                   ` (3 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:22 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
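A compact summary of the two ABIs described in the new comment
(informational only; the event counts are for x86, ARM uses a 64-bit
xen_ulong_t throughout):

	/*
	 *               2-level                  3-level
	 * L1 selector   vcpu_info (per cpu)      vcpu_info (per cpu, reused)
	 * L2            shared-info bitmap       per-cpu selector words
	 * L3            -                        kernel-allocated shared bitmap
	 * max events    1024 (32b) / 4096 (64b)  32k (32b) / 256k (64b)
	 */
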
 drivers/xen/events.c |   41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index fe1831b..ee33421 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -57,6 +57,47 @@
 #include <xen/interface/sched.h>
 #include <asm/hw_irq.h>
 
+/*
+ * The 2-level (default) event channel ABI:
+ *
+ * This is the default ABI, it is guaranteed to be supported. The name
+ * comes from its 2-level lookup path.
+ *
+ * The first level is a per-cpu selector in struct vcpu_info. The size
+ * of L1 selector is sizeof(xen_ulong_t), in which each bit represents
+ * a xen_ulong_t word in the event bitmap (second level).
+ *
+ * The second level is a shared bitmap of events, embedded in shared
+ * info page.
+ *
+ * The lookup path is as follows. We first look at each bit of the L1
+ * selector. A non-zero bit in the L1 selector indicates that one or
+ * more bits are set in the corresponding word of the L2 bitmap. In
+ * that case we pick up that word of the bitmap and process the event
+ * for each bit set in it.
+ *
+ *
+ * The 3-level event channel ABI:
+ *
+ * This ABI is more or less the same as the 2-level ABI. In this ABI:
+ *
+ * The first level is a per-cpu selector in struct vcpu_info. In fact,
+ * we reuse the same selector in 2-level ABI.
+ *
+ * The second level is a per-cpu bitmap of xen_ulong_t words, whose
+ * size is the same as the second level bitmap in 2-level ABI. However
+ * we cannot reuse the same bitmap in shared info page because this
+ * bitmap is per-cpu.
+ *
+ * The third level is a shared bitmap of events, which is allocated at
+ * boot time by Linux kernel.
+ *
+ * The lookup path is as follows. The first two levels of the lookup
+ * are the same as in the 2-level ABI, but after picking up a non-zero
+ * bit in the L2 selector we still need to go down one level further
+ * to reach the actual event bit.
+ */
+
 /* extended event channel ABI in use, default is EVTCHN_EXTENDED_NONE */
 uint64_t xen_evtchn_extended = EVTCHN_EXTENDED_NONE;
 EXPORT_SYMBOL_GPL(xen_evtchn_extended);
-- 
1.7.10.4

* [RFC PATCH V5 11/14] xen: introduce xen_event_channel_query_extended_abis
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (9 preceding siblings ...)
  2013-03-19 15:22 ` [RFC PATCH V5 10/14] xen: document 2/3-level event channel ABI Wei Liu
@ 2013-03-19 15:22 ` Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 12/14] xen: introduce xen_event_channel_register_3level Wei Liu
                   ` (2 subsequent siblings)
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:22 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
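An illustrative caller (hypothetical; the actual call site only appears
later in the series when the extended ABI gets registered):

	uint64_t abis = xen_event_channel_query_extended_abis();

	if (abis & EVTCHN_EXTENDED_L3) {
		/* The 3-level ABI is supported and enabled for this
		 * guest, so it is safe to try EVTCHNOP_register_3level. */
	}
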
 drivers/xen/events.c |   37 +++++++++++++++++++++++++++++++++++++
 include/xen/events.h |    3 +++
 2 files changed, 40 insertions(+)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ee33421..270821d 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -2109,6 +2109,43 @@ void xen_callback_vector(void)
 void xen_callback_vector(void) {}
 #endif
 
+/*
+ * This function returns the extended ABIs a guest can use.
+ * It returns EVTCHN_EXTENDED_NONE when
+ *  1) the hypervisor doesn't support extended ABIs (EVTCHNOP_* not
+ *     implemented), or
+ *  2) the hypervisor supports extended ABIs but this guest cannot use them.
+ * Otherwise it returns an OR'ed bitmap of the enabled ABIs.
+ */
+uint64_t xen_event_channel_query_extended_abis(void)
+{
+	struct evtchn_query_extended_abis query;
+	int rc;
+
+	memset(&query, 0, sizeof(query));
+
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_query_extended_abis, &query);
+
+	if (rc < 0) {
+		printk(KERN_INFO
+		       "Hypervisor does not support extended event channel ABIs.\n");
+		return EVTCHN_EXTENDED_NONE;
+	}
+
+	printk(KERN_INFO "Hypervisor supports extended event channel ABIs.\n");
+
+	printk(KERN_INFO
+	       "Extended event channel ABIs enabled for this guest:\n");
+	if (query.abis == EVTCHN_EXTENDED_NONE /* 0 */)
+		printk(KERN_INFO "  None (disabled by host administrator)\n");
+	else {
+		if (query.abis & EVTCHN_EXTENDED_L3)
+			printk(KERN_INFO "  3-level event channel ABI\n");
+	}
+
+	return query.abis;
+}
+
 const struct evtchn_ops evtchn_l2_ops = {
 	.unmask = __unmask_local_port_l2,
 	.debug_interrupt = xen_debug_interrupt_l2,
diff --git a/include/xen/events.h b/include/xen/events.h
index 24cf421..49d54ac 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -114,4 +114,7 @@ void xen_init_IRQ(void);
 extern unsigned int xen_nr_event_channels;
 extern uint64_t xen_evtchn_extended;
 
+/* Query hypervisor for supported / enabled extended event channel ABIs. */
+uint64_t xen_event_channel_query_extended_abis(void);
+
 #endif	/* _XEN_EVENTS_H */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread
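As a hedged illustration of consuming the OR'ed bitmap this query returns,
a small user-space sketch; the EVTCHN_EXTENDED_* values here are assumed
placeholders, not copied from the synced Xen headers:

#include <stdint.h>
#include <stdio.h>

/* Placeholder values; the real constants come from the Xen interface headers. */
#define EVTCHN_EXTENDED_NONE 0x0ULL
#define EVTCHN_EXTENDED_L3   0x1ULL

static void report_abis(uint64_t abis)
{
	if (abis == EVTCHN_EXTENDED_NONE) {
		printf("  None (disabled by host administrator)\n");
		return;
	}
	if (abis & EVTCHN_EXTENDED_L3)
		printf("  3-level event channel ABI\n");
}

int main(void)
{
	report_abis(EVTCHN_EXTENDED_NONE);
	report_abis(EVTCHN_EXTENDED_L3);
	return 0;
}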

* [RFC PATCH V5 12/14] xen: introduce xen_event_channel_register_3level
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (10 preceding siblings ...)
  2013-03-19 15:22 ` [RFC PATCH V5 11/14] xen: introduce xen_event_channel_query_extended_abis Wei Liu
@ 2013-03-19 15:22 ` Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 13/14] xen: introduce xen_event_channel_register_extended Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 14/14] xen: register 3-level event channel Wei Liu
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:22 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |  172 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 161 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 270821d..6bb9a47 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -201,6 +201,16 @@ static bool (*pirq_needs_eoi)(unsigned irq);
  */
 #define EVTCHN_WORD_BITORDER (sizeof(xen_ulong_t) == 8 ? 6 : 5)
 /*
+ * If we use the 3-level event channel ABI and the event word size is 64
+ * bits, we have 256k event channels in total; for 32 bits, we have 32k
+ * event channels in total. A page (4K) can represent 4096 * 8 = 32k
+ * event channels, so the bitmaps for the 3-level ABI need 1 page for
+ * 32 bits and 8 pages for 64 bits.
+ */
+#define BITMAP_PG_ORDER (BITS_PER_EVTCHN_WORD == 64 ? 3 : 0)
+#define BITMAP_NR_PAGES (BITMAP_PG_ORDER == 3 ? 8 : 1)
+
+/*
  * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
  * array. Primarily to avoid long lines (hence the terse name).
  */
@@ -2146,6 +2156,115 @@ uint64_t xen_event_channel_query_extended_abis(void)
 	return query.abis;
 }
 
+static int xen_event_channel_register_3level_bitmaps(void)
+{
+	struct evtchn_register_3level reg;
+	int i;
+	int rc;
+	xen_ulong_t _evtchn_pending[EVTCHN_MAX_L3_PAGES];
+	xen_ulong_t _evtchn_mask[EVTCHN_MAX_L3_PAGES];
+
+	/*
+	 * We can only register the 3-level ABI in the following states:
+	 * a) no extended ABI is in use
+	 * b) we come from the restore path, which already has the ABI
+	 *    set and the pages allocated
+	 */
+	if (!(xen_evtchn_extended == EVTCHN_EXTENDED_NONE ||
+	      (xen_evtchn_extended == EVTCHN_EXTENDED_L3 &&
+	       evtchn_pending && evtchn_mask)))
+		return -EINVAL;
+
+	/*
+	 * If we come from the restore path, we don't need to allocate
+	 * pages.
+	 */
+	if (!evtchn_pending && !evtchn_mask) {
+		/* Get zeroed pages */
+		evtchn_pending =
+			(xen_ulong_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+							BITMAP_PG_ORDER);
+		evtchn_mask =
+			(xen_ulong_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+							BITMAP_PG_ORDER);
+		if (!evtchn_pending || !evtchn_mask) {
+			free_pages((unsigned long)evtchn_pending, BITMAP_PG_ORDER);
+			free_pages((unsigned long)evtchn_mask, BITMAP_PG_ORDER);
+			evtchn_pending = NULL;
+			evtchn_mask = NULL;
+			rc = -ENOMEM;
+			goto err;
+		}
+	}
+
+	memset(&reg, 0, sizeof(reg));
+
+	for (i = 0; i < BITMAP_NR_PAGES; i++) {
+		unsigned long offset = PAGE_SIZE * i;
+		_evtchn_pending[i] =
+			arbitrary_virt_to_mfn(
+				(void *)((unsigned long)evtchn_pending+offset));
+		_evtchn_mask[i] =
+			arbitrary_virt_to_mfn(
+				(void *)((unsigned long)evtchn_mask+offset));
+	}
+
+	reg.cmd = REGISTER_BITMAPS;
+	reg.u.bitmaps.nr_pages = BITMAP_NR_PAGES;
+	reg.u.bitmaps.evtchn_pending = _evtchn_pending;
+	reg.u.bitmaps.evtchn_mask = _evtchn_mask;
+
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_register_3level, &reg);
+	if (rc) {
+		free_pages((unsigned long)evtchn_pending, BITMAP_PG_ORDER);
+		free_pages((unsigned long)evtchn_mask, BITMAP_PG_ORDER);
+		evtchn_pending = NULL;
+		evtchn_mask = NULL;
+	}
+
+err:
+	return rc;
+}
+
+int xen_event_channel_register_3level_l2selector(int cpu)
+{
+	struct evtchn_register_3level reg;
+	int rc;
+
+	memset(&reg, 0, sizeof(reg));
+
+	reg.cmd = REGISTER_L2_SELECTOR;
+
+	reg.u.l2_selector.cpu_id = cpu;
+	reg.u.l2_selector.mfn =
+		arbitrary_virt_to_mfn(&per_cpu(evtchn_sel_l2, cpu));
+	reg.u.l2_selector.offset =
+		offset_in_page(&per_cpu(evtchn_sel_l2, cpu));
+
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_register_3level, &reg);
+
+	if (rc == -EBUSY) /* already registered, this can happen in hotplug */
+		return 0;
+
+	if (!rc)
+		per_cpu(evtchn_sel, cpu)[1] = per_cpu(evtchn_sel_l2, cpu);
+
+	return rc;
+}
+
+static int xen_event_channel_register_3level(void)
+{
+	int rc;
+
+	rc = xen_event_channel_register_3level_bitmaps();
+	if (rc)
+		return rc;
+
+	rc = xen_event_channel_register_3level_l2selector(0);
+
+	return rc;
+}
+
 const struct evtchn_ops evtchn_l2_ops = {
 	.unmask = __unmask_local_port_l2,
 	.debug_interrupt = xen_debug_interrupt_l2,
@@ -2158,6 +2277,47 @@ const struct evtchn_ops evtchn_l3_ops = {
 	.do_upcall = __xen_evtchn_do_upcall_l3
 };
 
+void xen_set_event_channel_extended(uint64_t abi)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	int cpu;
+
+	switch (abi) {
+	case EVTCHN_EXTENDED_NONE:
+		evtchn_pending = s->evtchn_pending;
+		evtchn_mask = s->evtchn_mask;
+		for_each_possible_cpu(cpu) {
+			struct vcpu_info *vcpu_info = per_cpu(xen_vcpu, cpu);
+			per_cpu(evtchn_sel, cpu)[0] =
+				&vcpu_info->evtchn_pending_sel;
+		}
+		xen_evtchn_extended = EVTCHN_EXTENDED_NONE;
+		xen_nr_event_channels = NR_EVENT_CHANNELS_L2;
+		eops = &evtchn_l2_ops;
+		printk(KERN_INFO "Using 2-level event channel ABI.\n");
+		break;
+	case EVTCHN_EXTENDED_L3:
+		/* evtchn_pending/mask already set */
+		for_each_possible_cpu(cpu) {
+			struct vcpu_info *vcpu_info = per_cpu(xen_vcpu, cpu);
+			per_cpu(evtchn_sel, cpu)[0] =
+				&vcpu_info->evtchn_pending_sel;
+			per_cpu(evtchn_sel, cpu)[1] =
+				per_cpu(evtchn_sel_l2, cpu);
+		}
+		xen_evtchn_extended = EVTCHN_EXTENDED_L3;
+		xen_nr_event_channels = NR_EVENT_CHANNELS_L3;
+		eops = &evtchn_l3_ops;
+		printk(KERN_INFO "Using 3-level event channel ABI.\n");
+		break;
+	default:
+		printk(KERN_EMERG
+		       "Trying to set unsupported event channel ABI %llx\n",
+		       abi);
+		BUG();
+	}
+}
+
 static int __cpuinit xen_events_notifier_cb(struct notifier_block *self,
 					    unsigned long action,
 					    void *hcpu)
@@ -2197,18 +2357,8 @@ void __init xen_init_IRQ(void)
 {
 	int i;
 	int cpu;
-	struct shared_info *s = HYPERVISOR_shared_info;
-
-	evtchn_pending = s->evtchn_pending;
-	evtchn_mask = s->evtchn_mask;
-	for_each_possible_cpu(cpu) {
-		struct vcpu_info *vcpu_info = per_cpu(xen_vcpu, cpu);
-		per_cpu(evtchn_sel, cpu)[0] = &vcpu_info->evtchn_pending_sel;
-	}
 
-	xen_evtchn_extended = EVTCHN_EXTENDED_NONE;
-	xen_nr_event_channels = NR_EVENT_CHANNELS_L2;
-	eops = &evtchn_l2_ops;
+	xen_set_event_channel_extended(EVTCHN_EXTENDED_NONE);
 
 	evtchn_to_irq = kcalloc(xen_nr_event_channels, sizeof(*evtchn_to_irq),
 				    GFP_KERNEL);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread
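A quick stand-alone check of the page-order arithmetic behind
BITMAP_PG_ORDER/BITMAP_NR_PAGES above, assuming 4 KiB pages;
BITS_PER_EVTCHN_WORD here is a local stand-in for the kernel macro:

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE_BYTES        4096UL
#define BITS_PER_EVTCHN_WORD   64UL   /* change to 32UL for 32-bit xen_ulong_t */

#define NR_EVENT_CHANNELS_L3 \
	(BITS_PER_EVTCHN_WORD * BITS_PER_EVTCHN_WORD * BITS_PER_EVTCHN_WORD)
#define BITMAP_BYTES  (NR_EVENT_CHANNELS_L3 / 8)
#define BITMAP_PAGES  (BITMAP_BYTES / PAGE_SIZE_BYTES)

int main(void)
{
	/* 64-bit words: 64^3 = 262144 events -> 32 KiB -> 8 pages (order 3). */
	/* 32-bit words: 32^3 =  32768 events ->  4 KiB -> 1 page  (order 0). */
	assert(BITMAP_PAGES == (BITS_PER_EVTCHN_WORD == 64 ? 8 : 1));
	printf("%lu event channels need %lu bitmap page(s)\n",
	       NR_EVENT_CHANNELS_L3, BITMAP_PAGES);
	return 0;
}

With 64-bit words the third-level bitmap is 64^3 bits = 32 KiB = 8 pages
(order 3); with 32-bit words it is 32^3 bits = 4 KiB = a single page (order 0).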

* [RFC PATCH V5 13/14] xen: introduce xen_event_channel_register_extended
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (11 preceding siblings ...)
  2013-03-19 15:22 ` [RFC PATCH V5 12/14] xen: introduce xen_event_channel_register_3level Wei Liu
@ 2013-03-19 15:22 ` Wei Liu
  2013-03-19 15:22 ` [RFC PATCH V5 14/14] xen: register 3-level event channel Wei Liu
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:22 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |   26 ++++++++++++++++++++++++++
 include/xen/events.h |    6 ++++++
 2 files changed, 32 insertions(+)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 6bb9a47..6f21f27 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -2265,6 +2265,32 @@ static int xen_event_channel_register_3level(void)
 	return rc;
 }
 
+int xen_event_channel_register_extended(uint64_t abi)
+{
+	int rc = -EINVAL;
+
+	switch (abi) {
+	case EVTCHN_EXTENDED_L3:
+		rc = xen_event_channel_register_3level();
+		if (rc == 0)
+			printk(KERN_INFO
+			       "Registering 3-level event channel succeeded.\n");
+		else
+			printk(KERN_INFO
+			       "Registering 3-level event channel failed: %d\n",
+			       rc);
+		break;
+	default:
+		printk(KERN_EMERG
+		       "Trying to register unsupported event channel ABI %llx\n",
+		       abi);
+		BUG();
+	}
+
+	return rc;
+}
+
+
 const struct evtchn_ops evtchn_l2_ops = {
 	.unmask = __unmask_local_port_l2,
 	.debug_interrupt = xen_debug_interrupt_l2,
diff --git a/include/xen/events.h b/include/xen/events.h
index 49d54ac..a6a6024 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -117,4 +117,10 @@ extern uint64_t xen_evtchn_extended;
 /* Query hypervisor for supported / enabled extended event channel ABIs. */
 uint64_t xen_event_channel_query_extended_abis(void);
 
+/* Set extended event channel to "abi". */
+void xen_set_event_channel_extended(uint64_t abi);
+
+/* Register extended event channel. */
+int xen_event_channel_register_extended(uint64_t abi);
+
 #endif	/* _XEN_EVENTS_H */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [RFC PATCH V5 14/14] xen: register 3-level event channel
  2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
                   ` (12 preceding siblings ...)
  2013-03-19 15:22 ` [RFC PATCH V5 13/14] xen: introduce xen_event_channel_register_extended Wei Liu
@ 2013-03-19 15:22 ` Wei Liu
  13 siblings, 0 replies; 15+ messages in thread
From: Wei Liu @ 2013-03-19 15:22 UTC (permalink / raw)
  To: xen-devel, konrad.wilk; +Cc: Wei Liu, ian.campbell, jbeulich, david.vrabel

CPU hotplug is supported.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 arch/x86/xen/enlighten.c |   12 ++++++++++++
 drivers/xen/events.c     |   22 +++++++++++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 3556678..18edf66 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -213,6 +213,18 @@ void xen_vcpu_restore(void)
 		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
 			BUG();
 	}
+
+	/*
+	 * If we use any extended event channel ABI, we should try to
+	 * re-register it in the restore path. Currently only the 3-level
+	 * ABI is implemented, so the code is kept simple.
+	 */
+	if (xen_evtchn_extended & EVTCHN_EXTENDED_L3) {
+		int rc;
+		rc = xen_event_channel_register_extended(EVTCHN_EXTENDED_L3);
+		if (rc)
+			xen_set_event_channel_extended(EVTCHN_EXTENDED_NONE);
+	}
 }
 
 static void __init xen_banner(void)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 6f21f27..b7e5bc1 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -2368,6 +2368,11 @@ static int __cpuinit xen_events_notifier_cb(struct notifier_block *self,
 				rc = NOTIFY_OK;
 			}
 		}
+		if (rc == NOTIFY_OK &&
+		    xen_evtchn_extended & EVTCHN_EXTENDED_L3) {
+			rc = xen_event_channel_register_3level_l2selector(cpu);
+			rc = (rc == 0 ? NOTIFY_OK : NOTIFY_BAD);
+		}
 		break;
 	default:
 		break;
@@ -2383,8 +2388,23 @@ void __init xen_init_IRQ(void)
 {
 	int i;
 	int cpu;
+	uint64_t evtchn_ext_abis;
+	int rc, fallback_to_default_evtchn = 0;
+
+	evtchn_ext_abis = xen_event_channel_query_extended_abis();
+
+	if (evtchn_ext_abis == EVTCHN_EXTENDED_NONE)
+		fallback_to_default_evtchn = 1;
+	else if (evtchn_ext_abis & EVTCHN_EXTENDED_L3) {
+		rc = xen_event_channel_register_extended(EVTCHN_EXTENDED_L3);
+		if (rc == 0)
+			xen_set_event_channel_extended(EVTCHN_EXTENDED_L3);
+		else
+			fallback_to_default_evtchn = 1;
+	}
 
-	xen_set_event_channel_extended(EVTCHN_EXTENDED_NONE);
+	if (fallback_to_default_evtchn)
+		xen_set_event_channel_extended(EVTCHN_EXTENDED_NONE);
 
 	evtchn_to_irq = kcalloc(xen_nr_event_channels, sizeof(*evtchn_to_irq),
 				    GFP_KERNEL);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread
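A compact user-space model of the boot-time decision added to xen_init_IRQ()
in this patch: query the hypervisor, try to register the 3-level ABI, and fall
back to the default 2-level ABI on any failure. The constants and stub
functions below are assumptions standing in for the hypercall-backed helpers
in the series:

#include <stdint.h>
#include <stdio.h>

/* Placeholder constants; the real ones come from the Xen interface headers. */
#define EVTCHN_EXTENDED_NONE 0x0ULL
#define EVTCHN_EXTENDED_L3   0x1ULL

static uint64_t query_extended_abis(void)
{
	return EVTCHN_EXTENDED_L3;   /* pretend the host offers the 3-level ABI */
}

static int register_extended(uint64_t abi)
{
	(void)abi;
	return 0;                    /* pretend registration succeeded */
}

int main(void)
{
	uint64_t abis = query_extended_abis();
	uint64_t chosen = EVTCHN_EXTENDED_NONE;

	/* Try 3-level first; any failure falls back to the 2-level ABI. */
	if ((abis & EVTCHN_EXTENDED_L3) &&
	    register_extended(EVTCHN_EXTENDED_L3) == 0)
		chosen = EVTCHN_EXTENDED_L3;

	printf("Using %s event channel ABI.\n",
	       chosen == EVTCHN_EXTENDED_L3 ? "3-level" : "2-level");
	return 0;
}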

end of thread, other threads:[~2013-03-19 15:22 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-19 15:21 [RFC PATCH V5] Implement 3-level event channel ABI in Linux Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 01/14] xen: remove typedef in event_channel.h Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 02/14] xen: add KERN_DEBUG in printk Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 03/14] xen: fix output of xen_debug_interrupt Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 04/14] xen: sync public headers Wei Liu
2013-03-19 15:21 ` [RFC PATCH V5 05/14] xen: introduce test_and_set_mask Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 06/14] xen: replace raw bit ops with functions Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 07/14] xen: generalized event channel operations Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 08/14] xen: dynamically allocate cpu_evtchn_mask Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 09/14] xen: implement 3-level event channel routines Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 10/14] xen: document 2/3-level event channel ABI Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 11/14] xen: introduce xen_event_channel_query_extended_abis Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 12/14] xen: introduce xen_event_channel_register_3level Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 13/14] xen: introduce xen_event_channel_register_extended Wei Liu
2013-03-19 15:22 ` [RFC PATCH V5 14/14] xen: register 3-level event channel Wei Liu
