All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH V2 0/8] Implement 3-level event channel in Linux
@ 2013-01-21 14:58 Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 1/8] xen: fix output of xen_debug_interrupt Wei Liu
                   ` (7 more replies)
  0 siblings, 8 replies; 11+ messages in thread
From: Wei Liu @ 2013-01-21 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, ian.campbell, jbeulich, konrad.wilk

This is version 2 of the patch series.

Change from V1 (apart from normal fixes and cleanups):
* New debug interrupt handler; avoid flooding the log with useless output
* Register 3-level event channels in different locations, please see last
  changeset


Thanks
Wei.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [RFC PATCH V2 1/8] xen: fix output of xen_debug_interrupt
  2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
@ 2013-01-21 14:58 ` Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 2/8] xen: sync public headers Wei Liu
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Wei Liu @ 2013-01-21 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, Wei Liu, ian.campbell, jbeulich, konrad.wilk

Three things are fixed:
 a) the test result of a globally masked event;
 b) refer to the per-cpu selector as L1, consistent with the description in
    __xen_evtchn_do_upcall's comment;
 c) the test result of L1 selector.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 7595581..dadeea4 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1231,9 +1231,9 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 			printk("  %d: event %d -> irq %d%s%s%s\n",
 			       cpu_from_evtchn(i), i,
 			       evtchn_to_irq[i],
-			       sync_test_bit(word_idx, &v->evtchn_pending_sel)
-					     ? "" : " l2-clear",
-			       !sync_test_bit(i, sh->evtchn_mask)
+			       !sync_test_bit(word_idx, &v->evtchn_pending_sel)
+					     ? "" : " l1-clear",
+			       sync_test_bit(i, sh->evtchn_mask)
 					     ? "" : " globally-masked",
 			       sync_test_bit(i, cpu_evtchn)
 					     ? "" : " locally-masked");
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH V2 2/8] xen: sync public headers
  2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 1/8] xen: fix output of xen_debug_interrupt Wei Liu
@ 2013-01-21 14:58 ` Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 3/8] xen: generalized event channel operations Wei Liu
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Wei Liu @ 2013-01-21 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, Wei Liu, ian.campbell, jbeulich, konrad.wilk

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 include/xen/interface/event_channel.h |   33 +++++++++++++++++++++++++++++++++
 include/xen/interface/xen.h           |    7 ++++++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h
index f494292..3a3ac83 100644
--- a/include/xen/interface/event_channel.h
+++ b/include/xen/interface/event_channel.h
@@ -190,6 +190,39 @@ struct evtchn_reset {
 };
 typedef struct evtchn_reset evtchn_reset_t;
 
+/*
+ * EVTCHNOP_register_nlevel: Register N-level event channel
+ * NOTES:
+ *  1. Currently only 3-level is supported.
+ *  2. Should fall back to 2-level if this call fails.
+ */
+#define EVTCHNOP_register_nlevel 11
+/* 64 bit guests need 8 pages for evtchn_pending and evtchn_mask for
+ * 256k event channels while 32 bit ones only need 1 page for 32k
+ * event channels. */
+#define EVTCHN_MAX_L3_PAGES 8
+struct evtchn_register_3level {
+	/* IN parameters. */
+	uint32_t nr_pages;
+	GUEST_HANDLE(ulong) evtchn_pending;
+	GUEST_HANDLE(ulong) evtchn_mask;
+	uint32_t nr_vcpus;
+	GUEST_HANDLE(ulong) l2sel_mfns;
+	GUEST_HANDLE(ulong) l2sel_offsets;
+};
+typedef struct evtchn_register_3level evtchn_register_3level_t;
+DEFINE_GUEST_HANDLE(evtchn_register_3level_t);
+
+struct evtchn_register_nlevel {
+	/* IN parameters. */
+	uint32_t level;
+	union {
+		GUEST_HANDLE(evtchn_register_3level_t) l3;
+	} u;
+};
+typedef struct evtchn_register_nlevel evtchn_register_nlevel_t;
+DEFINE_GUEST_HANDLE(evtchn_register_nlevel_t);
+
 struct evtchn_op {
 	uint32_t cmd; /* EVTCHNOP_* */
 	union {
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index a890804..7e51fb7 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -283,9 +283,14 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
 
 /*
  * Event channel endpoints per domain:
+ * 2-level:
  *  1024 if a long is 32 bits; 4096 if a long is 64 bits.
+ * 3-level:
+ *  32k if a long is 32 bits; 256k if a long is 64 bits.
  */
-#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
+#define NR_EVENT_CHANNELS_L2 (sizeof(unsigned long) * sizeof(unsigned long) * 64)
+#define NR_EVENT_CHANNELS_L3 (NR_EVENT_CHANNELS_L2 * 64)
+#define NR_EVENT_CHANNELS NR_EVENT_CHANNELS_L2 /* for compatibility */
 
 struct vcpu_time_info {
 	/*
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH V2 3/8] xen: generalized event channel operations
  2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 1/8] xen: fix output of xen_debug_interrupt Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 2/8] xen: sync public headers Wei Liu
@ 2013-01-21 14:58 ` Wei Liu
  2013-01-22  9:00   ` Jan Beulich
  2013-01-21 14:58 ` [RFC PATCH V2 4/8] xen: dynamically allocate cpu_evtchn_mask Wei Liu
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 11+ messages in thread
From: Wei Liu @ 2013-01-21 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, Wei Liu, ian.campbell, jbeulich, konrad.wilk

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |  133 ++++++++++++++++++++++++++++++++++----------------
 drivers/xen/evtchn.c |   14 +++---
 2 files changed, 98 insertions(+), 49 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index dadeea4..6c1917e 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -51,6 +51,27 @@
 #include <xen/interface/hvm/hvm_op.h>
 #include <xen/interface/hvm/params.h>
 
+/* N-level event channel, starting from 2 */
+unsigned int evtchn_level = 2;
+EXPORT_SYMBOL_GPL(evtchn_level);
+unsigned int nr_event_channels;
+EXPORT_SYMBOL_GPL(nr_event_channels);
+
+struct evtchn_ops {
+	unsigned long (*active_evtchns)   (unsigned int cpu,
+					   struct shared_info *sh,
+					   unsigned int idx);
+	void          (*clear_evtchn)     (int port);
+	void          (*set_evtchn)       (int port);
+	int           (*test_evtchn)      (int port);
+	void          (*mask_evtchn)      (int port);
+	void          (*unmask_evtchn)    (int port);
+	int           (*test_and_set_mask)(int port);
+	void          (*do_upcall)        (void);
+	irqreturn_t   (*debug_interrupt)  (int irq, void *dev_id);
+};
+static struct evtchn_ops *eops;
+
 /*
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
@@ -113,7 +134,7 @@ static int *evtchn_to_irq;
 static unsigned long *pirq_eoi_map;
 static bool (*pirq_needs_eoi)(unsigned irq);
 
-static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG],
+static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS_L2/BITS_PER_LONG],
 		      cpu_evtchn_mask);
 
 /* Xen will never allocate port zero for any purpose. */
@@ -285,9 +306,9 @@ static bool pirq_needs_eoi_flag(unsigned irq)
 	return info->u.pirq.flags & PIRQ_NEEDS_EOI;
 }
 
-static inline unsigned long active_evtchns(unsigned int cpu,
-					   struct shared_info *sh,
-					   unsigned int idx)
+static inline unsigned long active_evtchns_l2(unsigned int cpu,
+					      struct shared_info *sh,
+					      unsigned int idx)
 {
 	return sh->evtchn_pending[idx] &
 		per_cpu(cpu_evtchn_mask, cpu)[idx] &
@@ -327,19 +348,19 @@ static void init_evtchn_cpu_bindings(void)
 		       (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
 }
 
-static inline void clear_evtchn(int port)
+static inline void clear_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	sync_clear_bit(port, &s->evtchn_pending[0]);
 }
 
-static inline void set_evtchn(int port)
+static inline void set_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	sync_set_bit(port, &s->evtchn_pending[0]);
 }
 
-static inline int test_evtchn(int port)
+static inline int test_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	return sync_test_bit(port, &s->evtchn_pending[0]);
@@ -363,13 +384,13 @@ void notify_remote_via_irq(int irq)
 }
 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
 
-static void mask_evtchn(int port)
+static void mask_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	sync_set_bit(port, &s->evtchn_mask[0]);
 }
 
-static void unmask_evtchn(int port)
+static void unmask_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	unsigned int cpu = get_cpu();
@@ -521,7 +542,7 @@ static void eoi_pirq(struct irq_data *data)
 	irq_move_irq(data);
 
 	if (VALID_EVTCHN(evtchn))
-		clear_evtchn(evtchn);
+		eops->clear_evtchn(evtchn);
 
 	if (pirq_needs_eoi(data->irq)) {
 		rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
@@ -567,7 +588,7 @@ static unsigned int __startup_pirq(unsigned int irq)
 	info->evtchn = evtchn;
 
 out:
-	unmask_evtchn(evtchn);
+	eops->unmask_evtchn(evtchn);
 	eoi_pirq(irq_get_irq_data(irq));
 
 	return 0;
@@ -590,7 +611,7 @@ static void shutdown_pirq(struct irq_data *data)
 	if (!VALID_EVTCHN(evtchn))
 		return;
 
-	mask_evtchn(evtchn);
+	eops->mask_evtchn(evtchn);
 
 	close.port = evtchn;
 	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
@@ -896,7 +917,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
 	int port, rc = -ENOENT;
 
 	memset(&status, 0, sizeof(status));
-	for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+	for (port = 0; port <= nr_event_channels; port++) {
 		status.dom = DOMID_SELF;
 		status.port = port;
 		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
@@ -1121,7 +1142,7 @@ int evtchn_get(unsigned int evtchn)
 	struct irq_info *info;
 	int err = -ENOENT;
 
-	if (evtchn >= NR_EVENT_CHANNELS)
+	if (evtchn >= nr_event_channels)
 		return -EINVAL;
 
 	mutex_lock(&irq_mapping_update_lock);
@@ -1164,7 +1185,7 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 	notify_remote_via_irq(irq);
 }
 
-irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+static irqreturn_t debug_interrupt_l2(int irq, void *dev_id)
 {
 	struct shared_info *sh = HYPERVISOR_shared_info;
 	int cpu = smp_processor_id();
@@ -1210,7 +1231,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 		       i % 8 == 0 ? "\n   " : " ");
 
 	printk("\nlocal cpu%d mask:\n   ", cpu);
-	for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--)
+	for (i = (NR_EVENT_CHANNELS_L2/BITS_PER_LONG)-1; i >= 0; i--)
 		printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
 		       cpu_evtchn[i],
 		       i % 8 == 0 ? "\n   " : " ");
@@ -1225,7 +1246,7 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 	}
 
 	printk("\npending list:\n");
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+	for (i = 0; i < NR_EVENT_CHANNELS_L2; i++) {
 		if (sync_test_bit(i, sh->evtchn_pending)) {
 			int word_idx = i / BITS_PER_LONG;
 			printk("  %d: event %d -> irq %d%s%s%s\n",
@@ -1245,6 +1266,11 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+{
+	return eops->debug_interrupt(irq, dev_id);
+}
+
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
 static DEFINE_PER_CPU(unsigned int, current_word_idx);
 static DEFINE_PER_CPU(unsigned int, current_bit_idx);
@@ -1263,7 +1289,7 @@ static DEFINE_PER_CPU(unsigned int, current_bit_idx);
  * a bitset of words which contain pending event bits.  The second
  * level is a bitset of pending events themselves.
  */
-static void __xen_evtchn_do_upcall(void)
+static void do_upcall_l2(void)
 {
 	int start_word_idx, start_bit_idx;
 	int word_idx, bit_idx;
@@ -1308,7 +1334,7 @@ static void __xen_evtchn_do_upcall(void)
 			}
 			word_idx = __ffs(words);
 
-			pending_bits = active_evtchns(cpu, s, word_idx);
+			pending_bits = eops->active_evtchns(cpu, s, word_idx);
 			bit_idx = 0; /* usually scan entire word from start */
 			if (word_idx == start_word_idx) {
 				/* We scan the starting word in two parts */
@@ -1377,7 +1403,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 	exit_idle();
 	irq_enter();
 
-	__xen_evtchn_do_upcall();
+	eops->do_upcall();
 
 	irq_exit();
 	set_irq_regs(old_regs);
@@ -1385,7 +1411,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 
 void xen_hvm_evtchn_do_upcall(void)
 {
-	__xen_evtchn_do_upcall();
+	eops->do_upcall();
 }
 EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
 
@@ -1459,15 +1485,14 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
 int resend_irq_on_evtchn(unsigned int irq)
 {
 	int masked, evtchn = evtchn_from_irq(irq);
-	struct shared_info *s = HYPERVISOR_shared_info;
 
 	if (!VALID_EVTCHN(evtchn))
 		return 1;
 
-	masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);
-	sync_set_bit(evtchn, s->evtchn_pending);
+	masked = eops->test_and_set_mask(evtchn);
+	eops->set_evtchn(evtchn);
 	if (!masked)
-		unmask_evtchn(evtchn);
+		eops->unmask_evtchn(evtchn);
 
 	return 1;
 }
@@ -1477,7 +1502,7 @@ static void enable_dynirq(struct irq_data *data)
 	int evtchn = evtchn_from_irq(data->irq);
 
 	if (VALID_EVTCHN(evtchn))
-		unmask_evtchn(evtchn);
+		eops->unmask_evtchn(evtchn);
 }
 
 static void disable_dynirq(struct irq_data *data)
@@ -1485,7 +1510,7 @@ static void disable_dynirq(struct irq_data *data)
 	int evtchn = evtchn_from_irq(data->irq);
 
 	if (VALID_EVTCHN(evtchn))
-		mask_evtchn(evtchn);
+		eops->mask_evtchn(evtchn);
 }
 
 static void ack_dynirq(struct irq_data *data)
@@ -1495,7 +1520,7 @@ static void ack_dynirq(struct irq_data *data)
 	irq_move_irq(data);
 
 	if (VALID_EVTCHN(evtchn))
-		clear_evtchn(evtchn);
+		eops->clear_evtchn(evtchn);
 }
 
 static void mask_ack_dynirq(struct irq_data *data)
@@ -1504,19 +1529,24 @@ static void mask_ack_dynirq(struct irq_data *data)
 	ack_dynirq(data);
 }
 
+static inline int test_and_set_mask_l2(int chn)
+{
+	struct shared_info *sh = HYPERVISOR_shared_info;
+	return sync_test_and_set_bit(chn, sh->evtchn_mask);
+}
+
 static int retrigger_dynirq(struct irq_data *data)
 {
 	int evtchn = evtchn_from_irq(data->irq);
-	struct shared_info *sh = HYPERVISOR_shared_info;
 	int ret = 0;
 
 	if (VALID_EVTCHN(evtchn)) {
 		int masked;
 
-		masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
-		sync_set_bit(evtchn, sh->evtchn_pending);
+		masked = eops->test_and_set_mask(evtchn);
+		eops->set_evtchn(evtchn);
 		if (!masked)
-			unmask_evtchn(evtchn);
+			eops->unmask_evtchn(evtchn);
 		ret = 1;
 	}
 
@@ -1616,7 +1646,7 @@ void xen_clear_irq_pending(int irq)
 	int evtchn = evtchn_from_irq(irq);
 
 	if (VALID_EVTCHN(evtchn))
-		clear_evtchn(evtchn);
+		eops->clear_evtchn(evtchn);
 }
 EXPORT_SYMBOL(xen_clear_irq_pending);
 void xen_set_irq_pending(int irq)
@@ -1624,7 +1654,7 @@ void xen_set_irq_pending(int irq)
 	int evtchn = evtchn_from_irq(irq);
 
 	if (VALID_EVTCHN(evtchn))
-		set_evtchn(evtchn);
+		eops->set_evtchn(evtchn);
 }
 
 bool xen_test_irq_pending(int irq)
@@ -1633,7 +1663,7 @@ bool xen_test_irq_pending(int irq)
 	bool ret = false;
 
 	if (VALID_EVTCHN(evtchn))
-		ret = test_evtchn(evtchn);
+		ret = eops->test_evtchn(evtchn);
 
 	return ret;
 }
@@ -1683,14 +1713,14 @@ void xen_irq_resume(void)
 	init_evtchn_cpu_bindings();
 
 	/* New event-channel space is not 'live' yet. */
-	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
-		mask_evtchn(evtchn);
+	for (evtchn = 0; evtchn < nr_event_channels; evtchn++)
+		eops->mask_evtchn(evtchn);
 
 	/* No IRQ <-> event-channel mappings. */
 	list_for_each_entry(info, &xen_irq_list_head, list)
 		info->evtchn = 0; /* zap event-channel binding */
 
-	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+	for (evtchn = 0; evtchn < nr_event_channels; evtchn++)
 		evtchn_to_irq[evtchn] = -1;
 
 	for_each_possible_cpu(cpu) {
@@ -1783,21 +1813,38 @@ void xen_callback_vector(void)
 void xen_callback_vector(void) {}
 #endif
 
+static struct evtchn_ops evtchn_ops_l2 __read_mostly = {
+	.active_evtchns    = active_evtchns_l2,
+	.clear_evtchn      = clear_evtchn_l2,
+	.set_evtchn        = set_evtchn_l2,
+	.test_evtchn       = test_evtchn_l2,
+	.mask_evtchn       = mask_evtchn_l2,
+	.unmask_evtchn     = unmask_evtchn_l2,
+	.test_and_set_mask = test_and_set_mask_l2,
+	.do_upcall         = do_upcall_l2,
+	.debug_interrupt   = debug_interrupt_l2,
+};
+
 void __init xen_init_IRQ(void)
 {
 	int i, rc;
 
-	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
-				    GFP_KERNEL);
+	evtchn_level = 2;
+	nr_event_channels = NR_EVENT_CHANNELS_L2;
+	eops = &evtchn_ops_l2;
+
+	/* Setup 2-level event channel */
+	evtchn_to_irq = kcalloc(nr_event_channels, sizeof(*evtchn_to_irq),
+				GFP_KERNEL);
 	BUG_ON(!evtchn_to_irq);
-	for (i = 0; i < NR_EVENT_CHANNELS; i++)
+	for (i = 0; i < nr_event_channels; i++)
 		evtchn_to_irq[i] = -1;
 
 	init_evtchn_cpu_bindings();
 
 	/* No event channels are 'live' right now. */
-	for (i = 0; i < NR_EVENT_CHANNELS; i++)
-		mask_evtchn(i);
+	for (i = 0; i < nr_event_channels; i++)
+		eops->mask_evtchn(i);
 
 	pirq_needs_eoi = pirq_needs_eoi_flag;
 
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index b1f60a0..975e970 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -53,6 +53,8 @@
 #include <xen/evtchn.h>
 #include <asm/xen/hypervisor.h>
 
+extern unsigned int nr_event_channels;
+
 struct per_user_data {
 	struct mutex bind_mutex; /* serialize bind/unbind operations */
 
@@ -232,7 +234,7 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 	for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) {
 		unsigned port = kbuf[i];
 
-		if (port < NR_EVENT_CHANNELS &&
+		if (port < nr_event_channels &&
 		    get_port_user(port) == u &&
 		    !get_port_enabled(port)) {
 			set_port_enabled(port, true);
@@ -364,7 +366,7 @@ static long evtchn_ioctl(struct file *file,
 			break;
 
 		rc = -EINVAL;
-		if (unbind.port >= NR_EVENT_CHANNELS)
+		if (unbind.port >= nr_event_channels)
 			break;
 
 		spin_lock_irq(&port_user_lock);
@@ -392,7 +394,7 @@ static long evtchn_ioctl(struct file *file,
 		if (copy_from_user(&notify, uarg, sizeof(notify)))
 			break;
 
-		if (notify.port >= NR_EVENT_CHANNELS) {
+		if (notify.port >= nr_event_channels) {
 			rc = -EINVAL;
 		} else if (get_port_user(notify.port) != u) {
 			rc = -ENOTCONN;
@@ -482,7 +484,7 @@ static int evtchn_release(struct inode *inode, struct file *filp)
 
 	free_page((unsigned long)u->ring);
 
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+	for (i = 0; i < nr_event_channels; i++) {
 		if (get_port_user(i) != u)
 			continue;
 
@@ -491,7 +493,7 @@ static int evtchn_release(struct inode *inode, struct file *filp)
 
 	spin_unlock_irq(&port_user_lock);
 
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+	for (i = 0; i < nr_event_channels; i++) {
 		if (get_port_user(i) != u)
 			continue;
 
@@ -528,7 +530,7 @@ static int __init evtchn_init(void)
 	if (!xen_domain())
 		return -ENODEV;
 
-	port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL);
+	port_user = kcalloc(nr_event_channels, sizeof(*port_user), GFP_KERNEL);
 	if (port_user == NULL)
 		return -ENOMEM;
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH V2 4/8] xen: dynamically allocate cpu_evtchn_mask
  2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
                   ` (2 preceding siblings ...)
  2013-01-21 14:58 ` [RFC PATCH V2 3/8] xen: generalized event channel operations Wei Liu
@ 2013-01-21 14:58 ` Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 5/8] xen: implement 3-level event channel routines Wei Liu
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 11+ messages in thread
From: Wei Liu @ 2013-01-21 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, Wei Liu, ian.campbell, jbeulich, konrad.wilk

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |   19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 6c1917e..913ef0c 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -134,8 +134,7 @@ static int *evtchn_to_irq;
 static unsigned long *pirq_eoi_map;
 static bool (*pirq_needs_eoi)(unsigned irq);
 
-static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS_L2/BITS_PER_LONG],
-		      cpu_evtchn_mask);
+static DEFINE_PER_CPU(unsigned long *, cpu_evtchn_mask);
 
 /* Xen will never allocate port zero for any purpose. */
 #define VALID_EVTCHN(chn)	((chn) != 0)
@@ -1828,6 +1827,7 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = {
 void __init xen_init_IRQ(void)
 {
 	int i, rc;
+	int cpu;
 
 	evtchn_level = 2;
 	nr_event_channels = NR_EVENT_CHANNELS_L2;
@@ -1837,6 +1837,21 @@ void __init xen_init_IRQ(void)
 	evtchn_to_irq = kcalloc(nr_event_channels, sizeof(*evtchn_to_irq),
 				GFP_KERNEL);
 	BUG_ON(!evtchn_to_irq);
+
+	for_each_possible_cpu(cpu) {
+		void *p;
+		unsigned int nr = nr_event_channels / BITS_PER_LONG;
+
+		p = kzalloc_node(sizeof(unsigned long) * nr,
+				 GFP_KERNEL,
+				 cpu_to_node(cpu));
+		if (!p)
+			p = kzalloc(sizeof(unsigned long) * nr,
+				    GFP_KERNEL);
+		BUG_ON(!p);
+		per_cpu(cpu_evtchn_mask, cpu) = p;
+	}
+
 	for (i = 0; i < nr_event_channels; i++)
 		evtchn_to_irq[i] = -1;
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH V2 5/8] xen: implement 3-level event channel routines
  2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
                   ` (3 preceding siblings ...)
  2013-01-21 14:58 ` [RFC PATCH V2 4/8] xen: dynamically allocate cpu_evtchn_mask Wei Liu
@ 2013-01-21 14:58 ` Wei Liu
  2013-01-22  9:12   ` Jan Beulich
  2013-01-21 14:58 ` [RFC PATCH V2 6/8] xen: introduce xen_event_channel_register_3level Wei Liu
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 11+ messages in thread
From: Wei Liu @ 2013-01-21 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, Wei Liu, ian.campbell, jbeulich, konrad.wilk

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |  407 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 385 insertions(+), 22 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 913ef0c..5b45441 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -57,6 +57,16 @@ EXPORT_SYMBOL_GPL(evtchn_level);
 unsigned int nr_event_channels;
 EXPORT_SYMBOL_GPL(nr_event_channels);
 
+/* 2nd level selector for 3-level event channel */
+DEFINE_PER_CPU(unsigned long [sizeof(unsigned long) * 8], evtchn_sel_l2);
+/* shared bitmaps for 3-level event channel */
+#define __NR_ELEMS (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)
+unsigned long evtchn_pending[__NR_ELEMS] __page_aligned_bss;
+unsigned long evtchn_mask   [__NR_ELEMS] __page_aligned_bss;
+#undef __NR_ELEMS
+/* Helper macros */
+#define LONG_BITORDER (BITS_PER_LONG == 64 ? 6 : 5)
+
 struct evtchn_ops {
 	unsigned long (*active_evtchns)   (unsigned int cpu,
 					   struct shared_info *sh,
@@ -314,6 +324,15 @@ static inline unsigned long active_evtchns_l2(unsigned int cpu,
 		~sh->evtchn_mask[idx];
 }
 
+static inline unsigned long active_evtchns_l3(unsigned int cpu,
+					      struct shared_info *sh,
+					      unsigned int idx)
+{
+	return evtchn_pending[idx] &
+		per_cpu(cpu_evtchn_mask, cpu)[idx] &
+		~evtchn_mask[idx];
+}
+
 static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 {
 	int irq = evtchn_to_irq[chn];
@@ -353,18 +372,32 @@ static inline void clear_evtchn_l2(int port)
 	sync_clear_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline void clear_evtchn_l3(int port)
+{
+	sync_clear_bit(port, &evtchn_pending[0]);
+}
+
 static inline void set_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	sync_set_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline void set_evtchn_l3(int port)
+{
+	sync_set_bit(port, &evtchn_pending[0]);
+}
+
 static inline int test_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
 	return sync_test_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline int test_evtchn_l3(int port)
+{
+	return sync_test_bit(port, &evtchn_pending[0]);
+}
 
 /**
  * notify_remote_via_irq - send event to remote end of event channel via irq
@@ -389,6 +422,11 @@ static void mask_evtchn_l2(int port)
 	sync_set_bit(port, &s->evtchn_mask[0]);
 }
 
+static void mask_evtchn_l3(int port)
+{
+	sync_set_bit(port, &evtchn_mask[0]);
+}
+
 static void unmask_evtchn_l2(int port)
 {
 	struct shared_info *s = HYPERVISOR_shared_info;
@@ -419,6 +457,40 @@ static void unmask_evtchn_l2(int port)
 	put_cpu();
 }
 
+static void unmask_evtchn_l3(int port)
+{
+	unsigned int cpu = get_cpu();
+	unsigned int l1bit = port >> (LONG_BITORDER << 1);
+	unsigned int l2bit = port >> LONG_BITORDER;
+
+	BUG_ON(!irqs_disabled());
+
+	/* Slow path (hypercall) if this is a non-local port. */
+	if (unlikely(cpu != cpu_from_evtchn(port))) {
+		struct evtchn_unmask unmask = { .port = port };
+		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+	} else {
+		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+		sync_clear_bit(port, &evtchn_mask[0]);
+
+		/*
+		 * The following is basically the equivalent of
+		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+		 * the interrupt edge' if the channel is masked.
+		 */
+		if (sync_test_bit(port, &evtchn_pending[0]) &&
+		    !sync_test_and_set_bit(l2bit,
+					   &per_cpu(evtchn_sel_l2, cpu)[0]) &&
+		    !sync_test_and_set_bit(l1bit,
+					   &vcpu_info->evtchn_pending_sel))
+			vcpu_info->evtchn_upcall_pending = 1;
+	}
+
+	put_cpu();
+}
+
+
 static void xen_irq_init(unsigned irq)
 {
 	struct irq_info *info;
@@ -1190,25 +1262,8 @@ static irqreturn_t debug_interrupt_l2(int irq, void *dev_id)
 	int cpu = smp_processor_id();
 	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
 	int i;
-	unsigned long flags;
-	static DEFINE_SPINLOCK(debug_lock);
 	struct vcpu_info *v;
 
-	spin_lock_irqsave(&debug_lock, flags);
-
-	printk("\nvcpu %d\n  ", cpu);
-
-	for_each_online_cpu(i) {
-		int pending;
-		v = per_cpu(xen_vcpu, i);
-		pending = (get_irq_regs() && i == cpu)
-			? xen_irqs_disabled(get_irq_regs())
-			: v->evtchn_upcall_mask;
-		printk("%d: masked=%d pending=%d event_sel %0*lx\n  ", i,
-		       pending, v->evtchn_upcall_pending,
-		       (int)(sizeof(v->evtchn_pending_sel)*2),
-		       v->evtchn_pending_sel);
-	}
 	v = per_cpu(xen_vcpu, cpu);
 
 	printk("\npending:\n   ");
@@ -1260,18 +1315,143 @@ static irqreturn_t debug_interrupt_l2(int irq, void *dev_id)
 		}
 	}
 
-	spin_unlock_irqrestore(&debug_lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t debug_interrupt_l3(int irq, void *dev_id)
+{
+	int cpu = smp_processor_id();
+	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	int i;
+	struct vcpu_info *v;
+
+	v = per_cpu(xen_vcpu, cpu);
+
+	printk("\npending (only show words which have bits set to 1):\n   ");
+	for (i = ARRAY_SIZE(evtchn_pending)-1; i >= 0; i--)
+		if (evtchn_pending[i] != 0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)sizeof(evtchn_pending[0])*2,
+			       evtchn_pending[i]);
+		}
+
+	printk("\nglobal mask (only show words which have bits set to 0):\n   ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+		if (evtchn_mask[i] != ~0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)sizeof(evtchn_mask[0])*2,
+			       evtchn_mask[i]);
+		}
+
+	printk("\nglobally unmasked (only show result words which have bits set to 1):\n   ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--)
+		if ((evtchn_pending[i] & ~evtchn_mask[i]) != 0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       evtchn_pending[i] & ~evtchn_mask[i]);
+		}
+
+	printk("\nlocal cpu%d mask (only show words which have bits set to 1):\n   ", cpu);
+	for (i = (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)-1; i >= 0; i--)
+		if (cpu_evtchn[i] != 0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)(sizeof(cpu_evtchn[0])*2),
+			       cpu_evtchn[i]);
+		}
+
+	printk("\nlocally unmasked (only show result words which have bits set to 1):\n   ");
+	for (i = ARRAY_SIZE(evtchn_mask)-1; i >= 0; i--) {
+		unsigned long pending = evtchn_pending[i]
+			& ~evtchn_mask[i]
+			& cpu_evtchn[i];
+		if (pending != 0UL) {
+			printk(" word index %d %0*lx\n",
+			       i,
+			       (int)(sizeof(evtchn_mask[0])*2),
+			       pending);
+		}
+	}
+
+	printk("\npending list:\n");
+	for (i = 0; i < NR_EVENT_CHANNELS_L3; i++) {
+		if (sync_test_bit(i, evtchn_pending)) {
+			int word_idx = i / (BITS_PER_LONG * BITS_PER_LONG);
+			int word_idx_l2 = i / BITS_PER_LONG;
+			printk("  %d: event %d -> irq %d%s%s%s%s\n",
+			       cpu_from_evtchn(i), i,
+			       evtchn_to_irq[i],
+			       !sync_test_bit(word_idx, &v->evtchn_pending_sel)
+					     ? "" : " l1-clear",
+			       !sync_test_bit(word_idx_l2, per_cpu(evtchn_sel_l2, cpu))
+					     ? "" : " l2-clear",
+			       sync_test_bit(i, evtchn_mask)
+					     ? "" : " globally-masked",
+			       sync_test_bit(i, cpu_evtchn)
+					     ? "" : " locally-masked");
+		}
+	}
 
 	return IRQ_HANDLED;
 }
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
 {
-	return eops->debug_interrupt(irq, dev_id);
+	irqreturn_t rc;
+	static DEFINE_SPINLOCK(debug_lock);
+	unsigned long flags;
+	int cpu = smp_processor_id();
+	struct vcpu_info *v;
+	int i;
+
+	spin_lock_irqsave(&debug_lock, flags);
+
+	printk("\nvcpu %d\n  ", cpu);
+
+	for_each_online_cpu(i) {
+		int pending;
+		v = per_cpu(xen_vcpu, i);
+		pending = (get_irq_regs() && i == cpu)
+			? xen_irqs_disabled(get_irq_regs())
+			: v->evtchn_upcall_mask;
+		printk("%d: masked=%d pending=%d event_sel %0*lx\n  ", i,
+		       pending, v->evtchn_upcall_pending,
+		       (int)(sizeof(v->evtchn_pending_sel)*2),
+		       v->evtchn_pending_sel);
+	}
+
+	rc = eops->debug_interrupt(irq, dev_id);
+
+	spin_unlock_irqrestore(&debug_lock, flags);
+
+	return rc;
 }
 
+/* The following per-cpu variables save the current state of the event
+ * processing loop.
+ *
+ * 2-level event channel:
+ *  current_word_idx is the bit index in L1 selector indicating the currently
+ *  processing word in shared bitmap.
+ *  current_bit_idx is the bit index in the currently processing word in shared
+ *  bitmap.
+ *  N.B. current_word_idx_l2 is not used.
+ *
+ * 3-level event channel:
+ *  current_word_idx is the bit index in L1 selector indicating the currently
+ *  processing word in L2 selector.
+ *  current_word_idx_l2 is the bit index in L2 selector word indicating the
+ *  currently processing word in shared bitmap.
+ *  current_bit_idx is the bit index in the currently processing word in shared
+ *  bitmap.
+ *
+ */
 static DEFINE_PER_CPU(unsigned, xed_nesting_count);
 static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_word_idx_l2);
 static DEFINE_PER_CPU(unsigned int, current_bit_idx);
 
 /*
@@ -1395,6 +1575,163 @@ out:
 	put_cpu();
 }
 
+/*
+ * 3-level upcall.  Each set bit in the vcpu's first-level selector names
+ * a word of this cpu's second-level selector; each set bit in that word
+ * names a word of the third level, whose set bits are the events to
+ * deliver.  Scanning resumes at the position saved in current_word_idx,
+ * current_word_idx_l2 and current_bit_idx instead of always at port 0.
+ */
+static void do_upcall_l3(void)
+{
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+	unsigned count;
+	int start_word_idx_l1, start_word_idx_l2, start_bit_idx;
+	int word_idx_l1, word_idx_l2, bit_idx;
+	int i, j;
+	unsigned long l1cb, l2cb;	/* ports per L1 bit / per L2 bit */
+	int cpu = get_cpu();
+
+	l1cb = BITS_PER_LONG * BITS_PER_LONG;
+	l2cb = BITS_PER_LONG;
+
+	do {
+		unsigned long pending_words_l1;
+
+		vcpu_info->evtchn_upcall_pending = 0;
+
+		if (__this_cpu_inc_return(xed_nesting_count) - 1)
+			goto out;
+#ifndef CONFIG_X86
+		/* No need for a barrier -- XCHG is a barrier on x86. */
+		/* Clear master flag /before/ clearing selector flag. */
+		wmb();
+#endif
+		/* Atomically fetch and clear the L1 pending selector. */
+		pending_words_l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+		start_word_idx_l1 = __this_cpu_read(current_word_idx);
+		start_word_idx_l2 = __this_cpu_read(current_word_idx_l2);
+		start_bit_idx = __this_cpu_read(current_bit_idx);
+
+		word_idx_l1 = start_word_idx_l1;
+
+		/* loop through l1, try to pick up l2 */
+		for (i = 0; pending_words_l1 != 0; i++) {
+			unsigned long words_l1;
+			unsigned long pending_words_l2;
+			unsigned long pwl2idx;
+
+			words_l1 = MASK_LSBS(pending_words_l1, word_idx_l1);
+
+			if (words_l1 == 0) {
+				word_idx_l1 = 0;
+				start_word_idx_l2 = 0;
+				continue;
+			}
+
+			word_idx_l1 = __ffs(words_l1);
+			/* evtchn_sel_l2 has one word per L1 selector bit. */
+			pwl2idx = word_idx_l1;
+
+			pending_words_l2 =
+				xchg(&per_cpu(evtchn_sel_l2, cpu)[pwl2idx],
+				     0);
+
+			word_idx_l2 = 0;
+			if (word_idx_l1 == start_word_idx_l1) {
+				if (i == 0)
+					word_idx_l2 = start_word_idx_l2;
+				else
+					word_idx_l2 &= (1UL << start_word_idx_l2) - 1;
+			}
+
+			for (j = 0; pending_words_l2 != 0; j++) {
+				unsigned long pending_bits;
+				unsigned long words_l2;
+				unsigned long idx;
+
+				words_l2 = MASK_LSBS(pending_words_l2,
+						     word_idx_l2);
+
+				if (words_l2 == 0) {
+					word_idx_l2 = 0;
+					bit_idx = 0;
+					continue;
+				}
+
+				word_idx_l2 = __ffs(words_l2);
+
+				idx = word_idx_l1*BITS_PER_LONG+word_idx_l2;
+				pending_bits =
+					eops->active_evtchns(cpu, NULL, idx);
+
+				bit_idx = 0;
+				if (word_idx_l2 == start_word_idx_l2) {
+					if (j == 0)
+						bit_idx = start_bit_idx;
+					else
+						bit_idx &= (1UL<<start_bit_idx)-1;
+				}
+
+				/* process port */
+				do {
+					unsigned long bits;
+					int port, irq;
+					struct irq_desc *desc;
+
+					bits = MASK_LSBS(pending_bits, bit_idx);
+
+					if (bits == 0)
+						break;
+
+					bit_idx = __ffs(bits);
+
+					port = word_idx_l1 * l1cb +
+						word_idx_l2 * l2cb +
+						bit_idx;
+
+					irq = evtchn_to_irq[port];
+
+					if (irq != -1) {
+						desc = irq_to_desc(irq);
+						if (desc)
+							generic_handle_irq_desc(irq, desc);
+					}
+
+					bit_idx = (bit_idx + 1) % BITS_PER_LONG;
+					/* Next caller starts at last processed + 1; carry upwards on wrap. */
+					__this_cpu_write(current_bit_idx, bit_idx);
+					__this_cpu_write(current_word_idx_l2,
+							 bit_idx ? word_idx_l2 :
+							 (word_idx_l2+1) % BITS_PER_LONG);
+					__this_cpu_write(current_word_idx,
+							 (bit_idx || word_idx_l2 + 1 < BITS_PER_LONG) ?
+							 word_idx_l1 : (word_idx_l1+1) % BITS_PER_LONG);
+				} while (bit_idx != 0);
+
+				if ((word_idx_l2 != start_word_idx_l2) || (j != 0))
+					pending_words_l2 &= ~(1UL << word_idx_l2);
+
+				word_idx_l2 = (word_idx_l2 + 1) % BITS_PER_LONG;
+			}
+
+			if ((word_idx_l1 != start_word_idx_l1) || (i != 0))
+				pending_words_l1 &= ~(1UL << word_idx_l1);
+
+			word_idx_l1 = (word_idx_l1 + 1) % BITS_PER_LONG;
+		}
+
+		BUG_ON(!irqs_disabled());
+		count = __this_cpu_read(xed_nesting_count);
+		__this_cpu_write(xed_nesting_count, 0);
+	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
+
+out:
+	put_cpu();
+}
+
+
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
@@ -1534,6 +1871,11 @@ static inline int test_and_set_mask_l2(int chn)
 	return sync_test_and_set_bit(chn, sh->evtchn_mask);
 }
 
+static inline int test_and_set_mask_l3(int chn)
+{
+	return sync_test_and_set_bit(chn, evtchn_mask);
+}
+
 static int retrigger_dynirq(struct irq_data *data)
 {
 	int evtchn = evtchn_from_irq(data->irq);
@@ -1824,14 +2166,35 @@ static struct evtchn_ops evtchn_ops_l2 __read_mostly = {
 	.debug_interrupt   = debug_interrupt_l2,
 };
 
+static struct evtchn_ops evtchn_ops_l3 __read_mostly = {
+	.active_evtchns    = active_evtchns_l3,
+	.clear_evtchn      = clear_evtchn_l3,
+	.set_evtchn        = set_evtchn_l3,
+	.test_evtchn       = test_evtchn_l3,
+	.mask_evtchn       = mask_evtchn_l3,
+	.unmask_evtchn     = unmask_evtchn_l3,
+	.test_and_set_mask = test_and_set_mask_l3,
+	.do_upcall         = do_upcall_l3,
+	.debug_interrupt   = debug_interrupt_l3,
+};
+
 void __init xen_init_IRQ(void)
 {
 	int i, rc;
 	int cpu;
 
-	evtchn_level = 2;
-	nr_event_channels = NR_EVENT_CHANNELS_L2;
-	eops = &evtchn_ops_l2;
+	switch (evtchn_level) {
+	case 2:
+		nr_event_channels = NR_EVENT_CHANNELS_L2;
+		eops = &evtchn_ops_l2;
+		break;
+	case 3:
+		nr_event_channels = NR_EVENT_CHANNELS_L3;
+		eops = &evtchn_ops_l3;
+		break;
+	default:
+		BUG();
+	}
 
 	/* Setup 2-level event channel */
 	evtchn_to_irq = kcalloc(nr_event_channels, sizeof(*evtchn_to_irq),
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH V2 6/8] xen: introduce xen_event_channel_register_3level
  2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
                   ` (4 preceding siblings ...)
  2013-01-21 14:58 ` [RFC PATCH V2 5/8] xen: implement 3-level event channel routines Wei Liu
@ 2013-01-21 14:58 ` Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 7/8] xen: introduce interfaces to register N-level event channel Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 8/8] xen: register 3-level " Wei Liu
  7 siblings, 0 replies; 11+ messages in thread
From: Wei Liu @ 2013-01-21 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, Wei Liu, ian.campbell, jbeulich, konrad.wilk

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |   71 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 5b45441..cbb10ed 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -2178,6 +2178,77 @@ static struct evtchn_ops evtchn_ops_l3 __read_mostly = {
 	.debug_interrupt   = debug_interrupt_l3,
 };
 
+static int xen_event_channel_register_3level(void)
+{
+	evtchn_register_nlevel_t reg;
+	evtchn_register_3level_t l3;
+	int i, nr_pages, cpu;
+	unsigned long *_evtchn_pending = NULL;
+	unsigned long *_evtchn_mask = NULL;
+	unsigned long *l2sel_mfns = NULL;
+	unsigned long *l2sel_offsets = NULL;
+	int rc;
+
+	/* 1 page for 32 bit and 8 pages for 64 bit */
+	nr_pages = (sizeof(unsigned long) == 4 ? 1 : 8);
+
+	rc = -ENOMEM;
+#define __ALLOC_ARRAY(_ptr, _nr)					\
+	do {								\
+		(_ptr) = kzalloc(sizeof(unsigned long) * (_nr),		\
+				 GFP_KERNEL);				\
+		if (!(_ptr))						\
+			goto out;					\
+	} while (0)
+
+	__ALLOC_ARRAY(_evtchn_pending, nr_pages);
+	__ALLOC_ARRAY(_evtchn_mask, nr_pages);
+	__ALLOC_ARRAY(l2sel_mfns, nr_cpu_ids);
+	__ALLOC_ARRAY(l2sel_offsets, nr_cpu_ids);
+#undef __ALLOC_ARRAY
+
+	memset(&reg, 0, sizeof(reg));
+	memset(&l3,  0, sizeof(l3));
+
+	for (i = 0; i < nr_pages; i++) {
+		unsigned long offset = PAGE_SIZE * i;
+		_evtchn_pending[i] =
+			arbitrary_virt_to_mfn(
+				(void *)((unsigned long)evtchn_pending+offset));
+		_evtchn_mask[i] =
+			arbitrary_virt_to_mfn(
+				(void *)((unsigned long)evtchn_mask+offset));
+	}
+
+	for_each_possible_cpu(cpu) {
+		l2sel_mfns[cpu] =
+			arbitrary_virt_to_mfn(&per_cpu(evtchn_sel_l2, cpu));
+		l2sel_offsets[cpu] =
+			offset_in_page(&per_cpu(evtchn_sel_l2, cpu));
+	}
+
+	l3.nr_pages = nr_pages;
+	l3.evtchn_pending = _evtchn_pending;
+	l3.evtchn_mask = _evtchn_mask;
+
+	l3.nr_vcpus = nr_cpu_ids;
+	l3.l2sel_mfns = l2sel_mfns;
+	l3.l2sel_offsets = l2sel_offsets;
+
+	reg.level = 3;
+	reg.u.l3  = &l3;
+
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_register_nlevel, &reg);
+
+out:
+	kfree(_evtchn_pending);
+	kfree(_evtchn_mask);
+	kfree(l2sel_mfns);
+	kfree(l2sel_offsets);
+
+	return rc;
+}
+
 void __init xen_init_IRQ(void)
 {
 	int i, rc;
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH V2 7/8] xen: introduce interfaces to register N-level event channel
  2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
                   ` (5 preceding siblings ...)
  2013-01-21 14:58 ` [RFC PATCH V2 6/8] xen: introduce xen_event_channel_register_3level Wei Liu
@ 2013-01-21 14:58 ` Wei Liu
  2013-01-21 14:58 ` [RFC PATCH V2 8/8] xen: register 3-level " Wei Liu
  7 siblings, 0 replies; 11+ messages in thread
From: Wei Liu @ 2013-01-21 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, Wei Liu, ian.campbell, jbeulich, konrad.wilk

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 drivers/xen/events.c |   46 +++++++++++++++++++++++++++++++++++-----------
 include/xen/events.h |    6 ++++++
 2 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index cbb10ed..76ff48c 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -2178,6 +2178,26 @@ static struct evtchn_ops evtchn_ops_l3 __read_mostly = {
 	.debug_interrupt   = debug_interrupt_l3,
 };
 
+void xen_set_event_channel_nlevel(unsigned int level)
+{
+	switch (level) {
+	case 2:
+		evtchn_level = 2;
+		nr_event_channels = NR_EVENT_CHANNELS_L2;
+		eops = &evtchn_ops_l2;
+		break;
+	case 3:
+		evtchn_level = 3;
+		nr_event_channels = NR_EVENT_CHANNELS_L3;
+		eops = &evtchn_ops_l3;
+		break;
+	default:
+		printk(KERN_EMERG "Trying to set unsupported %d-level event channel\n",
+		       level);
+		BUG();
+	}
+}
+
 static int xen_event_channel_register_3level(void)
 {
 	evtchn_register_nlevel_t reg;
@@ -2249,24 +2269,28 @@ out:
 	return rc;
 }
 
-void __init xen_init_IRQ(void)
+int xen_event_channel_register_nlevel(unsigned int level)
 {
-	int i, rc;
-	int cpu;
+	int rc;
 
-	switch (evtchn_level) {
-	case 2:
-		nr_event_channels = NR_EVENT_CHANNELS_L2;
-		eops = &evtchn_ops_l2;
-		break;
+	switch (level) {
 	case 3:
-		nr_event_channels = NR_EVENT_CHANNELS_L3;
-		eops = &evtchn_ops_l3;
+		rc = xen_event_channel_register_3level();
 		break;
 	default:
-		BUG();
+		printk(KERN_INFO "Trying to register unsupported %d-level event channel\n",
+		       level);
+		rc = -EINVAL;
 	}
 
+	return rc;
+}
+
+void __init xen_init_IRQ(void)
+{
+	int i, rc;
+	int cpu;
+
 	/* Setup 2-level event channel */
 	evtchn_to_irq = kcalloc(nr_event_channels, sizeof(*evtchn_to_irq),
 				GFP_KERNEL);
diff --git a/include/xen/events.h b/include/xen/events.h
index 04399b2..3c1708f 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -109,4 +109,10 @@ int xen_irq_from_gsi(unsigned gsi);
 /* Determine whether to ignore this IRQ if it is passed to a guest. */
 int xen_test_irq_shared(int irq);
 
+/* Register N-level event channel. */
+int xen_event_channel_register_nlevel(unsigned int level);
+
+/* Set event channel to N-level if registration succeeds. */
+void xen_set_event_channel_nlevel(unsigned int level);
+
 #endif	/* _XEN_EVENTS_H */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [RFC PATCH V2 8/8] xen: register 3-level event channel
  2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
                   ` (6 preceding siblings ...)
  2013-01-21 14:58 ` [RFC PATCH V2 7/8] xen: introduce interfaces to register N-level event channel Wei Liu
@ 2013-01-21 14:58 ` Wei Liu
  7 siblings, 0 replies; 11+ messages in thread
From: Wei Liu @ 2013-01-21 14:58 UTC (permalink / raw)
  To: xen-devel; +Cc: david.vrabel, Wei Liu, ian.campbell, jbeulich, konrad.wilk

The 3-level event channel is registered in
 a) xen_init_IRQ(), when the guest is freshly started;
 b) xen_vcpu_restore(), when the guest is restored.

If registration fails, the kernel will fall back to 2-level event channel.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 arch/x86/xen/enlighten.c |   12 ++++++++++++
 drivers/xen/events.c     |   11 ++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bc893e7..25481b1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -43,6 +43,7 @@
 #include <xen/hvm.h>
 #include <xen/hvc-console.h>
 #include <xen/acpi.h>
+#include <xen/events.h>
 
 #include <asm/paravirt.h>
 #include <asm/apic.h>
@@ -177,6 +178,7 @@ static void xen_vcpu_setup(int cpu)
 void xen_vcpu_restore(void)
 {
 	int cpu;
+	int rc;
 
 	for_each_possible_cpu(cpu) {
 		bool other_cpu = (cpu != smp_processor_id());
@@ -195,6 +197,16 @@ void xen_vcpu_restore(void)
 		    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
 			BUG();
 	}
+
+	if (!(rc = xen_event_channel_register_nlevel(3))) {
+		printk(KERN_INFO "Register 3-level event channel succeeded.\n");
+		xen_set_event_channel_nlevel(3);
+	} else {
+		printk(KERN_INFO "Register 3-level event channel failed with %d.\n"
+		       "Fall back to default 2-level event channel.\n",
+		       rc);
+		xen_set_event_channel_nlevel(2);
+	}
 }
 
 static void __init xen_banner(void)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 76ff48c..45159d9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -2291,7 +2291,16 @@ void __init xen_init_IRQ(void)
 	int i, rc;
 	int cpu;
 
-	/* Setup 2-level event channel */
+	if (!(rc = xen_event_channel_register_3level())) {
+		printk(KERN_INFO "Register 3-level event channel succeeded.\n");
+		xen_set_event_channel_nlevel(3);
+	} else {
+		printk(KERN_INFO "Register 3-level event channel failed with %d.\n"
+		       "Fall back to default 2-level event channel.\n",
+		       rc);
+		xen_set_event_channel_nlevel(2);
+	}
+
 	evtchn_to_irq = kcalloc(nr_event_channels, sizeof(*evtchn_to_irq),
 				GFP_KERNEL);
 	BUG_ON(!evtchn_to_irq);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH V2 3/8] xen: generalized event channel operations
  2013-01-21 14:58 ` [RFC PATCH V2 3/8] xen: generalized event channel operations Wei Liu
@ 2013-01-22  9:00   ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2013-01-22  9:00 UTC (permalink / raw)
  To: Wei Liu; +Cc: xen-devel, david.vrabel, konrad.wilk, ian.campbell

>>> On 21.01.13 at 15:58, Wei Liu <wei.liu2@citrix.com> wrote:
> @@ -51,6 +51,27 @@
>  #include <xen/interface/hvm/hvm_op.h>
>  #include <xen/interface/hvm/params.h>
>  
> +/* N-level event channel, starting from 2 */
> +unsigned int evtchn_level = 2;
> +EXPORT_SYMBOL_GPL(evtchn_level);
> +unsigned int nr_event_channels;
> +EXPORT_SYMBOL_GPL(nr_event_channels);
> +
> +struct evtchn_ops {
> +	unsigned long (*active_evtchns)   (unsigned int cpu,
> +					   struct shared_info *sh,
> +					   unsigned int idx);
> +	void          (*clear_evtchn)     (int port);
> +	void          (*set_evtchn)       (int port);
> +	int           (*test_evtchn)      (int port);
> +	void          (*mask_evtchn)      (int port);
> +	void          (*unmask_evtchn)    (int port);
> +	int           (*test_and_set_mask)(int port);
> +	void          (*do_upcall)        (void);
> +	irqreturn_t   (*debug_interrupt)  (int irq, void *dev_id);
> +};
> +static struct evtchn_ops *eops;

static const struct evtchn_ops *__read_mostly eops;

And of course neither here nor in the hypervisor side patches
I'm convinced the added level of indirection (and the necessarily
resulting amount of code duplication) is really the best approach
(afaict some of the affected code paths can be hot for interrupt
intensive workloads), also with the consideration of what that
would mean if further levels got added.

Jan

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC PATCH V2 5/8] xen: implement 3-level event channel routines
  2013-01-21 14:58 ` [RFC PATCH V2 5/8] xen: implement 3-level event channel routines Wei Liu
@ 2013-01-22  9:12   ` Jan Beulich
  0 siblings, 0 replies; 11+ messages in thread
From: Jan Beulich @ 2013-01-22  9:12 UTC (permalink / raw)
  To: Wei Liu; +Cc: xen-devel, david.vrabel, konrad.wilk, ian.campbell

>>> On 21.01.13 at 15:58, Wei Liu <wei.liu2@citrix.com> wrote:
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -57,6 +57,16 @@ EXPORT_SYMBOL_GPL(evtchn_level);
>  unsigned int nr_event_channels;
>  EXPORT_SYMBOL_GPL(nr_event_channels);
>  
> +/* 2nd level selector for 3-level event channel */
> +DEFINE_PER_CPU(unsigned long [sizeof(unsigned long) * 8], evtchn_sel_l2);
> +/* shared bitmaps for 3-level event channel */
> +#define __NR_ELEMS (NR_EVENT_CHANNELS_L3/BITS_PER_LONG)
> +unsigned long evtchn_pending[__NR_ELEMS] __page_aligned_bss;
> +unsigned long evtchn_mask   [__NR_ELEMS] __page_aligned_bss;

static across the board, even more so with the names not being
tagged (which I'm specifically _not_ asking you to do).

For the former, the array dimension surely can be expressed
better (namely in terms of constants from the interface
headers)?

For the latter - wouldn't it be better to dynamically allocate those
(not wasting the memory when running with 2 levels only, which
I hope is going to remain the default for DomU-s)? Dynamic
allocations would, judging by the rest of the patch, also allow to
have more shared code between 2 and 3 levels (as you could
point into the shared info structure in the 2 level case).

Jan

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2013-01-22  9:12 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-21 14:58 [RFC PATCH V2 0/8] Implement 3-level event channel in Linux Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 1/8] xen: fix output of xen_debug_interrupt Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 2/8] xen: sync public headers Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 3/8] xen: generalized event channel operations Wei Liu
2013-01-22  9:00   ` Jan Beulich
2013-01-21 14:58 ` [RFC PATCH V2 4/8] xen: dynamically allocate cpu_evtchn_mask Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 5/8] xen: implement 3-level event channel routines Wei Liu
2013-01-22  9:12   ` Jan Beulich
2013-01-21 14:58 ` [RFC PATCH V2 6/8] xen: introduce xen_event_channel_register_3level Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 7/8] xen: introduce interfaces to register N-level event channel Wei Liu
2013-01-21 14:58 ` [RFC PATCH V2 8/8] xen: register 3-level " Wei Liu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.