From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wei Liu Subject: Re: [PATCH 12/12] xen/events: use the FIFO-based ABI if available Date: Fri, 16 Aug 2013 18:47:23 +0100 Message-ID: <20130816174723.GJ12674@zion.uk.xensource.com> References: <1376072121-17786-1-git-send-email-david.vrabel@citrix.com> <1376072121-17786-13-git-send-email-david.vrabel@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Content-Disposition: inline In-Reply-To: <1376072121-17786-13-git-send-email-david.vrabel@citrix.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: David Vrabel Cc: Boris Ostrovsky , Wei Liu , xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org On Fri, Aug 09, 2013 at 07:15:21PM +0100, David Vrabel wrote: > From: David Vrabel > > If the hypervisor supports the FIFO-based ABI, enable it by > initializing the control block for the boot VCPU and subsequent VCPUs > as they are brought up. The event array is expanded as required when > event ports are setup. > > This implementation has some known limitations: > > - Migration will not work as the control blocks or event arrays are > not remapped by Xen at the destination. > Is it possible to hook into xen_vcpu_restore to make it work? > - The timer VIRQ which previously was treated as the highest priority > event has the default priority. > So what's missing from the series is a patch that allows kernel to actually make use of the priority queues? IMHO it's not just about the timer VIRQ. How do you plan to expose this interface to drivers? > Signed-off-by: David Vrabel > --- > drivers/xen/events/Makefile | 1 + > drivers/xen/events/events.c | 7 +- > drivers/xen/events/events_internal.h | 1 + > drivers/xen/events/fifo.c | 335 ++++++++++++++++++++++++++++++++++ > 4 files changed, 343 insertions(+), 1 deletions(-) > create mode 100644 drivers/xen/events/fifo.c > > diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile > index aea331e..74644d0 100644 > --- a/drivers/xen/events/Makefile > +++ b/drivers/xen/events/Makefile > @@ -1,2 +1,3 @@ > obj-y += events.o > +obj-y += fifo.o > obj-y += n-level.o > diff --git a/drivers/xen/events/events.c b/drivers/xen/events/events.c > index 55d33d1..2e7416e 100644 > --- a/drivers/xen/events/events.c > +++ b/drivers/xen/events/events.c > @@ -1589,8 +1589,13 @@ void xen_callback_vector(void) {} > void __init xen_init_IRQ(void) > { > int i; > + int ret; > > - xen_evtchn_init_nlevel(); > + ret = xen_evtchn_init_fifo(); > + if (ret < 0) { > + printk(KERN_INFO "xen: falling back to n-level event channels"); > + xen_evtchn_init_nlevel(); > + } > > evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), > sizeof(*evtchn_to_irq), GFP_KERNEL); > diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h > index 9d8b70c..e14f616 100644 > --- a/drivers/xen/events/events_internal.h > +++ b/drivers/xen/events/events_internal.h > @@ -138,5 +138,6 @@ static inline void xen_evtchn_handle_events(int cpu) > } > > void xen_evtchn_init_nlevel(void); > +int xen_evtchn_init_fifo(void); > > #endif /* #ifndef __EVENTS_INTERNAL_H__ */ > diff --git a/drivers/xen/events/fifo.c b/drivers/xen/events/fifo.c > new file mode 100644 > index 0000000..b07a603 > --- /dev/null > +++ b/drivers/xen/events/fifo.c > @@ -0,0 +1,335 @@ > +/* > + * Xen event channels (FIFO-based ABI) > + * > + * Copyright (C) 2013 Citrix Systems R&D ltd. > + * > + * This source code is licensed under the GNU General Public License, > + * Version 2 or later. See the file COPYING for more details. > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > + > +#include > +#include > +#include > +#include > + > +#include > +#include > +#include > +#include > +#include > + > +#include "events_internal.h" > + > +#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t)) > +#define MAX_EVENT_ARRAY_PAGES ((1 << EVTCHN_FIFO_LINK_BITS) \ > + / EVENT_WORDS_PER_PAGE) > + > +struct evtchn_fifo_queue { > + uint32_t head[EVTCHN_FIFO_MAX_QUEUES]; > +}; > + > +static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block); > +static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue); > +static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES]; > +static unsigned event_array_pages; > + > +#define BM(w) ((unsigned long *)(w)) > + > +static inline event_word_t *event_word_from_port(int port) > +{ > + int i = port / EVENT_WORDS_PER_PAGE; > + > + if (i >= event_array_pages) > + return NULL; > + return event_array[i] + port % EVENT_WORDS_PER_PAGE; > +} > + > +static unsigned fifo_max_channels(void) > +{ > + return 1 << EVTCHN_FIFO_LINK_BITS; > +} > + > +static unsigned fifo_nr_channels(void) > +{ > + return event_array_pages * EVENT_WORDS_PER_PAGE; > +} > + > +static int fifo_setup(struct irq_info *info) > +{ > + int port = info->evtchn; > + int i; > + int ret = -ENOMEM; > + > + i = port / EVENT_WORDS_PER_PAGE; > + > + if (i >= MAX_EVENT_ARRAY_PAGES) > + return -EINVAL; > + > + while (i >= event_array_pages) { > + struct page *array_page = NULL; > + struct evtchn_expand_array expand_array; > + > + array_page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (array_page == NULL) > + goto error; > + > + expand_array.array_mfn = virt_to_mfn(page_address(array_page)); > + > + ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array); > + if (ret < 0) { > + __free_page(array_page); > + goto error; > + } > + > + event_array[event_array_pages++] = page_address(array_page); > + } > + return 0; > + > + error: > + if (event_array_pages == 0) > + panic("xen: unable to expand event array with initial page (%d)\n", ret); > + else > + printk(KERN_ERR "xen: unable to expand event array (%d)\n", ret); > + return ret; > +} > + > +static void fifo_bind_to_cpu(struct irq_info *info, int cpu) > +{ > + /* no-op */ > +} > + > +static void fifo_clear_pending(int port) > +{ > + event_word_t *word = event_word_from_port(port); > + sync_clear_bit(EVTCHN_FIFO_PENDING, BM(word)); > +} > + > +static void fifo_set_pending(int port) > +{ > + event_word_t *word = event_word_from_port(port); > + sync_set_bit(EVTCHN_FIFO_PENDING, BM(word)); > +} > + > +static bool fifo_is_pending(int port) > +{ > + event_word_t *word = event_word_from_port(port); > + return sync_test_bit(EVTCHN_FIFO_PENDING, BM(word)); > +} > + > +static bool fifo_test_and_set_mask(int port) > +{ > + event_word_t *word = event_word_from_port(port); > + return sync_test_and_set_bit(EVTCHN_FIFO_MASKED, BM(word)); > +} > + > +static void fifo_mask(int port) > +{ > + event_word_t *word = event_word_from_port(port); > + if (word) > + sync_set_bit(EVTCHN_FIFO_MASKED, BM(word)); > +} > + > +static void fifo_unmask(int port) > +{ > + unsigned int cpu = get_cpu(); > + bool do_hypercall = false; > + bool evtchn_pending = false; > + > + BUG_ON(!irqs_disabled()); > + > + if (unlikely((cpu != cpu_from_evtchn(port)))) > + do_hypercall = true; > + else { > + event_word_t *word = event_word_from_port(port); > + > + sync_clear_bit(EVTCHN_FIFO_MASKED, BM(word)); > + evtchn_pending = sync_test_bit(EVTCHN_FIFO_PENDING, BM(word)); > + if (evtchn_pending) > + do_hypercall = true; > + } > + > + if (do_hypercall) { > + struct evtchn_unmask unmask = { .port = port }; > + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); > + } > + > + put_cpu(); > +} > + > +static uint32_t clear_linked(volatile event_word_t *word) > +{ > + event_word_t n, o, w; > + > + w = *word; > + > + do { > + o = w; > + n = (w & ~((1 << EVTCHN_FIFO_LINKED) | EVTCHN_FIFO_LINK_MASK)); > + } while ( (w = sync_cmpxchg(word, o, n)) != o ); > + > + return w & EVTCHN_FIFO_LINK_MASK; > +} > + > +static void handle_irq_for_port(int port) > +{ > + int irq; > + struct irq_desc *desc; > + > + irq = get_evtchn_to_irq(port); > + if (irq != -1) { > + desc = irq_to_desc(irq); > + if (desc) > + generic_handle_irq_desc(irq, desc); > + } > +} > + > +static void consume_one_event(int cpu, > + struct evtchn_fifo_control_block *control_block, > + int priority, uint32_t *ready) > +{ > + struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu); > + uint32_t head; > + int port; > + event_word_t *word; > + > + head = q->head[priority]; > + > + /* Reached the tail last time? Read the new HEAD from the > + control block. */ > + if (head == 0) { > + rmb(); /* Ensure word is up-to-date before reading head. */ > + head = control_block->head[priority]; > + } > + > + port = head; > + word = event_word_from_port(port); > + head = clear_linked(word); > + > + /* > + * If the link is non-zero, there are more events in the > + * queue, otherwise the queue is empty. > + * > + * If the queue is empty, clear this priority from our local > + * copy of the ready word. > + */ > + if (head == 0) > + clear_bit(priority, BM(ready)); > + > + if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word)) > + && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word))) > + handle_irq_for_port(port); > + > + q->head[priority] = head; > +} > + > +#define EVTCHN_FIFO_READY_MASK ((1 << EVTCHN_FIFO_MAX_QUEUES) - 1) > + > +static void fifo_handle_events(int cpu) > +{ > + struct evtchn_fifo_control_block *control_block; > + uint32_t ready; > + int q; > + > + control_block = per_cpu(cpu_control_block, cpu); > + > + ready = xchg(&control_block->ready, 0); > + > + while (ready & EVTCHN_FIFO_READY_MASK) { > + q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES); > + consume_one_event(cpu, control_block, q, &ready); > + ready |= xchg(&control_block->ready, 0); > + } > +} > + > +static const struct evtchn_ops evtchn_ops_fifo = { > + .max_channels = fifo_max_channels, > + .nr_channels = fifo_nr_channels, > + .setup = fifo_setup, > + .bind_to_cpu = fifo_bind_to_cpu, > + .clear_pending = fifo_clear_pending, > + .set_pending = fifo_set_pending, > + .is_pending = fifo_is_pending, > + .test_and_set_mask = fifo_test_and_set_mask, > + .mask = fifo_mask, > + .unmask = fifo_unmask, > + .handle_events = fifo_handle_events, > +}; > + > +static int __cpuinit fifo_init_control_block(int cpu) > +{ > + struct page *control_block = NULL; > + struct evtchn_init_control init_control; > + int ret = -ENOMEM; > + > + control_block = alloc_page(GFP_KERNEL|__GFP_ZERO); > + if (control_block == NULL) > + goto error; > + > + init_control.control_mfn = virt_to_mfn(page_address(control_block)); > + init_control.offset = 0; > + init_control.vcpu = cpu; > + > + ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control); > + if (ret < 0) > + goto error; > + > + per_cpu(cpu_control_block, cpu) = page_address(control_block); > + > + return 0; > + > + error: > + __free_page(control_block); > + return ret; > +} > + > +static int __cpuinit fifo_cpu_notification(struct notifier_block *self, > + unsigned long action, void *hcpu) > +{ > + int cpu = (long)hcpu; > + int ret = 0; > + > + switch (action) { > + case CPU_UP_PREPARE: > + ret = fifo_init_control_block(cpu); > + break; On the hypervisor side fifo_init_control_block would return -EINVAL if this CPU has previous mapped control block. Do you need to tear down the control block when you offline a CPU? (That would mean another sub-op for the interface) > + default: > + break; > + } > + return ret < 0 ? NOTIFY_BAD : NOTIFY_OK; > +} > + > +static struct notifier_block fifo_cpu_notifier __cpuinitdata = { > + .notifier_call = fifo_cpu_notification, > +}; > + > + > +int __init xen_evtchn_init_fifo(void) > +{ > + int cpu = get_cpu(); > + int ret; > + > + ret = fifo_init_control_block(cpu); > + if (ret < 0) > + goto error; > + > + printk(KERN_INFO "xen: switching to FIFO-based event channels\n"); > + > + evtchn_ops = &evtchn_ops_fifo; > + > + register_cpu_notifier(&fifo_cpu_notifier); > + > + put_cpu(); > + return 0; > + > + error: > + put_cpu(); > + return ret; Could be written as: if (ret < 0) goto out; ... register_cpu_notifier(); ret = 0; out: put_cpu(); return ret; It is only a matter of personal taste though. Wei. > +} > -- > 1.7.2.5