From mboxrd@z Thu Jan  1 00:00:00 1970
From: David Vrabel <david.vrabel@citrix.com>
Subject: [PATCH 8/8] evtchn: add FIFO-based event channel hypercalls and port ops
Date: Fri, 9 Aug 2013 19:08:40 +0100
Message-ID: <1376071720-17644-9-git-send-email-david.vrabel@citrix.com>
References: <1376071720-17644-1-git-send-email-david.vrabel@citrix.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
In-Reply-To: <1376071720-17644-1-git-send-email-david.vrabel@citrix.com>
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: xen-devel@lists.xen.org
Cc: Wei Liu, Keir Fraser, David Vrabel
List-Id: xen-devel@lists.xenproject.org

From: David Vrabel <david.vrabel@citrix.com>

Add the implementation for the FIFO-based event channel ABI: the new
hypercall sub-ops (EVTCHNOP_init_control, EVTCHNOP_expand_array,
EVTCHNOP_set_priority) and the required evtchn_ops (set_pending,
unmask, etc.).

This current implementation has two main limitations:

- EVTCHNOP_set_limit is not yet implemented, so any guest will be
  able to use up to 2^17 event channels (requiring 128 global mapping
  pages for a fully populated event array).

- The control block frames are not required to be shared with the
  vcpu_info structure, which costs an additional global mapping page
  per VCPU.  This does make the guest implementation cleaner, though,
  so perhaps we do not need to fix this?

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
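Reviewer note (not part of the commit, ignored by git-am): the expected
guest-side sequence for switching to the FIFO ABI is roughly the sketch
below.  This is illustrative only -- HYPERVISOR_event_channel_op() is
the usual guest hypercall wrapper, virt_to_gfn() and the two static
pages are hypothetical stand-ins, and error handling beyond the
hypercall return values is elided.

    #include <xen/interface/event_channel.h> /* public ABI; path is guest-specific */

    static struct evtchn_fifo_control_block *cb; /* zeroed, page-aligned */
    static event_word_t *array_page;             /* zeroed, page-aligned */

    static int fifo_abi_init(unsigned int vcpu)
    {
        struct evtchn_init_control init_control = {
            .vcpu        = vcpu,
            /* Despite the name, looked up as a GFN (get_page_from_gfn). */
            .control_mfn = virt_to_gfn(cb),
            .offset      = 0, /* 8-byte aligned, within the page */
        };
        struct evtchn_expand_array expand_array = {
            .array_mfn = virt_to_gfn(array_page),
        };
        int rc;

        /* Register this VCPU's control block; Xen fills in link_bits. */
        rc = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
        if ( rc )
            return rc;

        /* Populate the first page of the domain's event array. */
        return HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
    }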
 xen/common/Makefile          |    1 +
 xen/common/event_channel.c   |   37 ++++
 xen/common/event_fifo.c      |  491 ++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/event.h      |    6 +-
 xen/include/xen/event_fifo.h |   56 +++++
 xen/include/xen/sched.h      |    4 +
 6 files changed, 593 insertions(+), 2 deletions(-)
 create mode 100644 xen/common/event_fifo.c
 create mode 100644 xen/include/xen/event_fifo.h
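Reviewer note (not part of the commit, ignored by git-am): each queue is
a singly-linked list threaded through the LINK field of the per-event
words, with the head kept in the control block; evtchn_fifo_set_pending()
appends at the tail and evtchn_fifo_set_link() updates a LINK field with
cmpxchg.  For context, a guest consumer drains a queue roughly as
sketched below.  handle_event() is hypothetical, the atomics stand in
for the guest's own, and the unlink/set_link races that the hypervisor
side handles with cmpxchg are glossed over here.

    /* Bit numbers, masks and event_word_t are from the public FIFO ABI. */
    extern void handle_event(unsigned port); /* hypothetical */

    static void fifo_drain_queue(uint32_t *head, event_word_t *event_array)
    {
        while ( *head != 0 )
        {
            unsigned port = *head;
            event_word_t *word = &event_array[port];

            /* Advance head along the LINK field before unlinking. */
            *head = read_atomic(word) & EVTCHN_FIFO_LINK_MASK;
            clear_bit(EVTCHN_FIFO_LINKED, word);

            /*
             * Non-pending (stale) and masked events are simply
             * ignored, which is what evtchn_fifo_clear_pending()
             * relies on.
             */
            if ( test_and_clear_bit(EVTCHN_FIFO_PENDING, word) &&
                 !test_bit(EVTCHN_FIFO_MASKED, word) )
                handle_event(port);
        }
    }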
diff --git a/xen/common/Makefile b/xen/common/Makefile
index ef03eac..afd7d40 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -7,6 +7,7 @@ obj-y += domctl.o
 obj-y += domain.o
 obj-y += event_2l.o
 obj-y += event_channel.o
+obj-y += event_fifo.o
 obj-y += grant_table.o
 obj-y += irq.o
 obj-y += kernel.o
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index 67dcdbc..a048107 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -26,6 +26,7 @@
 #include <xen/compat.h>
 #include <xen/guest_access.h>
 #include <xen/keyhandler.h>
+#include <xen/event_fifo.h>
 #include <asm/current.h>
 
 #include <public/xen.h>
@@ -1053,6 +1054,40 @@ long do_event_channel_op(int cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
         break;
     }
 
+    case EVTCHNOP_init_control: {
+        struct evtchn_init_control init_control;
+        if ( copy_from_guest(&init_control, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_init_control(&init_control);
+        if ( !rc && __copy_to_guest(arg, &init_control, 1) )
+            rc = -EFAULT;
+        break;
+    }
+
+    case EVTCHNOP_expand_array: {
+        struct evtchn_expand_array expand_array;
+        if ( copy_from_guest(&expand_array, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_expand_array(&expand_array);
+        break;
+    }
+
+    case EVTCHNOP_set_priority: {
+        struct evtchn_set_priority set_priority;
+        if ( copy_from_guest(&set_priority, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_set_priority(&set_priority);
+        break;
+    }
+
+    case EVTCHNOP_set_limit: {
+        struct evtchn_set_limit set_limit;
+        if ( copy_from_guest(&set_limit, arg, 1) != 0 )
+            return -EFAULT;
+        rc = evtchn_fifo_set_limit(&set_limit);
+        break;
+    }
+
     default:
         rc = -ENOSYS;
         break;
@@ -1214,6 +1249,8 @@ void evtchn_destroy(struct domain *d)
         (void)__evtchn_close(d, i);
     }
 
+    evtchn_fifo_destroy(d);
+
     /* Free all event-channel buckets. */
     spin_lock(&d->event_lock);
     for ( i = 0; i < NR_EVTCHN_GROUPS; i++ )
diff --git a/xen/common/event_fifo.c b/xen/common/event_fifo.c
new file mode 100644
index 0000000..a7b5dc9
--- /dev/null
+++ b/xen/common/event_fifo.c
@@ -0,0 +1,491 @@
+/*
+ * FIFO event channel management.
+ *
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later.  See the file COPYING for more details.
+ */
+
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/event_fifo.h>
+#include <xen/paging.h>
+#include <xen/mm.h>
+
+#include <public/event_channel.h>
+
+static inline event_word_t *evtchn_fifo_word_from_port(struct domain *d,
+                                                       unsigned port)
+{
+    unsigned p, w;
+
+    if ( unlikely(port >= d->evtchn_fifo->num_evtchns) )
+        return NULL;
+
+    p = port / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+    w = port % EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+
+    return d->evtchn_fifo->event_array[p].virt + w;
+}
+
+static bool_t evtchn_fifo_set_link(event_word_t *word, uint32_t link)
+{
+    event_word_t n, o, w;
+
+    w = *word;
+
+    do {
+        if ( !(w & (1 << EVTCHN_FIFO_LINKED)) )
+            return 0;
+        o = w;
+        n = (w & ~EVTCHN_FIFO_LINK_MASK) | link;
+    } while ( (w = cmpxchg(word, o, n)) != o );
+
+    return 1;
+}
+
+static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
+{
+    struct domain *d = v->domain;
+    unsigned port;
+    event_word_t *word;
+    struct evtchn_fifo_queue *q;
+    unsigned long flags;
+    bool_t was_pending;
+
+    port = evtchn->port;
+    word = evtchn_fifo_word_from_port(d, port);
+    if ( unlikely(!word) )
+        return;
+
+    /*
+     * No locking around getting the queue. This may race with
+     * changing the priority but we are allowed to signal the event
+     * once on the old priority.
+     */
+    q = evtchn->queue;
+
+    was_pending = test_and_set_bit(EVTCHN_FIFO_PENDING, word);
+
+    /*
+     * Link the event if it is unmasked and not already linked.
+     */
+    if ( !test_bit(EVTCHN_FIFO_MASKED, word)
+         && !test_and_set_bit(EVTCHN_FIFO_LINKED, word) )
+    {
+        event_word_t *tail_word;
+        bool_t linked = 0;
+
+        spin_lock_irqsave(&q->lock, flags);
+
+        /*
+         * Atomically link the tail to port iff the tail is linked.
+         * If the tail is unlinked the queue is empty.
+         *
+         * If port is the same as tail, the queue is empty but q->tail
+         * will appear linked as we just set LINKED above.
+         *
+         * If the queue is empty (i.e., we haven't linked the new
+         * event), head must be updated.
+         */
+        if ( port != q->tail )
+        {
+            tail_word = evtchn_fifo_word_from_port(d, q->tail);
+            linked = evtchn_fifo_set_link(tail_word, port);
+        }
+        if ( !linked )
+            write_atomic(q->head, port);
+        q->tail = port;
+
+        spin_unlock_irqrestore(&q->lock, flags);
+
+        if ( !test_and_set_bit(q->priority,
+                               &v->evtchn_fifo->control_block->ready) )
+            vcpu_mark_events_pending(v);
+    }
+
+    if ( !was_pending )
+        evtchn_check_pollers(d, port);
+}
+
+static void evtchn_fifo_clear_pending(struct domain *d, struct evtchn *evtchn)
+{
+    event_word_t *word;
+
+    word = evtchn_fifo_word_from_port(d, evtchn->port);
+    if ( unlikely(!word) )
+        return;
+
+    /*
+     * Just clear the P bit.
+     *
+     * No need to unlink as the guest will unlink and ignore
+     * non-pending events.
+     */
+    clear_bit(EVTCHN_FIFO_PENDING, word);
+}
+
+static void evtchn_fifo_unmask(struct domain *d, struct evtchn *evtchn)
+{
+    struct vcpu *v = d->vcpu[evtchn->notify_vcpu_id];
+    event_word_t *word;
+
+    word = evtchn_fifo_word_from_port(d, evtchn->port);
+    if ( unlikely(!word) )
+        return;
+
+    clear_bit(EVTCHN_FIFO_MASKED, word);
+
+    /* Relink if pending. */
+    if ( test_bit(EVTCHN_FIFO_PENDING, word) )
+        evtchn_fifo_set_pending(v, evtchn);
+}
+
+static bool_t evtchn_fifo_is_pending(struct domain *d,
+                                     const struct evtchn *evtchn)
+{
+    event_word_t *word;
+
+    word = evtchn_fifo_word_from_port(d, evtchn->port);
+    if ( unlikely(!word) )
+        return 0;
+
+    return test_bit(EVTCHN_FIFO_PENDING, word);
+}
+
+static bool_t evtchn_fifo_is_masked(struct domain *d,
+                                    const struct evtchn *evtchn)
+{
+    event_word_t *word;
+
+    word = evtchn_fifo_word_from_port(d, evtchn->port);
+    if ( unlikely(!word) )
+        return 1;
+
+    return test_bit(EVTCHN_FIFO_MASKED, word);
+}
+
+static void evtchn_fifo_bind_to_vcpu(struct domain *d, struct evtchn *evtchn,
+                                     struct vcpu *v)
+{
+    unsigned priority;
+
+    /* Keep the same priority if possible, otherwise use the default. */
+    if ( evtchn->queue )
+        priority = evtchn->queue->priority;
+    else
+        priority = EVTCHN_FIFO_PRIORITY_DEFAULT;
+
+    evtchn->queue = &v->evtchn_fifo->queue[priority];
+}
+
+static const struct evtchn_port_ops evtchn_port_ops_fifo =
+{
+    .set_pending   = evtchn_fifo_set_pending,
+    .clear_pending = evtchn_fifo_clear_pending,
+    .unmask        = evtchn_fifo_unmask,
+    .is_pending    = evtchn_fifo_is_pending,
+    .is_masked     = evtchn_fifo_is_masked,
+    .bind_to_vcpu  = evtchn_fifo_bind_to_vcpu,
+};
+
+static int map_guest_page(struct domain *d, uint64_t gfn,
+                          struct page_info **page, void **virt)
+{
+    struct page_info *p;
+
+    p = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
+    if ( !p )
+        return -EINVAL;
+
+    if ( !get_page_type(p, PGT_writable_page) )
+    {
+        put_page(p);
+        return -EINVAL;
+    }
+
+    /* Map by MFN -- the GFN is only meaningful in the guest's p2m. */
+    *virt = map_domain_page_global(page_to_mfn(p));
+    if ( *virt == NULL )
+    {
+        put_page_and_type(p);
+        return -ENOMEM;
+    }
+    *page = p;
+    return 0;
+}
+
+static void unmap_guest_page(struct page_info *page, void *virt)
+{
+    if ( page == NULL )
+        return;
+
+    unmap_domain_page_global(virt);
+    put_page_and_type(page);
+}
+
+static void cleanup_control_block(struct vcpu *v)
+{
+    if ( v->evtchn_fifo )
+    {
+        unmap_guest_page(v->evtchn_fifo->cb_page,
+                         v->evtchn_fifo->control_block);
+        xfree(v->evtchn_fifo);
+        v->evtchn_fifo = NULL;
+    }
+}
+
+static void init_queue(struct vcpu *v, struct evtchn_fifo_queue *q, unsigned i)
+{
+    spin_lock_init(&q->lock);
+    q->priority = i;
+    q->head = &v->evtchn_fifo->control_block->head[i];
+}
+
+static int setup_control_block(struct vcpu *v, uint64_t gfn, uint32_t offset)
+{
+    struct domain *d = v->domain;
+    struct evtchn_fifo_vcpu *efv;
+    struct page_info *page;
+    void *virt;
+    unsigned i;
+    int rc;
+
+    if ( v->evtchn_fifo )
+        return -EINVAL;
+
+    efv = xzalloc(struct evtchn_fifo_vcpu);
+    if ( efv == NULL )
+        return -ENOMEM;
+
+    rc = map_guest_page(d, gfn, &page, &virt);
+    if ( rc < 0 )
+    {
+        xfree(efv);
+        return rc;
+    }
+
+    v->evtchn_fifo = efv;
+
+    v->evtchn_fifo->cb_page = page;
+    v->evtchn_fifo->control_block = virt + offset;
+
+    for ( i = 0; i <= EVTCHN_FIFO_PRIORITY_MIN; i++ )
+        init_queue(v, &v->evtchn_fifo->queue[i], i);
+
+    return 0;
+}
+
+/*
+ * Setup an event array with no pages.
+ */
+static int setup_event_array(struct domain *d)
+{
+    if ( d->evtchn_fifo )
+        return 0;
+
+    d->evtchn_fifo = xzalloc(struct evtchn_fifo_domain);
+    if ( d->evtchn_fifo == NULL )
+        return -ENOMEM;
+
+    d->evtchn_fifo->num_evtchns = 0;
+
+    return 0;
+}
+
+/*
+ * Some ports may already be bound; bind them to the correct VCPU so
+ * they have a valid queue.
+ *
+ * Note: any events that are currently pending will not be resent and
+ * will be lost.
+ */
+static void rebind_all_ports(struct domain *d)
+{
+    unsigned port;
+
+    for ( port = 1; port < d->max_evtchns; port++ )
+    {
+        struct evtchn *evtchn;
+
+        if ( !port_is_valid(d, port) )
+            break;
+
+        evtchn = evtchn_from_port(d, port);
+        switch ( evtchn->state )
+        {
+        case ECS_INTERDOMAIN:
+        case ECS_PIRQ:
+        case ECS_VIRQ:
+            evtchn_port_bind_to_vcpu(d, evtchn,
+                                     d->vcpu[evtchn->notify_vcpu_id]);
+            break;
+        default:
+            break;
+        }
+    }
+}
+
+static void cleanup_event_array(struct domain *d)
+{
+    unsigned i;
+
+    if ( d->evtchn_fifo == NULL )
+        return;
+
+    for ( i = 0; i < EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES; i++ )
+    {
+        unmap_guest_page(d->evtchn_fifo->event_array[i].page,
+                         d->evtchn_fifo->event_array[i].virt);
+    }
+    xfree(d->evtchn_fifo);
+}
+
+int evtchn_fifo_init_control(struct evtchn_init_control *init_control)
+{
+    struct domain *d = current->domain;
+    uint32_t vcpu_id;
+    uint64_t gfn;
+    uint32_t offset;
+    struct vcpu *v;
+    int rc;
+
+    init_control->link_bits = EVTCHN_FIFO_LINK_BITS;
+
+    vcpu_id = init_control->vcpu;
+    gfn = init_control->control_mfn;
+    offset = init_control->offset;
+
+    if ( (vcpu_id >= d->max_vcpus) || (d->vcpu[vcpu_id] == NULL) )
+        return -ENOENT;
+    v = d->vcpu[vcpu_id];
+
+    /* Must not cross page boundary. */
+    if ( offset > (PAGE_SIZE - sizeof(evtchn_fifo_control_block_t)) )
+        return -EINVAL;
+
+    /* Must be 8-byte aligned. */
+    if ( offset & (8 - 1) )
+        return -EINVAL;
+
+    spin_lock(&d->event_lock);
+
+    rc = setup_control_block(v, gfn, offset);
+
+    /*
+     * If this is the first control block, setup an empty event array
+     * and switch to the fifo port ops.  (Do neither if the control
+     * block setup failed.)
+     */
+    if ( rc == 0 && d->evtchn_fifo == NULL )
+    {
+        rc = setup_event_array(d);
+        if ( rc < 0 )
+            cleanup_control_block(v);
+        else
+        {
+            d->evtchn_port_ops = &evtchn_port_ops_fifo;
+            d->max_evtchns = 1 << EVTCHN_FIFO_LINK_BITS;
+            rebind_all_ports(d);
+        }
+    }
+
+    spin_unlock(&d->event_lock);
+
+    return rc;
+}
+
+static int add_page_to_event_array(struct domain *d, unsigned long gfn)
+{
+    struct page_info *page = NULL;
+    void *virt;
+    unsigned slot;
+    int rc;
+
+    slot = d->evtchn_fifo->num_evtchns / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+    if ( slot >= EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES )
+        return -ENOSPC;
+
+    rc = map_guest_page(d, gfn, &page, &virt);
+    if ( rc < 0 )
+        return rc;
+
+    d->evtchn_fifo->event_array[slot].page = page;
+    d->evtchn_fifo->event_array[slot].virt = virt;
+
+    d->evtchn_fifo->num_evtchns += EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
+
+    return 0;
+}
+
+int evtchn_fifo_expand_array(struct evtchn_expand_array *expand_array)
+{
+    struct domain *d = current->domain;
+    int rc;
+
+    spin_lock(&d->event_lock);
+    /* The FIFO ABI must have been set up with EVTCHNOP_init_control. */
+    if ( d->evtchn_fifo == NULL )
+        rc = -ENOSYS;
+    else
+        rc = add_page_to_event_array(d, expand_array->array_mfn);
+    spin_unlock(&d->event_lock);
+
+    return rc;
+}
+
+int evtchn_fifo_set_priority(struct evtchn_set_priority *set_priority)
+{
+    struct domain *d = current->domain;
+    struct vcpu *v;
+    uint32_t priority;
+    unsigned port;
+    struct evtchn *evtchn;
+
+    priority = set_priority->priority;
+    port = set_priority->port;
+
+    if ( priority > EVTCHN_FIFO_PRIORITY_MIN )
+        return -EINVAL;
+
+    spin_lock(&d->event_lock);
+
+    if ( !port_is_valid(d, port) )
+    {
+        spin_unlock(&d->event_lock);
+        return -EINVAL;
+    }
+
+    evtchn = evtchn_from_port(d, port);
+    v = d->vcpu[evtchn->notify_vcpu_id];
+
+    /* The target VCPU must have a control block (and hence queues). */
+    if ( v->evtchn_fifo == NULL )
+    {
+        spin_unlock(&d->event_lock);
+        return -ENOSYS;
+    }
+
+    /*
+     * Switch to the new queue for future events. If the event is
+     * already pending or in the process of being linked it will be on
+     * the old queue -- this is fine.
+     */
+    evtchn->queue = &v->evtchn_fifo->queue[priority];
+
+    spin_unlock(&d->event_lock);
+
+    return 0;
+}
+
+int evtchn_fifo_set_limit(struct evtchn_set_limit *set_limit)
+{
+    /* FIXME: not supported yet. */
+    return -ENOSYS;
+}
+
+void evtchn_fifo_destroy(struct domain *d)
+{
+    struct vcpu *v;
+
+    for_each_vcpu( d, v )
+        cleanup_control_block(v);
+    cleanup_event_array(d);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
+ */ + evtchn = evtchn_from_port(d, port); + v = d->vcpu[evtchn->notify_vcpu_id]; + evtchn->queue = &v->evtchn_fifo->queue[priority]; + + spin_unlock(&d->event_lock); + + return 0; +} + +int evtchn_fifo_set_limit(struct evtchn_set_limit *set_limit) +{ + /* FIXME: not supported yet. */ + return -ENOSYS; +} + +void evtchn_fifo_destroy(struct domain *d) +{ + struct vcpu *v; + + for_each_vcpu( d, v ) + cleanup_control_block(v); + cleanup_event_array(d); +} + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h index a795ae6..3a71956 100644 --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -127,12 +127,14 @@ struct evtchn_port_ops { struct vcpu *vcpu); }; -static inline void evtchn_port_set_pending(struct vcpu *v, struct evtchn *evtchn) +static inline void evtchn_port_set_pending(struct vcpu *v, + struct evtchn *evtchn) { v->domain->evtchn_port_ops->set_pending(v, evtchn); } -static inline void evtchn_port_clear_pending(struct domain *d, struct evtchn *evtchn) +static inline void evtchn_port_clear_pending(struct domain *d, + struct evtchn *evtchn) { d->evtchn_port_ops->clear_pending(d, evtchn); } diff --git a/xen/include/xen/event_fifo.h b/xen/include/xen/event_fifo.h new file mode 100644 index 0000000..2d7b7d0 --- /dev/null +++ b/xen/include/xen/event_fifo.h @@ -0,0 +1,56 @@ +/* + * FIFO-based event channel ABI. + * + * Copyright (C) 2013 Citrix Systems R&D Ltd. + * + * This source code is licensed under the GNU General Public License, + * Version 2 or later. See the file COPYING for more details. + */ +#ifndef __XEN_EVENT_FIFO_H__ +#define __XEN_EVENT_FIFO_H__ + +struct evtchn_fifo_queue { + uint32_t *head; /* points into control block */ + uint32_t tail; + spinlock_t lock; + uint8_t priority; +}; + +struct evtchn_fifo_vcpu { + struct page_info *cb_page; + struct evtchn_fifo_control_block *control_block; + struct evtchn_fifo_queue queue[EVTCHN_FIFO_MAX_QUEUES]; +}; + +#define EVTCHN_FIFO_EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t)) +#define EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES \ + ((1 << EVTCHN_FIFO_LINK_BITS) / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE) + + +struct evtchn_fifo_array_page { + struct page_info *page; + event_word_t *virt; +}; + +struct evtchn_fifo_domain { + struct evtchn_fifo_array_page event_array[EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES]; + unsigned num_evtchns; +}; + +int evtchn_fifo_init_control(struct evtchn_init_control *init_control); +int evtchn_fifo_expand_array(struct evtchn_expand_array *expand_array); +int evtchn_fifo_set_priority(struct evtchn_set_priority *set_priority); +int evtchn_fifo_set_limit(struct evtchn_set_limit *set_limit); +void evtchn_fifo_destroy(struct domain *domain); + +#endif /* __XEN_EVENT_FIFO_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index f95cf99..4c5cc20 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -96,6 +96,7 @@ struct evtchn } pirq; /* state == ECS_PIRQ */ u16 virq; /* state == ECS_VIRQ */ } u; + struct evtchn_fifo_queue *queue; #ifdef FLASK_ENABLE void *ssid; #endif @@ -210,6 +211,8 @@ struct vcpu /* Guest-specified relocation of vcpu_info. 
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index f95cf99..4c5cc20 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -96,6 +96,7 @@ struct evtchn
         } pirq;        /* state == ECS_PIRQ */
         u16 virq;      /* state == ECS_VIRQ */
     } u;
+    struct evtchn_fifo_queue *queue;
 #ifdef FLASK_ENABLE
     void *ssid;
 #endif
@@ -210,6 +211,8 @@ struct vcpu
     /* Guest-specified relocation of vcpu_info. */
     unsigned long vcpu_info_mfn;
 
+    struct evtchn_fifo_vcpu *evtchn_fifo;
+
     struct arch_vcpu arch;
 };
 
@@ -290,6 +293,7 @@ struct domain
     unsigned max_evtchns;
     spinlock_t event_lock;
     const struct evtchn_port_ops *evtchn_port_ops;
+    struct evtchn_fifo_domain *evtchn_fifo;
 
     struct grant_table *grant_table;
 
-- 
1.7.2.5