All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v7 0/9] libxl: New event API
@ 2012-01-13 19:25 Ian Jackson
  2012-01-13 19:25 ` [PATCH 1/9] libxl: New API for providing OS events to libxl Ian Jackson
                   ` (8 more replies)
  0 siblings, 9 replies; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel

This series has now been tested.  It includes bugfixes and all the
comments which people have made and which I said I would address.

These should be fairly uncontroversial:
 2/9  ocaml, libxl: support "private" fields
 4/9  libxl: introduce libxl_fd_set_nonblock, rationalise _cloexec
 7/9  libxl: New convenience macro CONTAINER_OF

These are the meat:
 1/9  libxl: New API for providing OS events to libxl
 3/9  libxl: New event generation API
 5/9  libxl: Permit multithreaded event waiting
 6/9  libxl: Asynchronous/long-running operation infrastructure
 8/9  libxl: Introduce libxl__ev_devstate
 9/9  libxl: Convert to asynchronous: device removal

^ permalink raw reply	[flat|nested] 31+ messages in thread

* [PATCH 1/9] libxl: New API for providing OS events to libxl
  2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
@ 2012-01-13 19:25 ` Ian Jackson
  2012-01-18 16:35   ` Ian Campbell
  2012-01-13 19:25 ` [PATCH 2/9] ocaml, libxl: support "private" fields Ian Jackson
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Jackson

We provide a new set of functions and related structures
  libxl_osevent_*
which are to be used by event-driven applications to receive
information from libxl about which fds libxl is interested in, and
what timeouts libxl is waiting for, and to pass back to libxl
information about which fds are readable/writeable etc., and which
timeouts have occurred.  Ie, low-level events.

In this patch, this new machinery is still all unused.  Callers will
appear in the next patch in the series, which introduces a new API for
applications to receive high-level events about actual domains etc.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tools/libxl/Makefile         |    2 +-
 tools/libxl/libxl.c          |   30 ++
 tools/libxl/libxl.h          |    6 +
 tools/libxl/libxl_event.c    |  750 ++++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_event.h    |  205 ++++++++++++
 tools/libxl/libxl_internal.h |  277 +++++++++++++++-
 6 files changed, 1267 insertions(+), 3 deletions(-)
 create mode 100644 tools/libxl/libxl_event.c
 create mode 100644 tools/libxl/libxl_event.h

diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
index 3c3661b..b58c43e 100644
--- a/tools/libxl/Makefile
+++ b/tools/libxl/Makefile
@@ -49,7 +49,7 @@ LIBXL_LIBS += -lyajl
 LIBXL_OBJS = flexarray.o libxl.o libxl_create.o libxl_dm.o libxl_pci.o \
 			libxl_dom.o libxl_exec.o libxl_xshelp.o libxl_device.o \
 			libxl_internal.o libxl_utils.o libxl_uuid.o libxl_json.o \
-			libxl_qmp.o $(LIBXL_OBJS-y)
+			libxl_qmp.o libxl_event.o $(LIBXL_OBJS-y)
 LIBXL_OBJS += _libxl_types.o libxl_flask.o _libxl_types_internal.o
 
 $(LIBXL_OBJS): CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_libxenstore) $(CFLAGS_libblktapctl)
diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 169fc97..413b684 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -45,6 +45,16 @@ int libxl_ctx_alloc(libxl_ctx **pctx, int version,
      * only as an initialiser, not as an expression. */
     memcpy(&ctx->lock, &mutex_value, sizeof(ctx->lock));
 
+    ctx->osevent_hooks = 0;
+
+    ctx->fd_rindex = 0;
+    LIBXL_LIST_INIT(&ctx->efds);
+    LIBXL_TAILQ_INIT(&ctx->etimes);
+
+    ctx->watch_slots = 0;
+    LIBXL_SLIST_INIT(&ctx->watch_freeslots);
+    libxl__ev_fd_init(&ctx->watch_efd);
+
     if ( stat(XENSTORE_PID_FILE, &stat_buf) != 0 ) {
         LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Is xenstore daemon running?\n"
                      "failed to stat %s", XENSTORE_PID_FILE);
@@ -79,9 +89,29 @@ int libxl_ctx_alloc(libxl_ctx **pctx, int version,
 int libxl_ctx_free(libxl_ctx *ctx)
 {
     if (!ctx) return 0;
+
+    int i;
+    GC_INIT(ctx);
+
+    /* Deregister all libxl__ev_KINDs: */
+
+    for (i = 0; i < ctx->watch_nslots; i++)
+        assert(!libxl__watch_slot_contents(gc, i));
+    libxl__ev_fd_deregister(gc, &ctx->watch_efd);
+
+    /* Now there should be no more events requested from the application: */
+
+    assert(LIBXL_LIST_EMPTY(&ctx->efds));
+    assert(LIBXL_TAILQ_EMPTY(&ctx->etimes));
+
     if (ctx->xch) xc_interface_close(ctx->xch);
     libxl_version_info_dispose(&ctx->version_info);
     if (ctx->xsh) xs_daemon_close(ctx->xsh);
+
+    free(ctx->fd_rindex);
+    free(ctx->watch_slots);
+
+    GC_FREE;
     free(ctx);
     return 0;
 }
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 723eac2..b067724 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -137,6 +137,7 @@
 #include <xen/sysctl.h>
 
 #include <libxl_uuid.h>
+#include <_libxl_list.h>
 
 typedef uint8_t libxl_mac[6];
 #define LIBXL_MAC_FMT "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx"
@@ -223,6 +224,9 @@ enum {
     ERROR_GUEST_TIMEDOUT = -8,
     ERROR_TIMEDOUT = -9,
     ERROR_NOPARAVIRT = -10,
+    ERROR_NOT_READY = -11,
+    ERROR_OSEVENT_REG_FAIL = -12,
+    ERROR_BUFFERFULL = -13,
 };
 
 #define LIBXL_VERSION 0
@@ -648,6 +652,8 @@ const char *libxl_xenpaging_dir_path(void);
 /* misc */
 int libxl_fd_set_cloexec(int fd);
 
+#include <libxl_event.h>
+
 #endif /* LIBXL_H */
 
 /*
diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
new file mode 100644
index 0000000..ec66340
--- /dev/null
+++ b/tools/libxl/libxl_event.c
@@ -0,0 +1,750 @@
+/*
+ * Copyright (C) 2011      Citrix Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+/*
+ * Internal event machinery for use by other parts of libxl
+ */
+
+#include <poll.h>
+
+#include "libxl_internal.h"
+
+/*
+ * The counter osevent_in_hook is used to ensure that the application
+ * honours the reentrancy restriction documented in libxl_event.h.
+ *
+ * The application's registration hooks should be called ONLY via
+ * these macros, with the ctx locked.  Likewise all the "occurred"
+ * entrypoints from the application should assert(!in_hook);
+ */
+#define OSEVENT_HOOK_INTERN(defval, hookname, ...)                      \
+    (CTX->osevent_hooks                                                 \
+     ? (CTX->osevent_in_hook++,                                         \
+        CTX->osevent_hooks->hookname(CTX->osevent_user, __VA_ARGS__),   \
+        CTX->osevent_in_hook--)                                         \
+     : defval)
+
+#define OSEVENT_HOOK(hookname,...)                      \
+    OSEVENT_HOOK_INTERN(0, hookname, __VA_ARGS__)
+
+#define OSEVENT_HOOK_VOID(hookname,...)                 \
+    OSEVENT_HOOK_INTERN((void)0, hookname, __VA_ARGS__)
+
+/*
+ * fd events
+ */
+
+int libxl__ev_fd_register(libxl__gc *gc, libxl__ev_fd *ev,
+                          libxl__ev_fd_callback *func,
+                          int fd, short events)
+{
+    int rc;
+
+    assert(fd >= 0);
+
+    CTX_LOCK;
+
+    rc = OSEVENT_HOOK(fd_register, fd, &ev->for_app_reg, events, ev);
+    if (rc) goto out;
+
+    ev->fd = fd;
+    ev->events = events;
+    ev->func = func;
+
+    LIBXL_LIST_INSERT_HEAD(&CTX->efds, ev, entry);
+
+    rc = 0;
+
+ out:
+    CTX_UNLOCK;
+    return rc;
+}
+
+int libxl__ev_fd_modify(libxl__gc *gc, libxl__ev_fd *ev, short events)
+{
+    int rc;
+
+    CTX_LOCK;
+    assert(libxl__ev_fd_isregistered(ev));
+
+    rc = OSEVENT_HOOK(fd_modify, ev->fd, &ev->for_app_reg, events);
+    if (rc) goto out;
+
+    ev->events = events;
+
+    rc = 0;
+ out:
+    CTX_UNLOCK;
+    return rc;
+}
+
+void libxl__ev_fd_deregister(libxl__gc *gc, libxl__ev_fd *ev)
+{
+    CTX_LOCK;
+
+    if (!libxl__ev_fd_isregistered(ev))
+        goto out;
+
+    OSEVENT_HOOK_VOID(fd_deregister, ev->fd, ev->for_app_reg);
+    LIBXL_LIST_REMOVE(ev, entry);
+    ev->fd = -1;
+
+ out:
+    CTX_UNLOCK;
+}
+
+/*
+ * timeouts
+ */
+
+
+int libxl__gettimeofday(libxl__gc *gc, struct timeval *now_r)
+{
+    int rc = gettimeofday(now_r, 0);
+    if (rc) {
+        LIBXL__LOG_ERRNO(CTX, LIBXL__LOG_ERROR, "gettimeofday failed");
+        return ERROR_FAIL;
+    }
+    return 0;
+}
+
+static int time_rel_to_abs(libxl__gc *gc, int ms, struct timeval *abs_out)
+{
+    int rc;
+    struct timeval additional = {
+        .tv_sec = ms / 1000,
+        .tv_usec = (ms % 1000) * 1000
+    };
+    struct timeval now;
+
+    rc = libxl__gettimeofday(gc, &now);
+    if (rc) return rc;
+
+    timeradd(&now, &additional, abs_out);
+    return 0;
+}
+
+static void time_insert_finite(libxl__gc *gc, libxl__ev_time *ev)
+{
+    libxl__ev_time *evsearch;
+    LIBXL_TAILQ_INSERT_SORTED(&CTX->etimes, entry, ev, evsearch, /*empty*/,
+                              timercmp(&ev->abs, &evsearch->abs, >));
+    ev->infinite = 0;
+}
+
+static int time_register_finite(libxl__gc *gc, libxl__ev_time *ev,
+                                struct timeval abs)
+{
+    int rc;
+
+    rc = OSEVENT_HOOK(timeout_register, &ev->for_app_reg, abs, ev);
+    if (rc) return rc;
+
+    ev->infinite = 0;
+    ev->abs = abs;
+    time_insert_finite(gc, ev);
+
+    return 0;
+}
+
+static void time_deregister(libxl__gc *gc, libxl__ev_time *ev)
+{
+    if (!ev->infinite) {
+        OSEVENT_HOOK_VOID(timeout_deregister, &ev->for_app_reg);
+        LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
+    }
+}
+
+
+int libxl__ev_time_register_abs(libxl__gc *gc, libxl__ev_time *ev,
+                                libxl__ev_time_callback *func,
+                                struct timeval abs)
+{
+    int rc;
+
+    CTX_LOCK;
+
+    rc = time_register_finite(gc, ev, abs);
+    if (rc) goto out;
+
+    ev->func = func;
+
+    rc = 0;
+ out:
+    CTX_UNLOCK;
+    return rc;
+}
+
+
+int libxl__ev_time_register_rel(libxl__gc *gc, libxl__ev_time *ev,
+                                libxl__ev_time_callback *func,
+                                int milliseconds /* as for poll(2) */)
+{
+    struct timeval abs;
+    int rc;
+
+    CTX_LOCK;
+
+    if (milliseconds < 0) {
+        ev->infinite = 1;
+    } else {
+        rc = time_rel_to_abs(gc, milliseconds, &abs);
+        if (rc) goto out;
+
+        rc = time_register_finite(gc, ev, abs);
+        if (rc) goto out;
+    }
+
+    ev->func = func;
+    rc = 0;
+
+ out:
+    CTX_UNLOCK;
+    return rc;
+}
+
+int libxl__ev_time_modify_abs(libxl__gc *gc, libxl__ev_time *ev,
+                              struct timeval abs)
+{
+    int rc;
+
+    CTX_LOCK;
+
+    assert(libxl__ev_time_isregistered(ev));
+
+    if (ev->infinite) {
+        rc = time_register_finite(gc, ev, abs);
+        if (rc) goto out;
+    } else {
+        rc = OSEVENT_HOOK(timeout_modify, &ev->for_app_reg, abs);
+        if (rc) goto out;
+
+        LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
+        ev->abs = abs;
+        time_insert_finite(gc, ev);
+    }
+
+    rc = 0;
+ out:
+    CTX_UNLOCK;
+    return rc;
+}
+
+int libxl__ev_time_modify_rel(libxl__gc *gc, libxl__ev_time *ev,
+                              int milliseconds)
+{
+    struct timeval abs;
+    int rc;
+
+    CTX_LOCK;
+
+    assert(libxl__ev_time_isregistered(ev));
+
+    if (milliseconds < 0) {
+        time_deregister(gc, ev);
+        ev->infinite = 1;
+        rc = 0;
+        goto out;
+    }
+
+    rc = time_rel_to_abs(gc, milliseconds, &abs);
+    if (rc) goto out;
+
+    rc = libxl__ev_time_modify_abs(gc, ev, abs);
+    if (rc) goto out;
+
+    rc = 0;
+ out:
+    CTX_UNLOCK;
+    return rc;
+}
+
+void libxl__ev_time_deregister(libxl__gc *gc, libxl__ev_time *ev)
+{
+    CTX_LOCK;
+
+    if (!libxl__ev_time_isregistered(ev))
+        goto out;
+
+    time_deregister(gc, ev);
+    ev->func = 0;
+
+ out:
+    CTX_UNLOCK;
+    return;
+}
+
+
+/*
+ * xenstore watches
+ */
+
+libxl__ev_xswatch *libxl__watch_slot_contents(libxl__gc *gc, int slotnum)
+{
+    libxl__ev_watch_slot *slot = &CTX->watch_slots[slotnum];
+    libxl__ev_watch_slot *slotcontents = LIBXL_SLIST_NEXT(slot, empty);
+
+    if (slotcontents == NULL ||
+        ((uintptr_t)slotcontents >= (uintptr_t)CTX->watch_slots &&
+         (uintptr_t)slotcontents < (uintptr_t)(CTX->watch_slots +
+                                               CTX->watch_nslots)))
+        /* An empty slot has either a NULL pointer (end of the
+         * free list), or a pointer to another entry in the array.
+         * So we can do a bounds check to distinguish empty from
+         * full slots.
+         */
+        /* We need to do the comparisons as uintptr_t because
+         * comparing pointers which are not in the same object is
+         * undefined behaviour; if the compiler managed to figure
+         * out that watch_slots[0..watch_nslots-1] is all of the
+         * whole array object it could prove that the above bounds
+         * check was always true if it was legal, and remove it!
+         *
+         * uintptr_t because even on a machine with signed
+         * pointers, objects do not cross zero; whereas on
+         * machines with unsigned pointers, they may cross
+         * 0x8bazillion.
+         */
+        return NULL;
+
+        /* see comment near libxl__ev_watch_slot definition */
+    return (void*)slotcontents;
+}
+
+static void libxl__set_watch_slot_contents(libxl__ev_watch_slot *slot,
+                                           libxl__ev_xswatch *w)
+{
+    /* we look a bit behind the curtain of LIBXL_SLIST, to explicitly
+     * assign to the pointer that's the next link.  See the comment
+     * by the definition of libxl__ev_watch_slot */
+    slot->empty.sle_next = (void*)w;
+}
+
+static void watchfd_callback(libxl__egc *egc, libxl__ev_fd *ev,
+                             int fd, short events, short revents)
+{
+    EGC_GC;
+
+    for (;;) {
+        char **event = xs_check_watch(CTX->xsh);
+        if (!event) {
+            if (errno == EAGAIN) break;
+            if (errno == EINTR) continue;
+            LIBXL__EVENT_DISASTER(egc, "cannot check/read watches", errno, 0);
+            return;
+        }
+
+        const char *epath = event[0];
+        const char *token = event[1];
+        int slotnum;
+        uint32_t counterval;
+        int rc = sscanf(token, "%d/%"SCNx32, &slotnum, &counterval);
+        if (rc != 2) {
+            LIBXL__LOG(CTX, LIBXL__LOG_ERROR,
+                       "watch epath=%s token=%s: failed to parse token",
+                       epath, token);
+            /* oh well */
+            goto ignore;
+        }
+        if (slotnum < 0 || slotnum >= CTX->watch_nslots) {
+            /* perhaps in the future we will make the watchslots array shrink */
+            LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "watch epath=%s token=%s:"
+                       " slotnum %d out of range [0,%d>",
+                       epath, token, slotnum, CTX->watch_nslots);
+            goto ignore;
+        }
+
+        libxl__ev_xswatch *w = libxl__watch_slot_contents(gc, slotnum);
+
+        if (!w) {
+            LIBXL__LOG(CTX, LIBXL__LOG_DEBUG,
+                       "watch epath=%s token=%s: empty slot",
+                       epath, token);
+            goto ignore;
+        }
+
+        if (w->counterval != counterval) {
+            LIBXL__LOG(CTX, LIBXL__LOG_DEBUG,
+                       "watch epath=%s token=%s: counter != %"PRIx32,
+                       epath, token, w->counterval);
+            goto ignore;
+        }
+
+        /* Now it's possible, though unlikely, that this was an event
+         * from a previous use of the same slot with the same counterval.
+         *
+         * In that case either:
+         *  - the event path is a child of the watch path, in
+         *    which case this watch would really have generated this
+         *    event if it had been registered soon enough and we are
+         *    OK to give this possibly-spurious event to the caller; or
+         * - it is not, in which case we must suppress it as the
+         *   caller should not see events for unrelated paths.
+         *
+         * See also docs/misc/xenstore.txt.
+         */
+        if (!xs_path_is_subpath(w->path, epath)) {
+            LIBXL__LOG(CTX, LIBXL__LOG_DEBUG,
+                       "watch epath=%s token=%s: not child of wpath=%s",
+                       epath, token, w->path);
+            goto ignore;
+        }
+
+        /* At last, we have checked everything! */
+        LIBXL__LOG(CTX, LIBXL__LOG_DEBUG,
+                   "watch event: epath=%s token=%s wpath=%s w=%p",
+                   epath, token, w->path, w);
+        w->callback(egc, w, w->path, epath);
+
+    ignore:
+        free(event);
+    }
+}
+
+static char *watch_token(libxl__gc *gc, int slotnum, uint32_t counterval)
+{
+    return libxl__sprintf(gc, "%d/%"PRIx32, slotnum, counterval);
+}
+
+int libxl__ev_xswatch_register(libxl__gc *gc, libxl__ev_xswatch *w,
+                               libxl__ev_xswatch_callback *func,
+                               const char *path /* copied */)
+{
+    libxl__ev_watch_slot *use = NULL;
+    char *path_copy = NULL;
+    int rc;
+
+    CTX_LOCK;
+
+    if (!libxl__ev_fd_isregistered(&CTX->watch_efd)) {
+        rc = libxl__ev_fd_register(gc, &CTX->watch_efd, watchfd_callback,
+                                   xs_fileno(CTX->xsh), POLLIN);
+        if (rc) goto out_rc;
+    }
+
+    if (LIBXL_SLIST_EMPTY(&CTX->watch_freeslots)) {
+        /* Free list is empty so there is not in fact a linked
+         * free list in the array and we can safely realloc it */
+        int newarraysize = (CTX->watch_nslots + 1) << 2;
+        int i;
+        libxl__ev_watch_slot *newarray =
+            realloc(CTX->watch_slots, sizeof(*newarray) * newarraysize);
+        if (!newarray) goto out_nomem;
+        for (i = CTX->watch_nslots; i < newarraysize; i++)
+            LIBXL_SLIST_INSERT_HEAD(&CTX->watch_freeslots,
+                                    &newarray[i], empty);
+        CTX->watch_slots = newarray;
+        CTX->watch_nslots = newarraysize;
+    }
+    use = LIBXL_SLIST_FIRST(&CTX->watch_freeslots);
+    assert(use);
+    LIBXL_SLIST_REMOVE_HEAD(&CTX->watch_freeslots, empty);
+
+    path_copy = strdup(path);
+    if (!path_copy) goto out_nomem;
+
+    int slotnum = use - CTX->watch_slots;
+    w->counterval = CTX->watch_counter++;
+
+    if (!xs_watch(CTX->xsh, path, watch_token(gc, slotnum, w->counterval))) {
+        LIBXL__LOG_ERRNOVAL(CTX, LIBXL__LOG_ERROR, errno,
+                            "create watch for path %s", path);
+        rc = ERROR_FAIL;
+        goto out_rc;
+    }
+
+    w->slotnum = slotnum;
+    w->path = path_copy;
+    w->callback = func;
+    libxl__set_watch_slot_contents(use, w);
+
+    CTX_UNLOCK;
+    return 0;
+
+ out_nomem:
+    rc = ERROR_NOMEM;
+ out_rc:
+    if (use)
+        LIBXL_SLIST_INSERT_HEAD(&CTX->watch_freeslots, use, empty);
+    if (path_copy)
+        free(path_copy);
+    CTX_UNLOCK;
+    return rc;
+}
+
+void libxl__ev_xswatch_deregister(libxl__gc *gc, libxl__ev_xswatch *w)
+{
+    /* it is legal to deregister from within _callback */
+    CTX_LOCK;
+
+    if (w->slotnum >= 0) {
+        char *token = watch_token(gc, w->slotnum, w->counterval);
+        if (!xs_unwatch(CTX->xsh, w->path, token))
+            /* Oh well, we will just get watch events forever more
+             * and ignore them.  But we should complain to the log. */
+            LIBXL__LOG_ERRNOVAL(CTX, LIBXL__LOG_ERROR, errno,
+                                "remove watch for path %s", w->path);
+
+        libxl__ev_watch_slot *slot = &CTX->watch_slots[w->slotnum];
+        LIBXL_SLIST_INSERT_HEAD(&CTX->watch_freeslots, slot, empty);
+        w->slotnum = -1;
+    }
+
+    free(w->path);
+    w->path = NULL;
+
+    CTX_UNLOCK;
+}
+
+/*
+ * osevent poll
+ */
+
+int libxl_osevent_beforepoll(libxl_ctx *ctx, int *nfds_io,
+                             struct pollfd *fds, int *timeout_upd,
+                             struct timeval now)
+{
+    libxl__ev_fd *efd;
+    int rc;
+
+    /*
+     * In order to be able to efficiently find the struct pollfd
+     * for a libxl__ev_fd during _afterpoll, we maintain a reverse
+     * index in CTX->fd_rindex: it is indexed by fd number, and each
+     * entry is the slot in the fds array which was filled in for that fd.
+     */
+
+    GC_INIT(ctx);
+    CTX_LOCK;
+
+    if (*nfds_io) {
+        /*
+         * As an optimisation, we don't touch fd_rindex
+         * if *nfds_io is zero on entry, since in that case the
+         * caller just wanted to know how big an array to give us.
+         *
+         * If !*nfds_io, the unconditional parts below are guaranteed
+         * not to mess with fd_rindex.
+         */
+
+        int maxfd = 0;
+        LIBXL_LIST_FOREACH(efd, &CTX->efds, entry) {
+            if (!efd->events)
+                continue;
+            if (efd->fd >= maxfd)
+                maxfd = efd->fd + 1;
+        }
+        /* make sure fd_rindex is big enough to be indexed by any polled fd */
+        if (CTX->fd_rindex_allocd < maxfd) {
+            assert(maxfd < INT_MAX / sizeof(int) / 2);
+            int *newarray = realloc(CTX->fd_rindex, sizeof(int) * maxfd);
+            if (!newarray) { rc = ERROR_NOMEM; goto out; }
+            memset(newarray + CTX->fd_rindex_allocd, 0,
+                   sizeof(int) * (maxfd - CTX->fd_rindex_allocd));
+            CTX->fd_rindex = newarray;
+            CTX->fd_rindex_allocd = maxfd;
+        }
+    }
+
+    int used = 0;
+    LIBXL_LIST_FOREACH(efd, &CTX->efds, entry) {
+        if (!efd->events)
+            continue;
+        if (used < *nfds_io) {
+            fds[used].fd = efd->fd;
+            fds[used].events = efd->events;
+            fds[used].revents = 0;
+            assert(efd->fd < CTX->fd_rindex_allocd);
+            CTX->fd_rindex[efd->fd] = used;
+        }
+        used++;
+    }
+    rc = used <= *nfds_io ? 0 : ERROR_BUFFERFULL;
+
+    *nfds_io = used;
+
+    libxl__ev_time *etime = LIBXL_TAILQ_FIRST(&CTX->etimes);
+    if (etime) {
+        int our_timeout;
+        struct timeval rel;
+        static struct timeval zero;
+
+        timersub(&etime->abs, &now, &rel);
+
+        if (timercmp(&rel, &zero, <)) {
+            our_timeout = 0;
+        } else if (rel.tv_sec >= 2000000) {
+            our_timeout = 2000000000;
+        } else {
+            our_timeout = rel.tv_sec * 1000 + (rel.tv_usec + 999) / 1000;
+        }
+        if (*timeout_upd < 0 || our_timeout < *timeout_upd)
+            *timeout_upd = our_timeout;
+    }
+
+ out:
+    CTX_UNLOCK;
+    GC_FREE;
+    return rc;
+}
+
+static int afterpoll_check_fd(libxl_ctx *ctx,
+                              const struct pollfd *fds, int nfds,
+                              int fd, int events)
+    /* returns mask of events which were requested and occurred */
+{
+    if (fd >= ctx->fd_rindex_allocd)
+        /* added after we went into poll, have to try again */
+        return 0;
+
+    int slot = ctx->fd_rindex[fd];
+
+    if (slot >= nfds)
+        /* stale slot entry; again, added afterwards */
+        return 0;
+
+    if (fds[slot].fd != fd)
+        /* again, stale slot entry */
+        return 0;
+
+    int revents = fds[slot].revents & events;
+    /* we mask in case requested events have changed */
+
+    return revents;
+}
+
+void libxl_osevent_afterpoll(libxl_ctx *ctx, int nfds, const struct pollfd *fds,
+                             struct timeval now)
+{
+    EGC_INIT(ctx);
+    CTX_LOCK;
+    libxl__ev_fd *efd;
+
+    LIBXL_LIST_FOREACH(efd, &CTX->efds, entry) {
+        if (!efd->events)
+            continue;
+
+        int revents = afterpoll_check_fd(CTX,fds,nfds, efd->fd,efd->events);
+        if (revents)
+            efd->func(egc, efd, efd->fd, efd->events, revents);
+    }
+
+    for (;;) {
+        libxl__ev_time *etime = LIBXL_TAILQ_FIRST(&CTX->etimes);
+        if (!etime)
+            break;
+
+        assert(!etime->infinite);
+
+        if (timercmp(&etime->abs, &now, >))
+            break;
+
+        time_deregister(gc, etime);
+
+        etime->func(egc, etime, &etime->abs);
+    }
+
+    CTX_UNLOCK;
+    EGC_FREE;
+}
+
+
+/*
+ * osevent hook and callback machinery
+ */
+
+void libxl_osevent_register_hooks(libxl_ctx *ctx,
+                                  const libxl_osevent_hooks *hooks,
+                                  void *user)
+{
+    GC_INIT(ctx);
+    CTX_LOCK;
+    ctx->osevent_hooks = hooks;
+    ctx->osevent_user = user;
+    CTX_UNLOCK;
+    GC_FREE;
+}
+
+
+void libxl_osevent_occurred_fd(libxl_ctx *ctx, void *for_libxl,
+                               int fd, short events, short revents)
+{
+    libxl__ev_fd *ev = for_libxl;
+
+    EGC_INIT(ctx);
+    CTX_LOCK;
+    assert(!CTX->osevent_in_hook);
+
+    assert(fd == ev->fd);
+    revents &= ev->events;
+    if (revents)
+        ev->func(egc, ev, fd, ev->events, revents);
+
+    CTX_UNLOCK;
+    EGC_FREE;
+}
+
+void libxl_osevent_occurred_timeout(libxl_ctx *ctx, void *for_libxl)
+{
+    libxl__ev_time *ev = for_libxl;
+
+    EGC_INIT(ctx);
+    CTX_LOCK;
+    assert(!CTX->osevent_in_hook);
+
+    assert(!ev->infinite);
+    LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
+    ev->func(egc, ev, &ev->abs);
+
+    CTX_UNLOCK;
+    EGC_FREE;
+}
+
+void libxl__event_disaster(libxl__egc *egc, const char *msg, int errnoval,
+                           libxl_event_type type /* may be 0 */,
+                           const char *file, int line, const char *func)
+{
+    EGC_GC;
+
+    libxl__log(CTX, XTL_CRITICAL, errnoval, file, line, func,
+               "DISASTER in event loop: %s%s%s%s",
+               msg,
+               type ? " (relates to event type " : "",
+               type ? libxl_event_type_to_string(type) : "",
+               type ? ")" : "");
+
+    /*
+     * FIXME: This should call the "disaster" hook supplied to
+     * libxl_event_register_callbacks, which will be introduced in the
+     * next patch.
+     */
+
+    const char verybad[] =
+        "DISASTER in event loop not handled by libxl application";
+    LIBXL__LOG(CTX, XTL_CRITICAL, verybad);
+    fprintf(stderr, "libxl: fatal error, exiting program: %s\n", verybad);
+    exit(-1);
+}
+
+void libxl__egc_cleanup(libxl__egc *egc)
+{
+    libxl__free_all(&egc->gc);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxl/libxl_event.h b/tools/libxl/libxl_event.h
new file mode 100644
index 0000000..63ef65e
--- /dev/null
+++ b/tools/libxl/libxl_event.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (C) 2011      Citrix Ltd.
+ * Author Ian Jackson <ian.jackson@eu.citrix.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; version 2.1 only. with the special
+ * exception on linking described in file LICENSE.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+#ifndef LIBXL_EVENT_H
+#define LIBXL_EVENT_H
+
+#include <libxl.h>
+
+
+/*======================================================================*/
+
+/*
+ * OS event handling - passing low-level OS events to libxl
+ *
+ * Event-driven programs must use these facilities to allow libxl
+ * to become aware of readability/writeability of file descriptors
+ * and the occurrence of timeouts.
+ *
+ * There are two approaches available.  The first is appropriate for
+ * simple programs handling reasonably small numbers of domains:
+ *
+ *   for (;;) {
+ *      libxl_osevent_beforepoll(...)
+ *      poll();
+ *      libxl_osevent_afterpoll(...);
+ *      for (;;) {
+ *        r=libxl_event_check(...);
+ *        if (r==ERROR_NOT_READY) break;
+ *        if (r) handle failure;
+ *        do something with the event;
+ *      }
+ *   }
+ *
+ * The second approach uses libxl_osevent_register_hooks and is
+ * suitable for programs which are already using a callback-based
+ * event library.
+ *
+ * An application may freely mix the two styles of interaction.
+ *
+ * (Callers inside libxl may not call libxl_osevent_... functions.)
+ */
+
+struct pollfd;
+
+/* The caller should provide beforepoll with some space for libxl's
+ * fds, and tell libxl how much space is available by setting *nfds_io.
+ * fds points to the start of this space (and fds may be a pointer into
+ * a larger array, for example, if the application has some fds of
+ * its own that it is interested in).
+ *
+ * On return *nfds_io will in any case have been updated by libxl
+ * according to how many fds libxl wants to poll on.
+ *
+ * If the space was sufficient, libxl fills in fds[0..<new
+ * *nfds_io>] suitably for poll(2), updates *timeout_upd if needed,
+ * and returns ok.
+ *
+ * If space was insufficient, fds[0..<old *nfds_io>] is undefined on
+ * return; *nfds_io on return will be greater than the value on
+ * entry; *timeout_upd may or may not have been updated; and
+ * libxl_osevent_beforepoll returns ERROR_BUFFERFULL.  In this case
+ * the application needs to make more space (enough space for
+ * *nfds_io struct pollfd) and then call beforepoll again, before
+ * entering poll(2).  Typically this will involve calling realloc.
+ *
+ * The application may call beforepoll with fds==NULL and
+ * *nfds_io==0 in order to find out how much space is needed.
+ *
+ * *timeout_upd is as for poll(2): it's in milliseconds, and
+ * negative values mean no timeout (infinity).
+ * libxl_osevent_beforepoll will only reduce the timeout, naturally.
+ */
+int libxl_osevent_beforepoll(libxl_ctx *ctx, int *nfds_io,
+                             struct pollfd *fds, int *timeout_upd,
+                             struct timeval now);
+
+/* nfds and fds[0..nfds-1] must be from the most recent call to
+ * _beforepoll, as modified by poll.  (It is therefore not possible
+ * to have multiple threads simultaneously polling using this
+ * interface.)
+ *
+ * This function actually performs all of the IO and other actions,
+ * and generates events (libxl_event), which are implied by either
+ * (a) the time of day or (b) both (i) the returned information from
+ * _beforepoll, and (ii) the results from poll specified in
+ * fds[0..nfds-1].  Generated events can then be retrieved by
+ * libxl_event_check.
+ */
+void libxl_osevent_afterpoll(libxl_ctx *ctx, int nfds, const struct pollfd *fds,
+                             struct timeval now);
+
+
+typedef struct libxl_osevent_hooks {
+  int (*fd_register)(void *user, int fd, void **for_app_registration_out,
+                     short events, void *for_libxl);
+  int (*fd_modify)(void *user, int fd, void **for_app_registration_update,
+                   short events);
+  void (*fd_deregister)(void *user, int fd, void *for_app_registration);
+  int (*timeout_register)(void *user, void **for_app_registration_out,
+                          struct timeval abs, void *for_libxl);
+  int (*timeout_modify)(void *user, void **for_app_registration_update,
+                         struct timeval abs);
+  void (*timeout_deregister)(void *user, void *for_app_registration);
+} libxl_osevent_hooks;
+
+/* The application which calls libxl_osevent_register_hooks promises to
+ * maintain a register of fds and timeouts that libxl is interested
+ * in, and make calls into libxl (libxl_osevent_occurred_*)
+ * when those fd events and timeouts occur.  This is more efficient
+ * than _beforepoll/_afterpoll if there are many fds (which can
+ * happen if the same libxl application is managing many domains).
+ *
+ * For an fd event, events is as for poll().  register or modify may
+ * be called with events==0, in which case it must still work
+ * normally, just not generate any events.
+ *
+ * For a timeout event, milliseconds is as for poll().
+ * Specifically, negative values of milliseconds mean NO TIMEOUT.
+ * This is used by libxl to temporarily disable a timeout.
+ *
+ * If the register or modify hook succeeds it may update
+ * *for_app_registration_out/_update and must then return 0.
+ * On entry to register, *for_app_registration_out is always NULL.
+ *
+ * A registration or modification hook may fail, in which case it
+ * must leave the registration state of the fd or timeout unchanged.
+ * It may then either return ERROR_OSEVENT_REG_FAIL or any positive
+ * int.  The value returned will be passed up through libxl and
+ * eventually returned back to the application.  When register
+ * fails, any value stored into *for_app_registration_out is ignored
+ * by libxl; when modify fails, any changed value stored into
+ * *for_app_registration_update is honoured by libxl and will be
+ * passed to future modify or deregister calls.
+ *
+ * libxl will only attempt to register one callback for any one fd.
+ * libxl will remember the value stored in *for_app_registration_out
+ * (or *for_app_registration_update) by a successful call to
+ * register (or modify), and pass it to subsequent calls to modify
+ * or deregister.
+ *
+ * libxl_osevent_register_hooks may be called only once for each
+ * libxl_ctx.  libxl may make calls to register/modify/deregister
+ * from within any libxl function (indeed, it will usually call
+ * register from libxl_osevent_register_hooks).  Conversely, the
+ * application MUST NOT make the event occurrence calls
+ * (libxl_osevent_occurred_*) into libxl reentrantly from within
+ * libxl (for example, from within the register/modify functions).
+ *
+ * Lock hierarchy: the register/modify/deregister functions may be
+ * called with locks held.  These locks (the "libxl internal locks")
+ * are inside the libxl_ctx.  Therefore, if those register functions
+ * acquire any locks of their own ("caller register locks") outside
+ * libxl, to avoid deadlock one of the following must hold for each
+ * such caller register lock:
+ *  (a) "acquire libxl internal locks before caller register lock":
+ *      No libxl function may be called with the caller register
+ *      lock held.
+ *  (b) "acquire caller register lock before libxl internal locks":
+ *      No libxl function may be called _without_ the caller
+ *      register lock held.
+ * Of these we would normally recommend (a).
+ *
+ * The value *hooks is not copied and must outlast the libxl_ctx.
+ */
+void libxl_osevent_register_hooks(libxl_ctx *ctx,
+                                  const libxl_osevent_hooks *hooks,
+                                  void *user);
+
+/* It is NOT legal to call _occurred_ reentrantly within any libxl
+ * function.  Specifically it is NOT legal to call it from within
+ * a register callback.  Conversely, libxl MAY call register/deregister
+ * from within libxl_osevent_occurred_*.
+ */
+
+void libxl_osevent_occurred_fd(libxl_ctx *ctx, void *for_libxl,
+                               int fd, short events, short revents);
+
+/* Implicitly, on entry to this function the timeout has been
+ * deregistered.  If _occurred_timeout is called, libxl will not
+ * call timeout_deregister; if it wants to requeue the timeout it
+ * will call timeout_register again.
+ */
+void libxl_osevent_occurred_timeout(libxl_ctx *ctx, void *for_libxl);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 39e9e05..8c9f7c9 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -35,6 +35,7 @@
 #include <unistd.h>
 
 #include <sys/mman.h>
+#include <sys/poll.h>
 #include <sys/select.h>
 #include <sys/stat.h>
 #include <sys/time.h>
@@ -109,6 +110,71 @@ _hidden void libxl__log(libxl_ctx *ctx, xentoollog_level msglevel, int errnoval,
 
      /* these functions preserve errno (saving and restoring) */
 
+typedef struct libxl__gc libxl__gc;
+typedef struct libxl__egc libxl__egc;
+
+typedef struct libxl__ev_fd libxl__ev_fd;
+typedef void libxl__ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev,
+                                   int fd, short events, short revents);
+struct libxl__ev_fd {
+    /* caller should include this in their own struct */
+    /* read-only for caller, who may read only when registered: */
+    int fd; /* negative when Idle (see libxl__ev_fd_init) */
+    short events; /* poll(2)-style mask, cf. libxl__ev_fd_register */
+    libxl__ev_fd_callback *func;
+    /* remainder is private for libxl__ev_fd... */
+    LIBXL_LIST_ENTRY(libxl__ev_fd) entry;
+    void *for_app_reg;
+};
+
+
+typedef struct libxl__ev_time libxl__ev_time;
+typedef void libxl__ev_time_callback(libxl__egc *egc, libxl__ev_time *ev,
+                                     const struct timeval *requested_abs);
+struct libxl__ev_time {
+    /* caller should include this in their own struct */
+    /* read-only for caller, who may read only when registered: */
+    libxl__ev_time_callback *func; /* null when Idle (see _init) */
+    /* remainder is private for libxl__ev_time... */
+    int infinite; /* not registered in list or with app if infinite */
+    LIBXL_TAILQ_ENTRY(libxl__ev_time) entry;
+    struct timeval abs;
+    void *for_app_reg;
+};
+
+typedef struct libxl__ev_xswatch libxl__ev_xswatch;
+typedef void libxl__ev_xswatch_callback(libxl__egc *egc, libxl__ev_xswatch*,
+                            const char *watch_path, const char *event_path);
+struct libxl__ev_xswatch {
+    /* caller should include this in their own struct */
+    /* read-only for caller, who may read only when registered: */
+    char *path;
+    libxl__ev_xswatch_callback *callback;
+    /* remainder is private for libxl__ev_xswatch... */
+    int slotnum; /* registered iff slotnum >= 0 */
+    uint32_t counterval;
+};
+
+/*
+ * An entry in the watch_slots table is either:
+ *  1. an entry in the free list, ie NULL or pointer to next free list entry
+ *  2. a pointer to a libxl__ev_xswatch
+ *
+ * But we don't want to use unions or type-punning because the
+ * compiler might "prove" that our code is wrong and misoptimise it.
+ *
+ * The rules say that all struct pointers have identical
+ * representation and alignment requirements (C99+TC1+TC2 6.2.5p26) so
+ * what we do is simply declare our array as containing only the free
+ * list pointers, and explicitly convert from and to our actual
+ * xswatch pointers when we store and retrieve them.
+ */
+typedef struct libxl__ev_watch_slot {
+    LIBXL_SLIST_ENTRY(struct libxl__ev_watch_slot) empty;
+} libxl__ev_watch_slot;
+    
+_hidden libxl__ev_xswatch *libxl__watch_slot_contents(libxl__gc*, int slotnum);
+
 struct libxl__ctx {
     xentoollog_logger *lg;
     xc_interface *xch;
@@ -127,6 +193,23 @@ struct libxl__ctx {
        * documented in the libxl public interface.
        */
 
+    int osevent_in_hook;
+    const libxl_osevent_hooks *osevent_hooks;
+    void *osevent_user;
+      /* See the comment for OSEVENT_HOOK_INTERN in libxl_event.c
+       * for restrictions on the use of the osevent fields. */
+
+    int fd_rindex_allocd;
+    int *fd_rindex; /* see libxl_osevent_beforepoll */
+    LIBXL_LIST_HEAD(, libxl__ev_fd) efds;
+    LIBXL_TAILQ_HEAD(, libxl__ev_time) etimes;
+
+    libxl__ev_watch_slot *watch_slots;
+    int watch_nslots;
+    LIBXL_SLIST_HEAD(, libxl__ev_watch_slot) watch_freeslots;
+    uint32_t watch_counter; /* helps disambiguate slot reuse */
+    libxl__ev_fd watch_efd;
+
     /* for callers who reap children willy-nilly; caller must only
      * set this after libxl_init and before any other call - or
      * may leave them untouched */
@@ -157,12 +240,17 @@ typedef struct {
 
 #define PRINTF_ATTRIBUTE(x, y) __attribute__((format(printf, x, y)))
 
-typedef struct {
+struct libxl__gc {
     /* mini-GC */
     int alloc_maxsize;
     void **alloc_ptrs;
     libxl_ctx *owner;
-} libxl__gc;
+};
+
+struct libxl__egc {
+    /* for event-generating functions only */
+    struct libxl__gc gc;
+};
 
 #define LIBXL_INIT_GC(gc,ctx) do{               \
         (gc).alloc_maxsize = 0;                 \
@@ -234,6 +322,140 @@ _hidden bool libxl__xs_mkdir(libxl__gc *gc, xs_transaction_t t,
 
 _hidden char *libxl__xs_libxl_path(libxl__gc *gc, uint32_t domid);
 
+
+/*
+ * Event generation functions provided by the libxl event core to the
+ * rest of libxl.  Implemented in terms of _beforepoll/_afterpoll
+ * and/or the fd registration machinery, as provided by the
+ * application.
+ *
+ * Semantics are similar to those of the fd and timeout registration
+ * functions provided to libxl_osevent_register_hooks.
+ *
+ * Non-0 returns from libxl__ev_KIND_{register,modify} have already been
+ * logged by the core and should be returned unmodified to libxl's
+ * caller; NB that they may be valid libxl error codes but they may
+ * also be positive numbers supplied by the caller.
+ *
+ * In each case, there is a libxl__ev_FOO structure which can be in
+ * one of three states:
+ *
+ *   Undefined   - Might contain anything.  All-bits-zero is
+ *                 an undefined state.
+ *
+ *   Idle        - Struct contents are defined enough to pass to any
+ *                 libxl__ev_FOO function but not registered and
+ *                 callback will not be called.  The struct does not
+ *                 contain references to any allocated resources so
+ *                 can be thrown away.
+ *
+ *   Active      - Request for events has been registered and events
+ *                 may be generated.  _deregister must be called to
+ *                 reclaim resources.
+ *
+ * These functions are provided for each kind of event KIND:
+ *
+ *   int libxl__ev_KIND_register(libxl__gc *gc, libxl__ev_KIND *GEN,
+ *                              libxl__ev_KIND_callback *FUNC,
+ *                              DETAILS);
+ *      On entry *GEN must be in state Undefined or Idle.
+ *      Returns a libxl error code; on error return *GEN is Idle.
+ *      On successful return *GEN is Active and FUNC will be
+ *      called by the event machinery in future.  FUNC will
+ *      not be called from within the call to _register.
+ *      FUNC will be called with the context locked (with CTX_LOCK).
+ *
+ *  void libxl__ev_KIND_deregister(libxl__gc *gc, libxl__ev_KIND *GEN_upd);
+ *      On entry *GEN must be in state Active or Idle.
+ *      On return it is Idle.  (Idempotent.)
+ *
+ *  void libxl__ev_KIND_init(libxl__ev_KIND *GEN);
+ *      Provided for initialising an Undefined KIND.
+ *      On entry *GEN must be in state Idle or Undefined.
+ *      On return it is Idle.  (Idempotent.)
+ *
+ *  int libxl__ev_KIND_isregistered(const libxl__ev_KIND *GEN);
+ *      On entry *GEN must be Idle or Active.
+ *      Returns nonzero if it is Active, zero otherwise.
+ *      Cannot fail.
+ *
+ *  int libxl__ev_KIND_modify(libxl__gc*, libxl__ev_KIND *GEN,
+ *                            DETAILS);
+ *      Only provided for some kinds of generator.
+ *      On entry *GEN must be Active and on return, whether successful
+ *      or not, it will be Active.
+ *      Returns a libxl error code; on error the modification
+ *      is not effective.
+ *
+ * All of these functions are fully threadsafe and may be called by
+ * general code in libxl even from within event callback FUNCs.
+ * The ctx will be locked on entry to each FUNC and FUNC should not
+ * unlock it.
+ *
+ * Callers of libxl__ev_KIND_register must ensure that the
+ * registration is undone, with _deregister, in libxl_ctx_free.
+ */
+
+
+_hidden int libxl__ev_fd_register(libxl__gc*, libxl__ev_fd *ev_out,
+                                  libxl__ev_fd_callback*,
+                                  int fd, short events /* as for poll(2) */);
+_hidden int libxl__ev_fd_modify(libxl__gc*, libxl__ev_fd *ev,
+                                short events);
+_hidden void libxl__ev_fd_deregister(libxl__gc*, libxl__ev_fd *ev);
+static inline void libxl__ev_fd_init(libxl__ev_fd *efd)
+                    { efd->fd = -1; } /* fd<0 is what _isregistered tests */
+static inline int libxl__ev_fd_isregistered(const libxl__ev_fd *efd)
+                    { return efd->fd >= 0; } /* pure query: never writes *efd */
+
+_hidden int libxl__ev_time_register_rel(libxl__gc*, libxl__ev_time *ev_out,
+                                        libxl__ev_time_callback*,
+                                        int milliseconds /* as for poll(2) */);
+_hidden int libxl__ev_time_register_abs(libxl__gc*, libxl__ev_time *ev_out,
+                                        libxl__ev_time_callback*,
+                                        struct timeval);
+_hidden int libxl__ev_time_modify_rel(libxl__gc*, libxl__ev_time *ev,
+                                      int milliseconds /* as for poll(2) */);
+_hidden int libxl__ev_time_modify_abs(libxl__gc*, libxl__ev_time *ev,
+                                      struct timeval);
+_hidden void libxl__ev_time_deregister(libxl__gc*, libxl__ev_time *ev);
+static inline void libxl__ev_time_init(libxl__ev_time *ev)
+                { ev->func = 0; } /* null func is what _isregistered tests */
+static inline int libxl__ev_time_isregistered(const libxl__ev_time *ev)
+                { return !!ev->func; } /* pure query: never writes *ev */
+
+
+_hidden int libxl__ev_xswatch_register(libxl__gc*, libxl__ev_xswatch *xsw_out,
+                                       libxl__ev_xswatch_callback*,
+                                       const char *path /* copied */);
+_hidden void libxl__ev_xswatch_deregister(libxl__gc *gc, libxl__ev_xswatch*);
+
+static inline void libxl__ev_xswatch_init(libxl__ev_xswatch *xswatch_out)
+                { xswatch_out->slotnum = -1; } /* slotnum<0: not registered */
+static inline int libxl__ev_xswatch_isregistered(const libxl__ev_xswatch *xw)
+                { return xw->slotnum >= 0; } /* nonzero iff registered */
+
+
+/*
+ * In general, call this via the macro LIBXL__EVENT_DISASTER.
+ *
+ * Event-generating functions may call this if they might have wanted
+ * to generate an event (either an internal one ie a
+ * libxl__ev_FOO_callback or an application event), but are prevented
+ * from doing so due to eg lack of memory.
+ *
+ * NB that this function may return and the caller isn't supposed to
+ * then crash, although it may fail (and henceforth leave things in a
+ * state where many or all calls fail).
+ */
+_hidden void libxl__event_disaster(libxl__egc*, const char *msg, int errnoval,
+                                   libxl_event_type type /* may be 0 */,
+                                   const char *file, int line,
+                                   const char *func);
+#define LIBXL__EVENT_DISASTER(egc, msg, errnoval, type) \
+    libxl__event_disaster(egc, msg, errnoval, type, __FILE__,__LINE__,__func__)
+
+
 /* from xl_dom */
 _hidden libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_shutdown_reason(libxl__gc *gc, uint32_t domid);
@@ -600,6 +822,8 @@ _hidden int libxl__parse_mac(const char *s, libxl_mac mac);
 /* compare mac address @a and @b. 0 if the same, -ve if a<b and +ve if a>b */
 _hidden int libxl__compare_macs(libxl_mac *a, libxl_mac *b);
 
+_hidden int libxl__gettimeofday(libxl__gc *gc, struct timeval *now_r);
+
 #define STRINGIFY(x) #x
 #define TOSTRING(x) STRINGIFY(x)
 
@@ -737,6 +961,55 @@ libxl__device_model_version_running(libxl__gc *gc, uint32_t domid);
 
 
 /*
+ * Calling context and GC for event-generating functions:
+ *
+ * These are for use by parts of libxl which directly or indirectly
+ * call libxl__event_occurred.  These contain a gc but also a list of
+ * deferred events.
+ *
+ * You should never need to initialise an egc unless you are part of
+ * the event machinery itself.  Otherwise you will always be given an
+ * egc if you need one.  Even functions which generate specific kinds
+ * of events don't need to - rather, they will be passed an egc into
+ * their own callback function and should just use the one they're
+ * given.
+ *
+ * Functions using LIBXL_INIT_EGC may *not* generally be called from
+ * within libxl, because libxl__egc_cleanup may call back into the
+ * application.  This should be documented near the function
+ * prototype(s) for callers of LIBXL_INIT_EGC and EGC_INIT.  You
+ * should in any case not find it necessary to call egc-creators from
+ * within libxl.
+ *
+ * For the same reason libxl__egc_cleanup (or EGC_FREE) must be called
+ * with the ctx *unlocked*.  So the right pattern has the EGC_...
+ * macro calls on the outside of the CTX_... ones.
+ */
+
+/* useful for all functions which take an egc: */
+
+#define EGC_GC                                  \
+    libxl__gc *const gc = &egc->gc
+
+/* egc initialisation and destruction: */
+
+#define LIBXL_INIT_EGC(egc,ctx) do{             \
+        LIBXL_INIT_GC((egc).gc,ctx);            \
+        /* list of occurred events tbd */       \
+    } while(0)
+
+_hidden void libxl__egc_cleanup(libxl__egc *egc);
+
+/* convenience macros: */
+
+#define EGC_INIT(ctx)                       \
+    libxl__egc egc[1]; LIBXL_INIT_EGC(egc[0],ctx);      \
+    EGC_GC
+
+#define EGC_FREE           libxl__egc_cleanup(egc)
+
+
+/*
  * Convenience macros.
  */
 
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 2/9] ocaml, libxl: support "private" fields
  2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
  2012-01-13 19:25 ` [PATCH 1/9] libxl: New API for providing OS events to libxl Ian Jackson
@ 2012-01-13 19:25 ` Ian Jackson
  2012-01-18 14:03   ` Ian Campbell
  2012-01-13 19:25 ` [PATCH 3/9] libxl: New event generation API Ian Jackson
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Jackson

The changeset
  24378:b4365e2c2595  libxl: idl: support new "private" type attribute
is not complete.  Actually using this feature does not work because
the ocaml idl generator does not know about it.

So add that support.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tools/ocaml/libs/xl/genwrap.py |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/tools/ocaml/libs/xl/genwrap.py b/tools/ocaml/libs/xl/genwrap.py
index 5f8639a..61abecf 100644
--- a/tools/ocaml/libs/xl/genwrap.py
+++ b/tools/ocaml/libs/xl/genwrap.py
@@ -91,6 +91,8 @@ def gen_ocaml_ml(ty, interface, indent=""):
             s += "\t{\n"
             
         for f in ty.fields:
+            if f.type.private:
+                continue
             x = ocaml_instance_of(f.type, f.name)
             x = x.replace("\n", "\n\t\t")
             s += "\t\t" + x + ";\n"
@@ -146,6 +148,8 @@ def c_val(ty, c, o, indent="", parent = None):
     elif isinstance(ty, libxltypes.Aggregate) and (parent is None):
         n = 0
         for f in ty.fields:
+            if f.type.private:
+                continue
             s += "%s\n" % c_val(f.type, "%s->%s" % (c, f.name), "Field(%s, %d)" % (o,n), parent="%s->" % (c))
             n = n + 1
     else:
@@ -210,6 +214,8 @@ def ocaml_Val(ty, o, c, indent="", parent = None):
         
         n = 0
         for f in ty.fields:
+            if f.type.private:
+                continue
             s += "\n"
             s += "\t%s\n" % ocaml_Val(f.type, "%s_field" % ty.rawname, "%s->%s" % (c,f.name), parent="%s->" % c)
             s += "\tStore_field(%s, %d, %s);\n" % (o, n, "%s_field" % ty.rawname)
@@ -288,6 +294,8 @@ if __name__ == '__main__':
     cinc.write(autogen_header("/*", "*/"))
 
     for ty in types:
+        if ty.private:
+            continue
         #sys.stdout.write(" TYPE    %-20s " % ty.rawname)
         ml.write(gen_ocaml_ml(ty, False))
         ml.write("\n")
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 3/9] libxl: New event generation API
  2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
  2012-01-13 19:25 ` [PATCH 1/9] libxl: New API for providing OS events to libxl Ian Jackson
  2012-01-13 19:25 ` [PATCH 2/9] ocaml, libxl: support "private" fields Ian Jackson
@ 2012-01-13 19:25 ` Ian Jackson
  2012-01-18 17:33   ` Ian Campbell
  2012-01-13 19:25 ` [PATCH 4/9] libxl: introduce libxl_fd_set_nonblock, rationalise _cloexec Ian Jackson
                   ` (5 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Jackson

Replace the existing API for retrieving high-level events (events
about domains, etc.) from libxl with a new one.

This changes the definition and semantics of the `libxl_event'
structure, and replaces the calls for obtaining information about
domain death and disk eject events.

This is an incompatible change, sorry.  The alternative was to try to
provide both the previous horrid API and the new one, and would also
involve never using the name `libxl_event' for the new interface.

The new "libxl_event" structure is blacklisted in the ocaml bindings
for two reasons:
  - It has a field name "type" (which is a keyword in ocaml);
    the ocaml idl generator should massage this field name on
    output, to "type_" perhaps.
  - The ocaml idl generator does not support KeyedUnion.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tools/libxl/libxl.c            |  329 +++++++++++++++++++++++++++++-----------
 tools/libxl/libxl.h            |   55 +------
 tools/libxl/libxl_event.c      |  236 ++++++++++++++++++++++++++---
 tools/libxl/libxl_event.h      |  183 ++++++++++++++++++++++-
 tools/libxl/libxl_internal.h   |   77 +++++++++-
 tools/libxl/libxl_types.idl    |   34 ++++-
 tools/libxl/xl_cmdimpl.c       |  270 +++++++++++++++++++--------------
 tools/ocaml/libs/xl/genwrap.py |    1 +
 8 files changed, 908 insertions(+), 277 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 413b684..19ff12c 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -45,8 +45,11 @@ int libxl_ctx_alloc(libxl_ctx **pctx, int version,
      * only as an initialiser, not as an expression. */
     memcpy(&ctx->lock, &mutex_value, sizeof(ctx->lock));
 
+    LIBXL_TAILQ_INIT(&ctx->occurred);
+
     ctx->osevent_hooks = 0;
 
+    ctx->fd_polls = 0;
     ctx->fd_rindex = 0;
     LIBXL_LIST_INIT(&ctx->efds);
     LIBXL_TAILQ_INIT(&ctx->etimes);
@@ -55,6 +58,9 @@ int libxl_ctx_alloc(libxl_ctx **pctx, int version,
     LIBXL_SLIST_INIT(&ctx->watch_freeslots);
     libxl__ev_fd_init(&ctx->watch_efd);
 
+    LIBXL_TAILQ_INIT(&ctx->death_list);
+    libxl__ev_xswatch_init(&ctx->death_watch);
+
     if ( stat(XENSTORE_PID_FILE, &stat_buf) != 0 ) {
         LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Is xenstore daemon running?\n"
                      "failed to stat %s", XENSTORE_PID_FILE);
@@ -86,6 +92,20 @@ int libxl_ctx_alloc(libxl_ctx **pctx, int version,
     return rc;
 }
 
+static void free_disable_deaths(libxl__gc *gc,
+                                struct libxl__evgen_domain_death_list *l) {
+    libxl_evgen_domain_death *death; /* current head of l */
+    while ((death = LIBXL_TAILQ_FIRST(l))) /* loop until l has no head */
+        libxl__evdisable_domain_death(gc, death); /* unlinks, frees death */
+}
+
+static void discard_events(struct libxl__event_list *l) {
+    /* Frees each event without unlinking it, so l is corrupt on return. */
+    libxl_event *ev, *next; /* next is read before ev is freed: no UAF */
+    for (ev = LIBXL_TAILQ_FIRST(l); ev; ev = next)
+        next = LIBXL_TAILQ_NEXT(ev, link), libxl_event_free(0, ev);
+}
+
 int libxl_ctx_free(libxl_ctx *ctx)
 {
     if (!ctx) return 0;
@@ -95,6 +115,13 @@ int libxl_ctx_free(libxl_ctx *ctx)
 
     /* Deregister all libxl__ev_KINDs: */
 
+    free_disable_deaths(gc, &CTX->death_list);
+    free_disable_deaths(gc, &CTX->death_reported);
+
+    libxl_evgen_disk_eject *eject;
+    while ((eject = LIBXL_LIST_FIRST(&CTX->disk_eject_evgens)))
+        libxl__evdisable_disk_eject(gc, eject);
+
     for (i = 0; i < ctx->watch_nslots; i++)
         assert(!libxl__watch_slot_contents(gc, i));
     libxl__ev_fd_deregister(gc, &ctx->watch_efd);
@@ -108,9 +135,12 @@ int libxl_ctx_free(libxl_ctx *ctx)
     libxl_version_info_dispose(&ctx->version_info);
     if (ctx->xsh) xs_daemon_close(ctx->xsh);
 
+    free(ctx->fd_polls);
     free(ctx->fd_rindex);
     free(ctx->watch_slots);
 
+    discard_events(&ctx->occurred);
+
     GC_FREE;
     free(ctx);
     return 0;
@@ -646,117 +676,176 @@ int libxl_domain_reboot(libxl_ctx *ctx, uint32_t domid)
     return ret;
 }
 
-int libxl_get_wait_fd(libxl_ctx *ctx, int *fd)
-{
-    *fd = xs_fileno(ctx->xsh);
-    return 0;
-}
+static void domain_death_xswatch_callback(libxl__egc *egc, libxl__ev_xswatch *w,
+                                        const char *wpath, const char *epath) {
+    EGC_GC;
+    libxl_evgen_domain_death *evg;
+    uint32_t domid;
+    int rc;
 
-int libxl_wait_for_domain_death(libxl_ctx *ctx, uint32_t domid, libxl_waiter *waiter)
-{
-    waiter->path = strdup("@releaseDomain");
-    if (asprintf(&(waiter->token), "%d", LIBXL_EVENT_TYPE_DOMAIN_DEATH) < 0)
-        return -1;
-    if (!xs_watch(ctx->xsh, waiter->path, waiter->token))
-        return -1;
-    return 0;
-}
+    CTX_LOCK;
 
-int libxl_wait_for_disk_ejects(libxl_ctx *ctx, uint32_t guest_domid, libxl_device_disk *disks, int num_disks, libxl_waiter *waiter)
-{
-    GC_INIT(ctx);
-    int i, rc = -1;
-    uint32_t domid = libxl_get_stubdom_id(ctx, guest_domid);
+    evg = LIBXL_TAILQ_FIRST(&CTX->death_list);
+    if (!evg) goto out;
 
-    if (!domid)
-        domid = guest_domid;
+    domid = evg->domid;
 
-    for (i = 0; i < num_disks; i++) {
-        if (asprintf(&(waiter[i].path), "%s/device/vbd/%d/eject",
-                     libxl__xs_get_dompath(gc, domid),
-                     libxl__device_disk_dev_number(disks[i].vdev,
-                                                   NULL, NULL)) < 0)
-            goto out;
-        if (asprintf(&(waiter[i].token), "%d", LIBXL_EVENT_TYPE_DISK_EJECT) < 0)
+    for (;;) {
+        int nentries = LIBXL_TAILQ_NEXT(evg, entry) ? 200 : 1;
+        xc_domaininfo_t domaininfos[nentries];
+        const xc_domaininfo_t *got = domaininfos, *gotend;
+
+        rc = xc_domain_getinfolist(CTX->xch, domid, nentries, domaininfos);
+        if (rc == -1) {
+            LIBXL__EVENT_DISASTER(egc, "xc_domain_getinfolist failed while"
+                                  " processing @releaseDomain watch event",
+                                  errno, 0);
             goto out;
-        xs_watch(ctx->xsh, waiter[i].path, waiter[i].token);
+        }
+        gotend = &domaininfos[rc];
+
+        for (;;) {
+            if (!evg)
+                goto all_reported;
+
+            if (!rc || got->domain > evg->domid) {
+                /* ie, the list doesn't contain evg->domid any more so
+                 * the domain has been destroyed */
+                libxl_evgen_domain_death *evg_next;
+
+                libxl_event *ev = NEW_EVENT(egc, DOMAIN_DESTROY, evg->domid);
+                if (!ev) goto out;
+
+                libxl__event_occurred(egc, ev);
+
+                evg->death_reported = 1;
+                evg_next = LIBXL_TAILQ_NEXT(evg, entry);
+                LIBXL_TAILQ_REMOVE(&CTX->death_list, evg, entry);
+                LIBXL_TAILQ_INSERT_HEAD(&CTX->death_reported, evg, entry);
+                evg = evg_next;
+
+                continue;
+            }
+            
+            if (got == gotend)
+                break;
+
+            if (got->domain < evg->domid) {
+                got++;
+                continue;
+            }
+
+            assert(evg->domid == got->domain);
+
+            if (!evg->shutdown_reported &&
+                (got->flags & XEN_DOMINF_shutdown)) {
+                libxl_event *ev = NEW_EVENT(egc, DOMAIN_SHUTDOWN, got->domain);
+                if (!ev) goto out;
+                
+                ev->u.domain_shutdown.shutdown_reason =
+                    (got->flags >> XEN_DOMINF_shutdownshift) &
+                    XEN_DOMINF_shutdownmask;
+                libxl__event_occurred(egc, ev);
+
+                evg->shutdown_reported = 1;
+            }
+            evg = LIBXL_TAILQ_NEXT(evg, entry);
+        }
+
+        assert(rc); /* rc==0 results in us eating all evgs and quitting */
+        domid = gotend[-1].domain;
     }
-    rc = 0;
-out:
-    GC_FREE;
-    return rc;
+ all_reported:
+ out:
+
+    CTX_UNLOCK;
 }
 
-int libxl_get_event(libxl_ctx *ctx, libxl_event *event)
-{
-    unsigned int num;
-    char **events = xs_read_watch(ctx->xsh, &num);
-    if (num != 2) {
-        free(events);
-        return ERROR_FAIL;
+int libxl_evenable_domain_death(libxl_ctx *ctx, uint32_t domid,
+                libxl_ev_user user, libxl_evgen_domain_death **evgen_out) {
+    GC_INIT(ctx);
+    libxl_evgen_domain_death *evg, *evg_search;
+    int rc;
+    
+    CTX_LOCK;
+
+    evg = malloc(sizeof(*evg));  if (!evg) { rc = ERROR_NOMEM; goto out; }
+    memset(evg, 0, sizeof(*evg));
+    evg->domid = domid;
+    evg->user = user;
+
+    LIBXL_TAILQ_INSERT_SORTED(&ctx->death_list, entry, evg, evg_search, ,
+                              evg->domid > evg_search->domid);
+
+    if (!libxl__ev_xswatch_isregistered(&ctx->death_watch)) {
+        rc = libxl__ev_xswatch_register(gc, &ctx->death_watch,
+                        domain_death_xswatch_callback, "@releaseDomain");
+        if (rc) { libxl__evdisable_domain_death(gc, evg); goto out; }
     }
-    event->path = strdup(events[XS_WATCH_PATH]);
-    event->token = strdup(events[XS_WATCH_TOKEN]);
-    event->type = atoi(event->token);
-    free(events);
-    return 0;
-}
 
-int libxl_stop_waiting(libxl_ctx *ctx, libxl_waiter *waiter)
-{
-    if (!xs_unwatch(ctx->xsh, waiter->path, waiter->token))
-        return ERROR_FAIL;
-    else
-        return 0;
-}
+    *evgen_out = evg;
+    rc = 0;
 
-int libxl_free_event(libxl_event *event)
-{
-    free(event->path);
-    free(event->token);
-    return 0;
-}
+ out:
+    CTX_UNLOCK;
+    return rc;
+};
 
-int libxl_free_waiter(libxl_waiter *waiter)
-{
-    free(waiter->path);
-    free(waiter->token);
-    return 0;
-}
+void libxl__evdisable_domain_death(libxl__gc *gc,
+                                   libxl_evgen_domain_death *evg) {
+    CTX_LOCK;
 
-int libxl_event_get_domain_death_info(libxl_ctx *ctx, uint32_t domid, libxl_event *event, libxl_dominfo *info)
-{
-    if (libxl_domain_info(ctx, info, domid) < 0)
-        return 0;
+    if (!evg->death_reported)
+        LIBXL_TAILQ_REMOVE(&CTX->death_list, evg, entry);
+    else
+        LIBXL_TAILQ_REMOVE(&CTX->death_reported, evg, entry);
 
-    if (info->running || (!info->shutdown && !info->dying))
-        return ERROR_INVAL;
+    free(evg);
 
-    return 1;
+    if (!LIBXL_TAILQ_FIRST(&CTX->death_list) &&
+        libxl__ev_xswatch_isregistered(&CTX->death_watch))
+        libxl__ev_xswatch_deregister(gc, &CTX->death_watch);
+
+    CTX_UNLOCK;
 }
 
-int libxl_event_get_disk_eject_info(libxl_ctx *ctx, uint32_t domid, libxl_event *event, libxl_device_disk *disk)
-{
+void libxl_evdisable_domain_death(libxl_ctx *ctx,
+                                  libxl_evgen_domain_death *evg) {
     GC_INIT(ctx);
-    char *path;
+    libxl__evdisable_domain_death(gc, evg);
+    GC_FREE;
+}
+
+static void disk_eject_xswatch_callback(libxl__egc *egc, libxl__ev_xswatch *w,
+                                        const char *wpath, const char *epath) {
+    EGC_GC;
+    libxl_evgen_disk_eject *evg = (void*)w;
     char *backend;
     char *value;
     char backend_type[BACKEND_STRING_SIZE+1];
 
-    value = libxl__xs_read(gc, XBT_NULL, event->path);
+    value = libxl__xs_read(gc, XBT_NULL, wpath);
 
-    if (!value || strcmp(value,  "eject")) {
-        GC_FREE;
-        return 0;
+    if (!value || strcmp(value,  "eject"))
+        return;
+
+    if (libxl__xs_write(gc, XBT_NULL, wpath, "")) {
+        LIBXL__EVENT_DISASTER(egc, "xs_write failed acknowledging eject",
+                              errno, LIBXL_EVENT_TYPE_DISK_EJECT);
+        return;
     }
 
-    path = strdup(event->path);
-    path[strlen(path) - 6] = '\0';
-    backend = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/backend", path));
+    libxl_event *ev = NEW_EVENT(egc, DISK_EJECT, evg->domid);
+    libxl_device_disk *disk = &ev->u.disk_eject.disk;
+    
+    backend = libxl__xs_read(gc, XBT_NULL,
+                             libxl__sprintf(gc, "%.*s/backend",
+                                            (int)strlen(wpath)-6, wpath));
 
     sscanf(backend,
-            "/local/domain/%d/backend/%" TOSTRING(BACKEND_STRING_SIZE) "[a-z]/%*d/%*d",
-            &disk->backend_domid, backend_type);
+            "/local/domain/%d/backend/%" TOSTRING(BACKEND_STRING_SIZE)
+           "[a-z]/%*d/%*d",
+           &disk->backend_domid, backend_type);
     if (!strcmp(backend_type, "tap") || !strcmp(backend_type, "vbd")) {
         disk->backend = LIBXL_DISK_BACKEND_TAP;
     } else if (!strcmp(backend_type, "qdisk")) {
@@ -765,19 +854,83 @@ int libxl_event_get_disk_eject_info(libxl_ctx *ctx, uint32_t domid, libxl_event
         disk->backend = LIBXL_DISK_BACKEND_UNKNOWN;
     }
 
-    disk->pdev_path = strdup("");
+    disk->pdev_path = strdup(""); /* xxx fixme malloc failure */
     disk->format = LIBXL_DISK_FORMAT_EMPTY;
     /* this value is returned to the user: do not free right away */
-    disk->vdev = xs_read(ctx->xsh, XBT_NULL, libxl__sprintf(gc, "%s/dev", backend), NULL);
+    disk->vdev = xs_read(CTX->xsh, XBT_NULL,
+                         libxl__sprintf(gc, "%s/dev", backend), NULL);
     disk->removable = 1;
     disk->readwrite = 0;
     disk->is_cdrom = 1;
 
-    free(path);
+    libxl__event_occurred(egc, ev);
+}
+
+int libxl_evenable_disk_eject(libxl_ctx *ctx, uint32_t guest_domid,
+                              const char *vdev, libxl_ev_user user,
+                              libxl_evgen_disk_eject **evgen_out) {
+    GC_INIT(ctx);
+    CTX_LOCK;
+    int rc;
+    char *path;
+    libxl_evgen_disk_eject *evg = NULL;
+
+    evg = malloc(sizeof(*evg));  if (!evg) { rc = ERROR_NOMEM; goto out; }
+    memset(evg, 0, sizeof(*evg));
+    evg->user = user;
+    evg->domid = guest_domid;
+    LIBXL_LIST_INSERT_HEAD(&CTX->disk_eject_evgens, evg, entry);
+
+    evg->vdev = strdup(vdev);
+    if (!evg->vdev) { rc = ERROR_NOMEM; goto out; }
+
+    uint32_t domid = libxl_get_stubdom_id(ctx, guest_domid);
+
+    if (!domid)
+        domid = guest_domid;
+
+    path = libxl__sprintf(gc, "%s/device/vbd/%d/eject",
+                 libxl__xs_get_dompath(gc, domid),
+                 libxl__device_disk_dev_number(vdev, NULL, NULL));
+    if (!path) { rc = ERROR_NOMEM; goto out; }
+
+    rc = libxl__ev_xswatch_register(gc, &evg->watch,
+                                    disk_eject_xswatch_callback, path);
+    if (rc) goto out;
+
+    *evgen_out = evg;
+    CTX_UNLOCK;
     GC_FREE;
-    return 1;
+    return 0;
+
+ out:
+    if (evg)
+        libxl__evdisable_disk_eject(gc, evg);
+    CTX_UNLOCK;
+    GC_FREE;
+    return rc;
+}
+
+void libxl__evdisable_disk_eject(libxl__gc *gc, libxl_evgen_disk_eject *evg) {
+    CTX_LOCK;
+
+    LIBXL_LIST_REMOVE(evg, entry);
+
+    if (libxl__ev_xswatch_isregistered(&evg->watch))
+        libxl__ev_xswatch_deregister(gc, &evg->watch);
+
+    free(evg->vdev);
+    free(evg);
+
+    CTX_UNLOCK;
 }
 
+void libxl_evdisable_disk_eject(libxl_ctx *ctx, libxl_evgen_disk_eject *evg) {
+    GC_INIT(ctx);
+    libxl__evdisable_disk_eject(gc, evg);
+    GC_FREE;
+}    
+
 int libxl_domain_destroy(libxl_ctx *ctx, uint32_t domid)
 {
     GC_INIT(ctx);
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index b067724..4d3391f 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -53,7 +53,10 @@
  *    A public function may be called from within libxl; the call
  *    context initialisation macros will make sure that the internal
  *    caller's context is reused (eg, so that the same xenstore
- *    transaction is used).
+ *    transaction is used).  But in-libxl callers of libxl public
+ *    functions should note that any libxl public function may cause
+ *    recursive reentry into libxl via the application's event
+ *    callback hook.
  *
  *    Public functions have names like libxl_foobar.
  *
@@ -152,6 +155,8 @@ void libxl_key_value_list_dispose(libxl_key_value_list *kvl);
 
 typedef uint32_t libxl_hwcap[8];
 
+typedef uint64_t libxl_ev_user;
+
 typedef struct {
     uint32_t size;          /* number of bytes in map */
     uint8_t *map;
@@ -200,6 +205,9 @@ typedef struct {
     int v;
 } libxl_enum_string_table;
 
+struct libxl_event;
+typedef LIBXL_TAILQ_ENTRY(struct libxl_event) libxl_ev_link;
+
 typedef struct libxl__ctx libxl_ctx;
 
 #include "_libxl_types.h"
@@ -300,51 +308,6 @@ int libxl_run_bootloader(libxl_ctx *ctx,
 
   /* 0 means ERROR_ENOMEM, which we have logged */
 
-/* events handling */
-
-typedef struct {
-    /* event type */
-    libxl_event_type type;
-    /* data for internal use of the library */
-    char *path;
-    char *token;
-} libxl_event;
-
-typedef struct {
-    char *path;
-    char *token;
-} libxl_waiter;
-
-
-int libxl_get_wait_fd(libxl_ctx *ctx, int *fd);
-/* waiter is allocated by the caller */
-int libxl_wait_for_domain_death(libxl_ctx *ctx, uint32_t domid, libxl_waiter *waiter);
-/* waiter is a preallocated array of num_disks libxl_waiter elements */
-int libxl_wait_for_disk_ejects(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disks, int num_disks, libxl_waiter *waiter);
-int libxl_get_event(libxl_ctx *ctx, libxl_event *event);
-int libxl_stop_waiting(libxl_ctx *ctx, libxl_waiter *waiter);
-int libxl_free_event(libxl_event *event);
-int libxl_free_waiter(libxl_waiter *waiter);
-
-/*
- * Returns:
- *  - 0 if the domain is dead but there is no cleanup to be done. e.g
- *    because someone else has already done it.
- *  - 1 if the domain is dead and there is cleanup to be done.
- *
- * Can return error if the domain exists and is still running.
- *
- * *info will contain valid domain state iff 1 is returned. In
- * particular if 1 is returned then info->shutdown_reason is
- * guaranteed to be valid since by definition the domain is
- * (shutdown||dying))
- */
-int libxl_event_get_domain_death_info(libxl_ctx *ctx, uint32_t domid, libxl_event *event, libxl_dominfo *info);
-
-/*
- * Returns true and fills *disk if the caller should eject the disk
- */
-int libxl_event_get_disk_eject_info(libxl_ctx *ctx, uint32_t domid, libxl_event *event, libxl_device_disk *disk);
 
 int libxl_domain_rename(libxl_ctx *ctx, uint32_t domid,
                         const char *old_name, const char *new_name);
diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index ec66340..621a7cc 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -510,9 +510,9 @@ void libxl__ev_xswatch_deregister(libxl__gc *gc, libxl__ev_xswatch *w)
  * osevent poll
  */
 
-int libxl_osevent_beforepoll(libxl_ctx *ctx, int *nfds_io,
-                             struct pollfd *fds, int *timeout_upd,
-                             struct timeval now)
+static int beforepoll_internal(libxl__gc *gc, int *nfds_io,
+                               struct pollfd *fds, int *timeout_upd,
+                               struct timeval now)
 {
     libxl__ev_fd *efd;
     int rc;
@@ -524,9 +524,6 @@ int libxl_osevent_beforepoll(libxl_ctx *ctx, int *nfds_io,
      * the fds array corresponds to a slot in fd_beforepolled.
      */
 
-    GC_INIT(ctx);
-    CTX_LOCK;
-
     if (*nfds_io) {
         /*
          * As an optimisation, we don't touch fd_rindex
@@ -593,8 +590,18 @@ int libxl_osevent_beforepoll(libxl_ctx *ctx, int *nfds_io,
     }
 
  out:
+    return rc;
+}
+
+int libxl_osevent_beforepoll(libxl_ctx *ctx, int *nfds_io,
+                             struct pollfd *fds, int *timeout_upd,
+                             struct timeval now)
+{
+    EGC_INIT(ctx);
+    CTX_LOCK;
+    int rc = beforepoll_internal(gc, nfds_io, fds, timeout_upd, now);
     CTX_UNLOCK;
-    GC_FREE;
+    EGC_FREE;
     return rc;
 }
 
@@ -623,11 +630,11 @@ static int afterpoll_check_fd(libxl_ctx *ctx,
     return revents;
 }
 
-void libxl_osevent_afterpoll(libxl_ctx *ctx, int nfds, const struct pollfd *fds,
-                             struct timeval now)
+static void afterpoll_internal(libxl__egc *egc,
+                               int nfds, const struct pollfd *fds,
+                               struct timeval now)
 {
-    EGC_INIT(ctx);
-    CTX_LOCK;
+    EGC_GC;
     libxl__ev_fd *efd;
 
     LIBXL_LIST_FOREACH(efd, &CTX->efds, entry) {
@@ -653,12 +660,18 @@ void libxl_osevent_afterpoll(libxl_ctx *ctx, int nfds, const struct pollfd *fds,
 
         etime->func(egc, etime, &etime->abs);
     }
+}
 
+void libxl_osevent_afterpoll(libxl_ctx *ctx, int nfds, const struct pollfd *fds,
+                             struct timeval now)
+{
+    EGC_INIT(ctx);
+    CTX_LOCK;
+    afterpoll_internal(egc, nfds, fds, now);
     CTX_UNLOCK;
     EGC_FREE;
 }
 
-
 /*
  * osevent hook and callback machinery
  */
@@ -723,11 +736,10 @@ void libxl__event_disaster(libxl__egc *egc, const char *msg, int errnoval,
                type ? libxl_event_type_to_string(type) : "",
                type ? ")" : "");
 
-    /*
-     * FIXME: This should call the "disaster" hook supplied to
-     * libxl_event_register_callbacks, which will be introduced in the
-     * next patch.
-     */
+    if (CTX->event_hooks && CTX->event_hooks->disaster) {
+        CTX->event_hooks->disaster(CTX->event_hooks_user, type, msg, errnoval);
+        return;
+    }
 
     const char verybad[] =
         "DISASTER in event loop not handled by libxl application";
@@ -736,9 +748,197 @@ void libxl__event_disaster(libxl__egc *egc, const char *msg, int errnoval,
     exit(-1);
 }
 
+static void egc_run_callbacks(libxl__egc *egc)
+{
+    EGC_GC;
+    libxl_event *ev, *ev_tmp;
+    LIBXL_TAILQ_FOREACH_SAFE(ev, &egc->occurred_for_callback, link, ev_tmp) {
+        LIBXL_TAILQ_REMOVE(&egc->occurred_for_callback, ev, link);
+        CTX->event_hooks->event_occurs(CTX->event_hooks_user, ev);
+    }
+}
+
 void libxl__egc_cleanup(libxl__egc *egc)
 {
-    libxl__free_all(&egc->gc);
+    EGC_GC;
+    libxl__free_all(gc);
+
+    egc_run_callbacks(egc);
+}
+
+/*
+ * Event retrieval etc.
+ */
+
+void libxl_event_register_callbacks(libxl_ctx *ctx,
+                  const libxl_event_hooks *hooks, void *user)
+{
+    ctx->event_hooks = hooks;
+    ctx->event_hooks_user = user;
+}
+
+void libxl__event_occurred(libxl__egc *egc, libxl_event *event)
+{
+    EGC_GC;
+
+    if (CTX->event_hooks &&
+        (CTX->event_hooks->event_occurs_mask & (1UL << event->type))) {
+        /* libxl__egc_cleanup will call the callback, just before exit
+         * from libxl.  This helps avoid reentrancy bugs: parts of
+         * libxl that call libxl__event_occurred do not have to worry
+         * that libxl might be reentered at that point. */
+        LIBXL_TAILQ_INSERT_TAIL(&egc->occurred_for_callback, event, link);
+        return;
+    } else {
+        LIBXL_TAILQ_INSERT_TAIL(&CTX->occurred, event, link);
+    }
+}
+
+void libxl_event_free(libxl_ctx *ctx, libxl_event *event)
+{
+    /* Exceptionally, this function may be called from libxl, with ctx==0 */
+    libxl_event_dispose(event);
+    free(event);
+}
+
+libxl_event *libxl__event_new(libxl__egc *egc,
+                              libxl_event_type type, uint32_t domid)
+{
+    libxl_event *ev;
+
+    ev = malloc(sizeof(*ev));
+    if (!ev) {
+        LIBXL__EVENT_DISASTER(egc, "allocate new event", errno, type);
+        return NULL;
+    }
+
+    memset(ev, 0, sizeof(*ev));
+    ev->type = type;
+    ev->domid = domid;
+
+    return ev;
+}
+
+static int event_check_internal(libxl__egc *egc, libxl_event **event_r,
+                                unsigned long typemask,
+                                libxl_event_predicate *pred, void *pred_user)
+{
+    EGC_GC;
+    libxl_event *ev;
+    int rc;
+
+    LIBXL_TAILQ_FOREACH(ev, &CTX->occurred, link) {
+        if (!(typemask & (1UL << ev->type)))
+            continue;
+
+        if (pred && !pred(ev, pred_user))
+            continue;
+
+        /* got one! */
+        LIBXL_TAILQ_REMOVE(&CTX->occurred, ev, link);
+        *event_r = ev;
+        rc = 0;
+        goto out;
+    }
+    rc = ERROR_NOT_READY;
+
+ out:
+    return rc;
+}
+
+int libxl_event_check(libxl_ctx *ctx, libxl_event **event_r,
+                      unsigned long typemask,
+                      libxl_event_predicate *pred, void *pred_user)
+{
+    EGC_INIT(ctx);
+    CTX_LOCK;
+    int rc = event_check_internal(egc, event_r, typemask, pred, pred_user);
+    CTX_UNLOCK;
+    EGC_FREE;
+    return rc;
+}
+
+static int eventloop_iteration(libxl__egc *egc) {
+    EGC_GC;
+    int rc;
+    struct timeval now;
+    
+    CTX_LOCK;
+
+    rc = libxl__gettimeofday(gc, &now);
+    if (rc) goto out;
+
+    int timeout;
+
+    for (;;) {
+        int nfds = CTX->fd_polls_allocd;
+        timeout = -1;
+        rc = beforepoll_internal(gc, &nfds, CTX->fd_polls, &timeout, now);
+        if (!rc) break;
+        if (rc != ERROR_BUFFERFULL) goto out;
+
+        struct pollfd *newarray =
+            (nfds > INT_MAX / sizeof(struct pollfd) / 2) ? 0 :
+            realloc(CTX->fd_polls, sizeof(*newarray) * nfds);
+
+        if (!newarray) { rc = ERROR_NOMEM; goto out; }
+
+        CTX->fd_polls = newarray;
+        CTX->fd_polls_allocd = nfds;
+    }
+
+    rc = poll(CTX->fd_polls, CTX->fd_polls_allocd, timeout);
+    if (rc < 0) {
+        if (errno == EINTR)
+            return 0; /* will go round again if caller requires */
+
+        LIBXL__LOG_ERRNOVAL(CTX, LIBXL__LOG_ERROR, errno, "poll failed");
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    rc = libxl__gettimeofday(gc, &now);
+    if (rc) goto out;
+
+    afterpoll_internal(egc, CTX->fd_polls_allocd, CTX->fd_polls, now);
+
+    CTX_UNLOCK;
+
+    rc = 0;
+ out:
+    return rc;
+}
+
+int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r,
+                     unsigned long typemask,
+                     libxl_event_predicate *pred, void *pred_user)
+{
+    int rc;
+
+    EGC_INIT(ctx);
+    CTX_LOCK;
+
+    for (;;) {
+        rc = event_check_internal(egc, event_r, typemask, pred, pred_user);
+        if (rc != ERROR_NOT_READY) goto out;
+
+        rc = eventloop_iteration(egc);
+        if (rc) goto out;
+
+        /* we unlock and cleanup the egc each time we go through this loop,
+         * so that (a) we don't accumulate garbage and (b) any events
+         * which are to be dispatched by callback are actually delivered
+         * in a timely fashion.
+         */
+        CTX_UNLOCK;
+        libxl__egc_cleanup(egc);
+        CTX_LOCK;
+    }
+
+ out:
+    CTX_UNLOCK;
+    EGC_FREE;
+    return rc;
 }
 
 /*
diff --git a/tools/libxl/libxl_event.h b/tools/libxl/libxl_event.h
index 63ef65e..0e83800 100644
--- a/tools/libxl/libxl_event.h
+++ b/tools/libxl/libxl_event.h
@@ -18,6 +18,181 @@
 
 #include <libxl.h>
 
+/*======================================================================*/
+
+/*
+ * Domain event handling - getting Xen events from libxl
+ *
+ * (Callers inside libxl may not call libxl_event_check or _wait.)
+ */
+
+#define LIBXL_EVENTMASK_ALL (~(unsigned long)0)
+
+typedef int libxl_event_predicate(const libxl_event*, void *user);
+  /* Return value is 0 if the event is unwanted or non-0 if it is.
+   * Predicates are not allowed to fail.
+   */
+
+int libxl_event_check(libxl_ctx *ctx, libxl_event **event_r,
+                      unsigned long typemask,
+                      libxl_event_predicate *predicate, void *predicate_user);
+  /* Searches for an event, already-happened, which matches typemask
+   * and predicate.  predicate==0 matches any event.
+   * libxl_event_check returns the event, which must then later be
+   * freed by the caller using libxl_event_free.
+   *
+   * Returns ERROR_NOT_READY if no such event has happened.
+   */
+
+int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r,
+                     unsigned long typemask,
+                     libxl_event_predicate *predicate, void *predicate_user);
+  /* Like libxl_event_check but blocks if no suitable events are
+   * available, until some are.  Uses libxl_osevent_beforepoll/
+   * _afterpoll so may be inefficient if very many domains are being
+   * handled by a single program.
+   */
+
+void libxl_event_free(libxl_ctx *ctx, libxl_event *event);
+
+
+/* Alternatively or additionally, the application may also use this: */
+
+typedef struct libxl_event_hooks {
+    uint64_t event_occurs_mask;
+    void (*event_occurs)(void *user, const libxl_event *event);
+    void (*disaster)(void *user, libxl_event_type type,
+                     const char *msg, int errnoval);
+} libxl_event_hooks;
+
+void libxl_event_register_callbacks(libxl_ctx *ctx,
+                                    const libxl_event_hooks *hooks, void *user);
+  /*
+   * Arranges that libxl will henceforth call event_occurs for any
+   * events whose type is set in event_occurs_mask, rather than
+   * queueing the event for retrieval by libxl_event_check/wait.
+   * Events whose bit is clear in mask are not affected.
+   *
+   * event becomes owned by the application and must be freed, either
+   * by event_occurs or later.
+   *
+   * event_occurs may be NULL if mask is 0.
+   *
+   * libxl_event_register_callbacks also provides a way for libxl to
+   * report to the application that there was a problem reporting
+   * events; this can occur due to lack of host memory during event
+   * handling, or other wholly unrecoverable errors from system calls
+   * made by libxl.  This will not happen for frivolous reasons - only
+   * if the system, or the Xen components of it, are badly broken.
+   *
+   * msg and errnoval will describe the action that libxl was trying
+   * to do, and type specifies the type of libxl events which may be
+   * missing.  type may be 0 in which case events of all types may be
+   * missing.
+   *
+   * disaster may be NULL.  If it is, or if _register_callbacks has
+   * not been called, errors of this kind are fatal to the entire
+   * application: libxl will print messages to its logs and to stderr
+   * and call exit(-1).
+   *
+   * If disaster returns, it may be the case that some or all future
+   * libxl calls will return errors; likewise it may be the case that
+   * no more events (of the specified type, if applicable) can be
+   * produced.  An application which supplies a disaster function
+   * should normally react either by exiting, or by (when it has
+   * returned to its main event loop) shutting down libxl with
+   * libxl_ctx_free and perhaps trying to restart it with
+   * libxl_ctx_init.
+   *
+   * In any case before calling disaster, libxl will have logged a
+   * message with level XTL_CRITICAL.
+   *
+   * Reentrancy: it IS permitted to call libxl from within
+   * event_occurs.  It is NOT permitted to call libxl from within
+   * disaster.  The event_occurs and disaster callbacks may occur on
+   * any thread in which the application calls libxl.
+   *
+   * libxl_event_register_callbacks may be called as many times, with
+   * different parameters, as the application likes; the most recent
+   * call determines the libxl behaviour.  However it is NOT safe to
+   * call _register_callbacks concurrently with, or reentrantly from,
+   * any other libxl function.
+   *
+   * Calls to _register_callbacks do not affect events which have
+   * already occurred.
+   */
+
+
+/*
+ * Events are only generated if they have been requested.
+ * The following functions request the generation of specific events.
+ *
+ * Each set of functions for controlling event generation has this form:
+ *
+ *   typedef struct libxl__evgen_FOO libxl_evgen_FOO;
+ *   int libxl_evenable_FOO(libxl_ctx *ctx, FURTHER PARAMETERS,
+ *                          libxl_ev_user user, libxl_evgen_FOO **evgen_out);
+ *   void libxl_evdisable_FOO(libxl_ctx *ctx, libxl_evgen_FOO *evgen);
+ *
+ * The evenable function arranges that the events (as described in the
+ * doc comment for the individual function) will start to be generated
+ * by libxl.  On success, *evgen_out is set to a non-null pointer to
+ * an opaque struct.
+ *
+ * The user value is returned in the generated events and may be
+ * used by the caller for whatever it likes.  The type libxl_ev_user is
+ * guaranteed to be an unsigned integer type which is at least
+ * as big as uint64_t and is also guaranteed to be big enough to
+ * contain any intptr_t value.
+ *
+ * If it becomes desirable to stop generation of the relevant events,
+ * or to reclaim the resources in libxl associated with the evgen
+ * structure, the same evgen value should be passed to the evdisable
+ * function.  However, note that events which occurred prior to the
+ * evdisable call may still be returned.
+ *
+ * The caller may enable identical events more than once.  If they do
+ * so, each actual occurrence will generate several events to be
+ * returned by libxl_event_check, with the appropriate user value(s).
+ * Aside from this, each occurrence of each event is returned by
+ * libxl_event_check exactly once.
+ *
+ * An evgen is associated with the libxl_ctx used for its creation.
+ * After libxl_ctx_free, all corresponding evgen handles become
+ * invalid and must no longer be passed to evdisable.
+ *
+ * Events enabled with evenable prior to a fork and libxl_ctx_postfork
+ * are no longer generated after the fork/postfork; however the evgen
+ * structures are still valid and must be passed to evdisable if the
+ * memory they use should not be leaked.
+ *
+ * Applications should ensure that they eventually retrieve every
+ * event using libxl_event_check or libxl_event_wait, since events
+ * which occur but are not retrieved by the application will be queued
+ * inside libxl indefinitely.  libxl_event_check/_wait may be O(n)
+ * where n is the number of queued events which do not match the
+ * criteria specified in the arguments to check/wait.
+ */
+
+typedef struct libxl__evgen_domain_death libxl_evgen_domain_death;
+int libxl_evenable_domain_death(libxl_ctx *ctx, uint32_t domid,
+                         libxl_ev_user, libxl_evgen_domain_death **evgen_out);
+void libxl_evdisable_domain_death(libxl_ctx *ctx, libxl_evgen_domain_death*);
+  /* Arranges for the generation of DOMAIN_SHUTDOWN and DOMAIN_DESTROY
+   * events.  A domain which is destroyed before it shuts down
+   * may generate only a DESTROY event.
+   */
+
+typedef struct libxl__evgen_disk_eject libxl_evgen_disk_eject;
+int libxl_evenable_disk_eject(libxl_ctx *ctx, uint32_t domid, const char *vdev,
+                        libxl_ev_user, libxl_evgen_disk_eject **evgen_out);
+void libxl_evdisable_disk_eject(libxl_ctx *ctx, libxl_evgen_disk_eject*);
+  /* Arranges for the generation of DISK_EJECT events.  A copy of the
+   * string *vdev will be made for libxl's internal use, and a pointer
+   * to this (or some other) copy will be returned as the vdev
+   * member of event.u.disk_eject.
+   */
+
 
 /*======================================================================*/
 
@@ -36,10 +211,10 @@
  *      poll();
  *      libxl_osevent_afterpoll(...);
  *      for (;;) {
- *        r=libxl_event_check(...);
- *        if (r==LIBXL_NOT_READY) break;
- *        if (r) handle failure;
- *        do something with the event;
+ *          r = libxl_event_check(...);
+ *          if (r==ERROR_NOT_READY) break;
+ *          if (r) goto error_out;
+ *          do something with the event;
  *      }
  *   }
  *
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 8c9f7c9..edb73eb 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -175,11 +175,45 @@ typedef struct libxl__ev_watch_slot {
     
 libxl__ev_xswatch *libxl__watch_slot_contents(libxl__gc *gc, int slotnum);
 
+
+/*
+ * evgen structures, which are the state we use for generating
+ * events for the caller.
+ *
+ * In general in each case there's an internal and an external
+ * version of the _evdisable_FOO function; the internal one is
+ * used during cleanup.
+ */
+
+struct libxl__evgen_domain_death {
+    uint32_t domid;
+    unsigned shutdown_reported:1, death_reported:1;
+    LIBXL_TAILQ_ENTRY(libxl_evgen_domain_death) entry;
+        /* on list .death_reported ? CTX->death_list : CTX->death_reported */
+    libxl_ev_user user;
+};
+_hidden void
+libxl__evdisable_domain_death(libxl__gc*, libxl_evgen_domain_death*);
+
+struct libxl__evgen_disk_eject {
+    libxl__ev_xswatch watch;
+    uint32_t domid;
+    LIBXL_LIST_ENTRY(libxl_evgen_disk_eject) entry;
+    libxl_ev_user user;
+    char *vdev;
+};
+_hidden void
+libxl__evdisable_disk_eject(libxl__gc*, libxl_evgen_disk_eject*);
+
+
 struct libxl__ctx {
     xentoollog_logger *lg;
     xc_interface *xch;
     struct xs_handle *xsh;
 
+    const libxl_event_hooks *event_hooks;
+    void *event_hooks_user;
+
     pthread_mutex_t lock; /* protects data structures hanging off the ctx */
       /* Always use libxl__ctx_lock and _unlock (or the convenience
        * macors CTX_LOCK and CTX_UNLOCK) to manipulate this.
@@ -193,12 +227,16 @@ struct libxl__ctx {
        * documented in the libxl public interface.
        */
 
+    LIBXL_TAILQ_HEAD(libxl__event_list, libxl_event) occurred;
+
     int osevent_in_hook;
     const libxl_osevent_hooks *osevent_hooks;
     void *osevent_user;
       /* See the comment for OSEVENT_HOOK_INTERN in libxl_event.c
        * for restrictions on the use of the osevent fields. */
 
+    struct pollfd *fd_polls;
+    int fd_polls_allocd;
     int fd_rindex_allocd;
     int *fd_rindex; /* see libxl_osevent_beforepoll */
     LIBXL_LIST_HEAD(, libxl__ev_fd) efds;
@@ -210,6 +248,13 @@ struct libxl__ctx {
     uint32_t watch_counter; /* helps disambiguate slot reuse */
     libxl__ev_fd watch_efd;
 
+    LIBXL_TAILQ_HEAD(libxl__evgen_domain_death_list, libxl_evgen_domain_death)
+        death_list /* sorted by domid */,
+        death_reported;
+    libxl__ev_xswatch death_watch;
+    
+    LIBXL_LIST_HEAD(, libxl_evgen_disk_eject) disk_eject_evgens;
+
     /* for callers who reap children willy-nilly; caller must only
      * set this after libxl_init and before any other call - or
      * may leave them untouched */
@@ -250,6 +295,7 @@ struct libxl__gc {
 struct libxl__egc {
     /* for event-generating functions only */
     struct libxl__gc gc;
+    struct libxl__event_list occurred_for_callback;
 };
 
 #define LIBXL_INIT_GC(gc,ctx) do{               \
@@ -394,6 +440,9 @@ _hidden char *libxl__xs_libxl_path(libxl__gc *gc, uint32_t domid);
  *
  * Callers of libxl__ev_KIND_register must ensure that the
  * registration is undone, with _deregister, in libxl_ctx_free.
+ * This means that normally each kind of libxl__evgen (ie each
+ * application-requested event source) needs to be on a list so that
+ * it can be automatically deregistered as promised in libxl_event.h.
  */
 
 
@@ -437,6 +486,25 @@ static inline int libxl__ev_xswatch_isregistered(const libxl__ev_xswatch *xw)
 
 
 /*
+ * Other event-handling support provided by the libxl event core to
+ * the rest of libxl.
+ */
+
+_hidden void libxl__event_occurred(libxl__egc*, libxl_event *event);
+  /* Arranges to notify the application that the event has occurred.
+   * event should be suitable for passing to libxl_event_free. */
+
+_hidden libxl_event *libxl__event_new(libxl__egc*, libxl_event_type,
+                                      uint32_t domid);
+  /* Convenience function.
+   * Allocates a new libxl_event, fills in domid and type.
+   * If allocation fails, calls _disaster, and returns NULL. */
+
+#define NEW_EVENT(egc, type, domid)                              \
+    libxl__event_new((egc), LIBXL_EVENT_TYPE_##type, (domid));
+    /* Convenience macro. */
+
+/*
  * In general, call this via the macro LIBXL__EVENT_DISASTER.
  *
  * Event-generating functions may call this if they might have wanted
@@ -993,12 +1061,15 @@ libxl__device_model_version_running(libxl__gc *gc, uint32_t domid);
 
 /* egc initialisation and destruction: */
 
-#define LIBXL_INIT_EGC(egc,ctx) do{             \
-        LIBXL_INIT_GC((egc).gc,ctx);            \
-        /* list of occurred events tbd */       \
+#define LIBXL_INIT_EGC(egc,ctx) do{                     \
+        LIBXL_INIT_GC((egc).gc,ctx);                    \
+        LIBXL_TAILQ_INIT(&(egc).occurred_for_callback); \
     } while(0)
 
 _hidden void libxl__egc_cleanup(libxl__egc *egc);
+  /* Frees memory allocated within this egc's gc, and reports all
+   * occurred events via callback, if applicable.  May reenter the
+   * application; see restrictions above. */
 
 /* convenience macros: */
 
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index 574dec7..a6dac79 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -75,11 +75,6 @@ libxl_action_on_shutdown = Enumeration("action_on_shutdown", [
     (6, "COREDUMP_RESTART"),
     ])
 
-libxl_event_type = Enumeration("event_type", [
-    (1, "DOMAIN_DEATH"),
-    (2, "DISK_EJECT"),
-    ])
-
 libxl_button = Enumeration("button", [
     (1, "POWER"),
     (2, "SLEEP"),
@@ -395,3 +390,32 @@ libxl_sched_sedf = Struct("sched_sedf", [
     ("extratime", integer),
     ("weight", integer),
     ], dispose_fn=None)
+
+libxl_event_type = Enumeration("event_type", [
+    (1, "DOMAIN_SHUTDOWN"),
+    (2, "DOMAIN_DESTROY"),
+    (3, "DISK_EJECT"),
+    ])
+
+libxl_ev_user = UInt(64)
+
+libxl_ev_link = Builtin("ev_link", passby=PASS_BY_REFERENCE, private=True)
+
+libxl_event = Struct("event",[
+    ("link",     libxl_ev_link,0),
+     # for use by libxl; caller may use this once the event has been
+     #   returned by libxl_event_{check,wait}
+    ("domid",    libxl_domid),
+    ("domuuid",  libxl_uuid),
+    ("for_user", libxl_ev_user),
+    ("type",     libxl_event_type),
+    ("u", KeyedUnion(None, libxl_event_type, "type",
+          [("domain_shutdown", Struct(None, [
+                                             ("shutdown_reason", uint8),
+                                      ])),
+           ("domain_destroy", Struct(None, [])),
+           ("disk_eject", Struct(None, [
+                                        ("vdev", string),
+                                        ("disk", libxl_device_disk),
+                                 ])),
+           ]))])
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index 8c30de1..e292bfc 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -1225,14 +1225,16 @@ skip_vfb:
     xlu_cfg_destroy(config);
 }
 
-/* Returns 1 if domain should be restarted, 2 if domain should be renamed then restarted  */
-static int handle_domain_death(libxl_ctx *ctx, uint32_t domid, libxl_event *event,
-                               libxl_domain_config *d_config, libxl_dominfo *info)
+/* Returns 1 if domain should be restarted,
+ * 2 if domain should be renamed then restarted, or 0 */
+static int handle_domain_death(libxl_ctx *ctx, uint32_t domid,
+                               libxl_event *event,
+                               libxl_domain_config *d_config)
 {
     int restart = 0;
     libxl_action_on_shutdown action;
 
-    switch (info->shutdown_reason) {
+    switch (event->u.domain_shutdown.shutdown_reason) {
     case SHUTDOWN_poweroff:
         action = d_config->on_poweroff;
         break;
@@ -1249,11 +1251,14 @@ static int handle_domain_death(libxl_ctx *ctx, uint32_t domid, libxl_event *even
         action = d_config->on_watchdog;
         break;
     default:
-        LOG("Unknown shutdown reason code %d. Destroying domain.", info->shutdown_reason);
+        LOG("Unknown shutdown reason code %d. Destroying domain.",
+            event->u.domain_shutdown.shutdown_reason);
         action = LIBXL_ACTION_ON_SHUTDOWN_DESTROY;
     }
 
-    LOG("Action for shutdown reason code %d is %s", info->shutdown_reason, action_on_shutdown_names[action]);
+    LOG("Action for shutdown reason code %d is %s",
+        event->u.domain_shutdown.shutdown_reason,
+        action_on_shutdown_names[action]);
 
     if (action == LIBXL_ACTION_ON_SHUTDOWN_COREDUMP_DESTROY || action == LIBXL_ACTION_ON_SHUTDOWN_COREDUMP_RESTART) {
         char *corefile;
@@ -1318,7 +1323,7 @@ static void replace_string(char **str, const char *val)
 
 
 static int preserve_domain(libxl_ctx *ctx, uint32_t domid, libxl_event *event,
-                           libxl_domain_config *d_config, libxl_dominfo *info)
+                           libxl_domain_config *d_config)
 {
     time_t now;
     struct tm tm;
@@ -1431,6 +1436,27 @@ static int autoconnect_console(libxl_ctx *ctx, uint32_t domid, void *priv)
     _exit(1);
 }
 
+static int domain_wait_event(libxl_event **event_r) {
+    int ret;
+    for (;;) {
+        ret = libxl_event_wait(ctx, event_r, LIBXL_EVENTMASK_ALL, 0,0);
+        if (ret) {
+            LOG("Domain %d, failed to get event, quitting (rc=%d)", domid, ret);
+            return ret;
+        }
+        if ((*event_r)->domid != domid) {
+            char *evstr = libxl_event_to_json(ctx, *event_r);
+            LOG("INTERNAL PROBLEM - ignoring unexpected event for"
+                " domain %d (expected %d): event=%s",
+                (*event_r)->domid, domid, evstr);
+            free(evstr);
+            libxl_event_free(ctx, *event_r);
+            continue;
+        }
+        return ret;
+    }
+}
+
 static int create_domain(struct domain_create *dom_info)
 {
     libxl_domain_config d_config;
@@ -1444,10 +1470,11 @@ static int create_domain(struct domain_create *dom_info)
     const char *restore_file = dom_info->restore_file;
     int migrate_fd = dom_info->migrate_fd;
 
-    int fd, i;
+    int i;
     int need_daemon = daemonize;
     int ret, rc;
-    libxl_waiter *w1 = NULL, *w2 = NULL;
+    libxl_evgen_domain_death *deathw = NULL;
+    libxl_evgen_disk_eject **diskws = NULL; /* one per disk */
     void *config_data = 0;
     int config_len = 0;
     int restore_fd = -1;
@@ -1658,14 +1685,14 @@ start:
                 if (errno != EINTR) {
                     perror("failed to wait for daemonizing child");
                     ret = ERROR_FAIL;
-                    goto error_out;
+                    goto out;
                 }
             }
             if (status) {
                 libxl_report_child_exitstatus(ctx, XTL_ERROR,
                            "daemonizing child", child1, status);
                 ret = ERROR_FAIL;
-                goto error_out;
+                goto out;
             }
             ret = domid;
             goto out;
@@ -1702,92 +1729,106 @@ start:
     }
     LOG("Waiting for domain %s (domid %d) to die [pid %ld]",
         d_config.c_info.name, domid, (long)getpid());
-    w1 = (libxl_waiter*) xmalloc(sizeof(libxl_waiter) * d_config.num_disks);
-    w2 = (libxl_waiter*) xmalloc(sizeof(libxl_waiter));
-    libxl_wait_for_disk_ejects(ctx, domid, d_config.disks, d_config.num_disks, w1);
-    libxl_wait_for_domain_death(ctx, domid, w2);
-    libxl_get_wait_fd(ctx, &fd);
-    while (1) {
-        int ret;
-        fd_set rfds;
-        libxl_dominfo info;
-        libxl_event event;
-        libxl_device_disk disk;
 
-        FD_ZERO(&rfds);
-        FD_SET(fd, &rfds);
+    ret = libxl_evenable_domain_death(ctx, domid, 0, &deathw);
+    if (ret) goto out;
 
-        ret = select(fd + 1, &rfds, NULL, NULL, NULL);
-        if (!ret)
-            continue;
-        libxl_get_event(ctx, &event);
-        switch (event.type) {
-            case LIBXL_EVENT_TYPE_DOMAIN_DEATH:
-                ret = libxl_event_get_domain_death_info(ctx, domid, &event, &info);
-
-                if (ret < 0) {
-                    libxl_free_event(&event);
-                    continue;
+    if (!diskws) {
+        diskws = xmalloc(sizeof(*diskws) * d_config.num_disks);
+        for (i = 0; i < d_config.num_disks; i++)
+            diskws[i] = NULL;
+    }
+    for (i = 0; i < d_config.num_disks; i++) {
+        ret = libxl_evenable_disk_eject(ctx, domid, d_config.disks[i].vdev,
+                                        0, &diskws[i]);
+        if (ret) goto out;
+    }
+    while (1) {
+        libxl_event *event;
+        ret = domain_wait_event(&event);
+        if (ret) goto out;
+
+        switch (event->type) {
+
+        case LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN:
+            LOG("Domain %d has shut down, reason code %d 0x%x", domid,
+                event->u.domain_shutdown.shutdown_reason,
+                event->u.domain_shutdown.shutdown_reason);
+            switch (handle_domain_death(ctx, domid, event, &d_config)) {
+            case 2:
+                if (!preserve_domain(ctx, domid, event, &d_config)) {
+                    /* If we fail then exit leaving the old domain in place. */
+                    ret = -1;
+                    goto out;
                 }
 
-                LOG("Domain %d is dead", domid);
-
-                if (ret) {
-                    switch (handle_domain_death(ctx, domid, &event, &d_config, &info)) {
-                    case 2:
-                        if (!preserve_domain(ctx, domid, &event, &d_config, &info)) {
-                            /* If we fail then exit leaving the old domain in place. */
-                            ret = -1;
-                            goto out;
-                        }
-
-                        /* Otherwise fall through and restart. */
-                    case 1:
-
-                        for (i = 0; i < d_config.num_disks; i++)
-                            libxl_free_waiter(&w1[i]);
-                        libxl_free_waiter(w2);
-                        free(w1);
-                        free(w2);
-
-                        /*
-                         * Do not attempt to reconnect if we come round again due to a
-                         * guest reboot -- the stdin/out will be disconnected by then.
-                         */
-                        dom_info->console_autoconnect = 0;
-
-                        /* Some settings only make sense on first boot. */
-                        paused = 0;
-                        if (common_domname
-                            && strcmp(d_config.c_info.name, common_domname)) {
-                            d_config.c_info.name = strdup(common_domname);
-                        }
-
-                        /*
-                         * XXX FIXME: If this sleep is not there then domain
-                         * re-creation fails sometimes.
-                         */
-                        LOG("Done. Rebooting now");
-                        sleep(2);
-                        goto start;
-                    case 0:
-                        LOG("Done. Exiting now");
-                        ret = 0;
-                        goto out;
-                    }
-                } else {
-                    LOG("Unable to get domain death info, quitting");
-                    goto out;
+                /* Otherwise fall through and restart. */
+            case 1:
+                libxl_event_free(ctx, event);
+                libxl_evdisable_domain_death(ctx, deathw);
+                deathw = NULL;
+                for (i = 0; i < d_config.num_disks; i++) {
+                    libxl_evdisable_disk_eject(ctx, diskws[i]);
+                    diskws[i] = NULL;
                 }
-                break;
-            case LIBXL_EVENT_TYPE_DISK_EJECT:
-                if (libxl_event_get_disk_eject_info(ctx, domid, &event, &disk)) {
-                    libxl_cdrom_insert(ctx, domid, &disk);
-                    libxl_device_disk_dispose(&disk);
+                /* discard any other events which may have been generated */
+                while (!(ret = libxl_event_check(ctx, &event,
+                                                 LIBXL_EVENTMASK_ALL, 0,0))) {
+                    libxl_event_free(ctx, event);
                 }
-                break;
+                if (ret != ERROR_NOT_READY) {
+                    LOG("warning, libxl_event_check (cleanup) failed (rc=%d)",
+                        ret);
+                }
+
+                /*
+                 * Do not attempt to reconnect if we come round again due to a
+                 * guest reboot -- the stdin/out will be disconnected by then.
+                 */
+                dom_info->console_autoconnect = 0;
+
+                /* Some settings only make sense on first boot. */
+                paused = 0;
+                if (common_domname
+                    && strcmp(d_config.c_info.name, common_domname)) {
+                    d_config.c_info.name = strdup(common_domname);
+                }
+
+                /*
+                 * XXX FIXME: If this sleep is not there then domain
+                 * re-creation fails sometimes.
+                 */
+                LOG("Done. Rebooting now");
+                sleep(2);
+                goto start;
+
+            case 0:
+                LOG("Done. Exiting now");
+                ret = 0;
+                goto out;
+
+            default:
+                abort();
+            }
+
+        case LIBXL_EVENT_TYPE_DOMAIN_DESTROY:
+            LOG("Domain %d has been destroyed.", domid);
+            ret = 0;
+            goto out;
+
+        case LIBXL_EVENT_TYPE_DISK_EJECT:
+            /* XXX what is this for? */
+            libxl_cdrom_insert(ctx, domid, &event->u.disk_eject.disk);
+            break;
+
+        default:;
+            char *evstr = libxl_event_to_json(ctx, event);
+            LOG("warning, got unexpected event type %d, event=%s",
+                event->type, evstr);
+            free(evstr);
         }
-        libxl_free_event(&event);
+
+        libxl_event_free(ctx, event);
     }
 
 error_out:
@@ -2270,6 +2311,7 @@ static void destroy_domain(const char *p)
 static void shutdown_domain(const char *p, int wait)
 {
     int rc;
+    libxl_event *event;
 
     find_domain(p);
     rc=libxl_domain_shutdown(ctx, domid);
@@ -2284,37 +2326,39 @@ static void shutdown_domain(const char *p, int wait)
     }
 
     if (wait) {
-        libxl_waiter waiter;
-        int fd;
-
-        libxl_wait_for_domain_death(ctx, domid, &waiter);
+        libxl_evgen_domain_death *deathw;
 
-        libxl_get_wait_fd(ctx, &fd);
-
-        while (wait) {
-            fd_set rfds;
-            libxl_event event;
-            libxl_dominfo info;
+        rc = libxl_evenable_domain_death(ctx, domid, 0, &deathw);
+        if (rc) {
+            fprintf(stderr,"wait for death failed (evgen, rc=%d)\n",rc);
+            exit(-1);
+        }
 
-            FD_ZERO(&rfds);
-            FD_SET(fd, &rfds);
+        for (;;) {
+            rc = domain_wait_event(&event);
+            if (rc) exit(-1);
 
-            if (!select(fd + 1, &rfds, NULL, NULL, NULL))
-                continue;
+            switch (event->type) {
 
-            libxl_get_event(ctx, &event);
+            case LIBXL_EVENT_TYPE_DOMAIN_DESTROY:
+                LOG("Domain %d has been destroyed", domid);
+                goto done;
 
-            if (event.type == LIBXL_EVENT_TYPE_DOMAIN_DEATH) {
-                if (libxl_event_get_domain_death_info(ctx, domid, &event, &info) < 0)
-                    continue;
+            case LIBXL_EVENT_TYPE_DOMAIN_SHUTDOWN:
+                LOG("Domain %d has been shut down, reason code %d %x", domid,
+                    event->u.domain_shutdown.shutdown_reason,
+                    event->u.domain_shutdown.shutdown_reason);
+                goto done;
 
-                LOG("Domain %d is dead", domid);
-                wait = 0;
+            default:
+                LOG("Unexpected event type %d", event->type);
+                break;
             }
-
-            libxl_free_event(&event);
+            libxl_event_free(ctx, event);
         }
-        libxl_free_waiter(&waiter);
+    done:
+        libxl_event_free(ctx, event);
+        libxl_evdisable_domain_death(ctx, deathw);
     }
 }
 
diff --git a/tools/ocaml/libs/xl/genwrap.py b/tools/ocaml/libs/xl/genwrap.py
index 61abecf..5a02e8f 100644
--- a/tools/ocaml/libs/xl/genwrap.py
+++ b/tools/ocaml/libs/xl/genwrap.py
@@ -273,6 +273,7 @@ if __name__ == '__main__':
         "device_model_info",
         "vcpuinfo",
         "topologyinfo",
+        "event",
         ]
 
     for t in blacklist:
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 4/9] libxl: introduce libxl_fd_set_nonblock, rationalise _cloexec
  2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
                   ` (2 preceding siblings ...)
  2012-01-13 19:25 ` [PATCH 3/9] libxl: New event generation API Ian Jackson
@ 2012-01-13 19:25 ` Ian Jackson
  2012-01-13 19:25 ` [PATCH 5/9] libxl: Permit multithreaded event waiting Ian Jackson
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Jackson

We want a function for setting fds to nonblocking, so introduce one.

This is a very similar requirement to that for libxl_fd_set_cloexec,
so make it common with that.

While we're at it, fix a few deficiencies that make this latter
function less desirable than it could be:
 * Change the return from 0/-1 (like a syscall) to a libxl error code
 * Take a boolean parameter for turning the flag on and off
 * Log on error (and so, take a ctx for this purpose)

Change callers of libxl_fd_set_cloexec to notice errors.  (Although,
such errors are highly unlikely.)

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
---
 tools/libxl/libxl.c      |   33 ++++++++++++++++++++++++++-------
 tools/libxl/libxl.h      |    7 ++++++-
 tools/libxl/libxl_qmp.c  |    3 ++-
 tools/libxl/xl_cmdimpl.c |    3 ++-
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 19ff12c..c68c165 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -3655,19 +3655,38 @@ int libxl_cpupool_movedomain(libxl_ctx *ctx, uint32_t poolid, uint32_t domid)
     return 0;
 }
 
-int libxl_fd_set_cloexec(int fd)
+static int fd_set_flags(libxl_ctx *ctx, int fd,
+                        int fcntlgetop, int fcntlsetop, const char *fl,
+                        int flagmask, int set_p)
 {
-    int flags = 0;
+    int flags, r;
 
-    if ((flags = fcntl(fd, F_GETFD)) == -1) {
-        flags = 0;
+    flags = fcntl(fd, fcntlgetop);
+    if (flags == -1) {
+        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "fcntl(,F_GET%s) failed",fl);
+        return ERROR_FAIL;
     }
-    if ((flags & FD_CLOEXEC)) {
-        return 0;
+
+    if (set_p)
+        flags |= flagmask;
+    else
+        flags &= ~flagmask;
+
+    r = fcntl(fd, fcntlsetop, flags);
+    if (r == -1) {
+        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "fcntl(,F_SET%s) failed",fl);
+        return ERROR_FAIL;
     }
-    return fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
+
+    return 0;
 }
 
+int libxl_fd_set_cloexec(libxl_ctx *ctx, int fd, int cloexec)
+  { return fd_set_flags(ctx,fd, F_GETFD,F_SETFD,"FD", FD_CLOEXEC, cloexec); }
+
+int libxl_fd_set_nonblock(libxl_ctx *ctx, int fd, int nonblock)
+  { return fd_set_flags(ctx,fd, F_GETFL,F_SETFL,"FL", O_NONBLOCK, nonblock); }
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 4d3391f..e32881b 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -613,7 +613,12 @@ const char *libxl_run_dir_path(void);
 const char *libxl_xenpaging_dir_path(void);
 
 /* misc */
-int libxl_fd_set_cloexec(int fd);
+
+/* Each of these sets or clears the flag according to whether the
+ * 2nd parameter is nonzero.  On failure, they log, and
+ * return ERROR_FAIL, but also leave errno valid. */
+int libxl_fd_set_cloexec(libxl_ctx *ctx, int fd, int cloexec);
+int libxl_fd_set_nonblock(libxl_ctx *ctx, int fd, int nonblock);
 
 #include <libxl_event.h>
 
diff --git a/tools/libxl/libxl_qmp.c b/tools/libxl/libxl_qmp.c
index 61d9769..1ee82ae 100644
--- a/tools/libxl/libxl_qmp.c
+++ b/tools/libxl/libxl_qmp.c
@@ -324,7 +324,8 @@ static int qmp_open(libxl__qmp_handler *qmp, const char *qmp_socket_path,
     if (fcntl(qmp->qmp_fd, F_SETFL, flags | O_NONBLOCK) == -1) {
         return -1;
     }
-    libxl_fd_set_cloexec(qmp->qmp_fd);
+    ret = libxl_fd_set_cloexec(qmp->ctx, qmp->qmp_fd, 1);
+    if (ret) return -1;
 
     memset(&qmp->addr, 0, sizeof (&qmp->addr));
     qmp->addr.sun_family = AF_UNIX;
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index e292bfc..c2b7a1e 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -1495,7 +1495,8 @@ static int create_domain(struct domain_create *dom_info)
             restore_fd = migrate_fd;
         } else {
             restore_fd = open(restore_file, O_RDONLY);
-            libxl_fd_set_cloexec(restore_fd);
+            rc = libxl_fd_set_cloexec(ctx, restore_fd, 1);
+            if (rc) return rc;
         }
 
         CHK_ERRNO( libxl_read_exactly(ctx, restore_fd, &hdr,
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 5/9] libxl: Permit multithreaded event waiting
  2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
                   ` (3 preceding siblings ...)
  2012-01-13 19:25 ` [PATCH 4/9] libxl: introduce libxl_fd_set_nonblock, rationalise _cloexec Ian Jackson
@ 2012-01-13 19:25 ` Ian Jackson
  2012-01-19 10:01   ` Ian Campbell
  2012-01-13 19:25 ` [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure Ian Jackson
                   ` (3 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Jackson

Previously, the context would be locked whenever we were waiting in
libxl's own call to poll (waiting for operating system events).

This would mean that multiple simultaneous calls to libxl_event_wait
in different threads with different parameters would not work
properly.

If we simply unlocked the context, it would be possible for another
thread to discover the occurrence of the event we were waiting for,
without us even waking up, and we would remain in poll.  So we need a
way to wake up other threads: a pipe, one for each thread in poll.

We also need to move some variables from globals in the ctx to be
per-polling-thread.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tools/libxl/libxl.c          |   18 +++-
 tools/libxl/libxl_event.c    |  196 ++++++++++++++++++++++++++++++++++--------
 tools/libxl/libxl_internal.h |   50 ++++++++++-
 3 files changed, 218 insertions(+), 46 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index c68c165..9890d79 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -49,8 +49,9 @@ int libxl_ctx_alloc(libxl_ctx **pctx, int version,
 
     ctx->osevent_hooks = 0;
 
-    ctx->fd_polls = 0;
-    ctx->fd_rindex = 0;
+    LIBXL_LIST_INIT(&ctx->pollers_event);
+    LIBXL_LIST_INIT(&ctx->pollers_idle);
+
     LIBXL_LIST_INIT(&ctx->efds);
     LIBXL_TAILQ_INIT(&ctx->etimes);
 
@@ -61,6 +62,9 @@ int libxl_ctx_alloc(libxl_ctx **pctx, int version,
     LIBXL_TAILQ_INIT(&ctx->death_list);
     libxl__ev_xswatch_init(&ctx->death_watch);
 
+    rc = libxl__poller_init(ctx, &ctx->poller_app);
+    if (rc) goto out;
+
     if ( stat(XENSTORE_PID_FILE, &stat_buf) != 0 ) {
         LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Is xenstore daemon running?\n"
                      "failed to stat %s", XENSTORE_PID_FILE);
@@ -135,8 +139,14 @@ int libxl_ctx_free(libxl_ctx *ctx)
     libxl_version_info_dispose(&ctx->version_info);
     if (ctx->xsh) xs_daemon_close(ctx->xsh);
 
-    free(ctx->fd_polls);
-    free(ctx->fd_rindex);
+    libxl__poller_dispose(&ctx->poller_app);
+    assert(LIBXL_LIST_EMPTY(&ctx->pollers_event));
+    libxl__poller *poller, *poller_tmp;
+    LIBXL_LIST_FOREACH_SAFE(poller, &ctx->pollers_idle, entry, poller_tmp) {
+        libxl__poller_dispose(poller);
+        free(poller);
+    }
+
     free(ctx->watch_slots);
 
     discard_events(&ctx->occurred);
diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index 621a7cc..82889f6 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -510,9 +510,9 @@ void libxl__ev_xswatch_deregister(libxl__gc *gc, libxl__ev_xswatch *w)
  * osevent poll
  */
 
-static int beforepoll_internal(libxl__gc *gc, int *nfds_io,
-                               struct pollfd *fds, int *timeout_upd,
-                               struct timeval now)
+static int beforepoll_internal(libxl__gc *gc, libxl__poller *poller,
+                               int *nfds_io, struct pollfd *fds,
+                               int *timeout_upd, struct timeval now)
 {
     libxl__ev_fd *efd;
     int rc;
@@ -534,7 +534,7 @@ static int beforepoll_internal(libxl__gc *gc, int *nfds_io,
          * not to mess with fd_rindex.
          */
 
-        int maxfd = 0;
+        int maxfd = poller->wakeup_pipe[0] + 1;
         LIBXL_LIST_FOREACH(efd, &CTX->efds, entry) {
             if (!efd->events)
                 continue;
@@ -542,30 +542,39 @@ static int beforepoll_internal(libxl__gc *gc, int *nfds_io,
                 maxfd = efd->fd + 1;
         }
         /* make sure our array is as big as *nfds_io */
-        if (CTX->fd_rindex_allocd < maxfd) {
+        if (poller->fd_rindex_allocd < maxfd) {
             assert(maxfd < INT_MAX / sizeof(int) / 2);
-            int *newarray = realloc(CTX->fd_rindex, sizeof(int) * maxfd);
+            int *newarray = realloc(poller->fd_rindex, sizeof(int) * maxfd);
             if (!newarray) { rc = ERROR_NOMEM; goto out; }
-            memset(newarray + CTX->fd_rindex_allocd, 0,
-                   sizeof(int) * (maxfd - CTX->fd_rindex_allocd));
-            CTX->fd_rindex = newarray;
-            CTX->fd_rindex_allocd = maxfd;
+            memset(newarray + poller->fd_rindex_allocd, 0,
+                   sizeof(int) * (maxfd - poller->fd_rindex_allocd));
+            poller->fd_rindex = newarray;
+            poller->fd_rindex_allocd = maxfd;
         }
     }
 
     int used = 0;
-    LIBXL_LIST_FOREACH(efd, &CTX->efds, entry) {
-        if (!efd->events)
-            continue;
-        if (used < *nfds_io) {
-            fds[used].fd = efd->fd;
-            fds[used].events = efd->events;
-            fds[used].revents = 0;
-            assert(efd->fd < CTX->fd_rindex_allocd);
-            CTX->fd_rindex[efd->fd] = used;
-        }
-        used++;
-    }
+
+#define REQUIRE_FD(req_fd, req_events, efd) do{                 \
+        if ((req_events)) {                                     \
+            if (used < *nfds_io) {                              \
+                fds[used].fd = (req_fd);                        \
+                fds[used].events = (req_events);                \
+                fds[used].revents = 0;                          \
+                assert((req_fd) < poller->fd_rindex_allocd);    \
+                poller->fd_rindex[(req_fd)] = used;             \
+            }                                                   \
+            used++;                                             \
+        }                                                       \
+    }while(0)
+
+    LIBXL_LIST_FOREACH(efd, &CTX->efds, entry)
+        REQUIRE_FD(efd->fd, efd->events, efd);
+
+    REQUIRE_FD(poller->wakeup_pipe[0], POLLIN, 0);
+
+#undef REQUIRE_FD
+
     rc = used <= *nfds_io ? 0 : ERROR_BUFFERFULL;
 
     *nfds_io = used;
@@ -599,22 +608,23 @@ int libxl_osevent_beforepoll(libxl_ctx *ctx, int *nfds_io,
 {
     EGC_INIT(ctx);
     CTX_LOCK;
-    int rc = beforepoll_internal(gc, nfds_io, fds, timeout_upd, now);
+    int rc = beforepoll_internal(gc, &ctx->poller_app,
+                                 nfds_io, fds, timeout_upd, now);
     CTX_UNLOCK;
     EGC_FREE;
     return rc;
 }
 
-static int afterpoll_check_fd(libxl_ctx *ctx,
+static int afterpoll_check_fd(libxl__poller *poller,
                               const struct pollfd *fds, int nfds,
                               int fd, int events)
     /* returns mask of events which were requested and occurred */
 {
-    if (fd >= ctx->fd_rindex_allocd)
+    if (fd >= poller->fd_rindex_allocd)
         /* added after we went into poll, have to try again */
         return 0;
 
-    int slot = ctx->fd_rindex[fd];
+    int slot = poller->fd_rindex[fd];
 
     if (slot >= nfds)
         /* stale slot entry; again, added afterwards */
@@ -630,22 +640,31 @@ static int afterpoll_check_fd(libxl_ctx *ctx,
     return revents;
 }
 
-static void afterpoll_internal(libxl__egc *egc,
+static void afterpoll_internal(libxl__egc *egc, libxl__poller *poller,
                                int nfds, const struct pollfd *fds,
                                struct timeval now)
 {
     EGC_GC;
     libxl__ev_fd *efd;
 
+
     LIBXL_LIST_FOREACH(efd, &CTX->efds, entry) {
         if (!efd->events)
             continue;
 
-        int revents = afterpoll_check_fd(CTX,fds,nfds, efd->fd,efd->events);
+        int revents = afterpoll_check_fd(poller,fds,nfds, efd->fd,efd->events);
         if (revents)
             efd->func(egc, efd, efd->fd, efd->events, revents);
     }
 
+    if (afterpoll_check_fd(poller,fds,nfds, poller->wakeup_pipe[0],POLLIN)) {
+        char buf[256];
+        int r = read(poller->wakeup_pipe[0], buf, sizeof(buf));
+        if (r < 0)
+            if (errno != EINTR && errno != EWOULDBLOCK)
+                LIBXL__EVENT_DISASTER(egc, "read wakeup", errno, 0);
+    }
+
     for (;;) {
         libxl__ev_time *etime = LIBXL_TAILQ_FIRST(&CTX->etimes);
         if (!etime)
@@ -667,7 +686,7 @@ void libxl_osevent_afterpoll(libxl_ctx *ctx, int nfds, const struct pollfd *fds,
 {
     EGC_INIT(ctx);
     CTX_LOCK;
-    afterpoll_internal(egc, nfds, fds, now);
+    afterpoll_internal(egc, &ctx->poller_app, nfds, fds, now);
     CTX_UNLOCK;
     EGC_FREE;
 }
@@ -790,7 +809,10 @@ void libxl__event_occurred(libxl__egc *egc, libxl_event *event)
         LIBXL_TAILQ_INSERT_TAIL(&egc->occurred_for_callback, event, link);
         return;
     } else {
+        libxl__poller *poller;
         LIBXL_TAILQ_INSERT_TAIL(&CTX->occurred, event, link);
+        LIBXL_LIST_FOREACH(poller, &CTX->pollers_event, entry)
+            libxl__poller_wakeup(egc, poller);
     }
 }
 
@@ -858,7 +880,94 @@ int libxl_event_check(libxl_ctx *ctx, libxl_event **event_r,
     return rc;
 }
 
-static int eventloop_iteration(libxl__egc *egc) {
+/*
+ * Manipulation of pollers
+ */
+
+int libxl__poller_init(libxl_ctx *ctx, libxl__poller *p)
+{
+    int r, rc;
+    p->fd_polls = 0;
+    p->fd_rindex = 0;
+
+    r = pipe(p->wakeup_pipe);
+    if (r) {
+        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "cannot create poller pipe");
+        rc = ERROR_FAIL;
+        goto out;
+    }
+
+    rc = libxl_fd_set_nonblock(ctx, p->wakeup_pipe[0], 1);
+    if (rc) goto out;
+
+    rc = libxl_fd_set_nonblock(ctx, p->wakeup_pipe[1], 1);
+    if (rc) goto out;
+
+    return 0;
+
+ out:
+    libxl__poller_dispose(p);
+    return rc;
+}
+
+void libxl__poller_dispose(libxl__poller *p)
+{
+    if (p->wakeup_pipe[1] > 0) close(p->wakeup_pipe[1]);
+    if (p->wakeup_pipe[0] > 0) close(p->wakeup_pipe[0]);
+    free(p->fd_polls);
+    free(p->fd_rindex);
+}
+
+libxl__poller *libxl__poller_get(libxl_ctx *ctx)
+{
+    /* must be called with ctx locked */
+    int rc;
+
+    libxl__poller *p = LIBXL_LIST_FIRST(&ctx->pollers_idle);
+    if (p)
+        return p;
+
+    p = malloc(sizeof(*p));
+    if (!p) {
+        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "cannot allocate poller");
+        return 0;
+    }
+    memset(p, 0, sizeof(*p));
+
+    rc = libxl__poller_init(ctx, p);
+    if (rc) return NULL;
+
+    return p;
+}
+
+void libxl__poller_put(libxl_ctx *ctx, libxl__poller *p)
+{
+    LIBXL_LIST_INSERT_HEAD(&ctx->pollers_idle, p, entry);
+}
+
+void libxl__poller_wakeup(libxl__egc *egc, libxl__poller *p)
+{
+    static const char buf[1] = "";
+
+    for (;;) {
+        int r = write(p->wakeup_pipe[1], buf, 1);
+        if (r==1) return;
+        assert(r==-1);
+        if (errno == EINTR) continue;
+        if (errno == EWOULDBLOCK) return;
+        LIBXL__EVENT_DISASTER(egc, "cannot poke watch pipe", errno, 0);
+        return;
+    }
+}
+
+/*
+ * Main event loop iteration
+ */
+
+static int eventloop_iteration(libxl__egc *egc, libxl__poller *poller) {
+    /* The CTX must be locked EXACTLY ONCE so that this function
+     * can unlock it when it polls.
+     */
     EGC_GC;
     int rc;
     struct timeval now;
@@ -871,23 +980,27 @@ static int eventloop_iteration(libxl__egc *egc) {
     int timeout;
 
     for (;;) {
-        int nfds = CTX->fd_polls_allocd;
+        int nfds = poller->fd_polls_allocd;
         timeout = -1;
-        rc = beforepoll_internal(gc, &nfds, CTX->fd_polls, &timeout, now);
+        rc = beforepoll_internal(gc, poller, &nfds, poller->fd_polls,
+                                 &timeout, now);
         if (!rc) break;
         if (rc != ERROR_BUFFERFULL) goto out;
 
         struct pollfd *newarray =
             (nfds > INT_MAX / sizeof(struct pollfd) / 2) ? 0 :
-            realloc(CTX->fd_polls, sizeof(*newarray) * nfds);
+            realloc(poller->fd_polls, sizeof(*newarray) * nfds);
 
         if (!newarray) { rc = ERROR_NOMEM; goto out; }
 
-        CTX->fd_polls = newarray;
-        CTX->fd_polls_allocd = nfds;
+        poller->fd_polls = newarray;
+        poller->fd_polls_allocd = nfds;
     }
 
-    rc = poll(CTX->fd_polls, CTX->fd_polls_allocd, timeout);
+    CTX_UNLOCK;
+    rc = poll(poller->fd_polls, poller->fd_polls_allocd, timeout);
+    CTX_LOCK;
+
     if (rc < 0) {
         if (errno == EINTR)
             return 0; /* will go round again if caller requires */
@@ -900,7 +1013,8 @@ static int eventloop_iteration(libxl__egc *egc) {
     rc = libxl__gettimeofday(gc, &now);
     if (rc) goto out;
 
-    afterpoll_internal(egc, CTX->fd_polls_allocd, CTX->fd_polls, now);
+    afterpoll_internal(egc, poller,
+                       poller->fd_polls_allocd, poller->fd_polls, now);
 
     CTX_UNLOCK;
 
@@ -914,15 +1028,19 @@ int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r,
                      libxl_event_predicate *pred, void *pred_user)
 {
     int rc;
+    libxl__poller *poller = NULL;
 
     EGC_INIT(ctx);
     CTX_LOCK;
 
+    poller = libxl__poller_get(ctx);
+    if (!poller) { rc = ERROR_FAIL; goto out; }
+
     for (;;) {
         rc = event_check_internal(egc, event_r, typemask, pred, pred_user);
         if (rc != ERROR_NOT_READY) goto out;
 
-        rc = eventloop_iteration(egc);
+        rc = eventloop_iteration(egc, poller);
         if (rc) goto out;
 
         /* we unlock and cleanup the egc each time we go through this loop,
@@ -936,6 +1054,8 @@ int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r,
     }
 
  out:
+    libxl__poller_put(ctx, poller);
+
     CTX_UNLOCK;
     EGC_FREE;
     return rc;
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index edb73eb..53d2462 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -205,6 +205,33 @@ struct libxl__evgen_disk_eject {
 _hidden void
 libxl__evdisable_disk_eject(libxl__gc*, libxl_evgen_disk_eject*);
 
+typedef struct libxl__poller libxl__poller;
+struct libxl__poller {
+    /*
+     * These are used to allow other threads to wake up a thread which
+     * may be stuck in poll, because whatever it was waiting for
+     * hadn't happened yet.  Threads which generate events will write
+     * a byte to each pipe.  A thread which is waiting will empty its
+     * own pipe, and put its poller on the pollers_event list, before
+     * releasing the ctx lock and going into poll; when it comes out
+     * of poll it will take the poller off the pollers_event list.
+     *
+     * When a thread is done with a poller it should put it onto
+     * pollers_idle, where it can be reused later.
+     *
+     * The "poller_app" is never idle, but is sometimes on
+     * pollers_event.
+     */
+    LIBXL_LIST_ENTRY(libxl__poller) entry;
+
+    struct pollfd *fd_polls;
+    int fd_polls_allocd;
+
+    int fd_rindex_allocd;
+    int *fd_rindex; /* see libxl_osevent_beforepoll */
+
+    int wakeup_pipe[2]; /* 0 means no fd allocated */
+};
 
 struct libxl__ctx {
     xentoollog_logger *lg;
@@ -235,10 +262,9 @@ struct libxl__ctx {
       /* See the comment for OSEVENT_HOOK_INTERN in libxl_event.c
        * for restrictions on the use of the osevent fields. */
 
-    struct pollfd *fd_polls;
-    int fd_polls_allocd;
-    int fd_rindex_allocd;
-    int *fd_rindex; /* see libxl_osevent_beforepoll */
+    libxl__poller poller_app; /* libxl_osevent_beforepoll and _afterpoll */
+    LIBXL_LIST_HEAD(, libxl__poller) pollers_event, pollers_idle;
+
     LIBXL_LIST_HEAD(, libxl__ev_fd) efds;
     LIBXL_TAILQ_HEAD(, libxl__ev_time) etimes;
 
@@ -524,6 +550,22 @@ _hidden void libxl__event_disaster(libxl__egc*, const char *msg, int errnoval,
     libxl__event_disaster(egc, msg, errnoval, type, __FILE__,__LINE__,__func__)
 
 
+/* Fills in, or disposes of, the resources held by, a poller whose
+ * space the caller has allocated.  ctx must be locked. */
+int libxl__poller_init(libxl_ctx *ctx, libxl__poller *p);
+void libxl__poller_dispose(libxl__poller *p);
+
+/* Obtain a fresh poller from malloc or the idle list, and put it
+ * away again afterwards.  _get can fail, returning NULL.
+ * ctx must be locked. */
+libxl__poller *libxl__poller_get(libxl_ctx *ctx);
+void libxl__poller_put(libxl_ctx *ctx, libxl__poller *p);
+
+/* Notifies whoever is polling using p that they should wake up.
+ * ctx must be locked. */
+void libxl__poller_wakeup(libxl__egc *egc, libxl__poller *p);
+
+
 /* from xl_dom */
 _hidden libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__domain_shutdown_reason(libxl__gc *gc, uint32_t domid);
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure
  2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
                   ` (4 preceding siblings ...)
  2012-01-13 19:25 ` [PATCH 5/9] libxl: Permit multithreaded event waiting Ian Jackson
@ 2012-01-13 19:25 ` Ian Jackson
  2012-01-19 10:44   ` Ian Campbell
  2012-01-13 19:25 ` [PATCH 7/9] libxl: New convenience macro CONTAINER_OF Ian Jackson
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Jackson

Provide a new set of machinery for writing public libxl functions
which may take a long time.  The application gets to decide whether
they want the function to be synchronous, or whether they'd prefer to
get a callback, or an event, when the operation is complete.

User(s) of this machinery will be introduced in later patch(es).

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tools/libxl/libxl.h          |   50 ++++++++++++
 tools/libxl/libxl_event.c    |  183 ++++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h |  112 ++++++++++++++++++++++++++
 tools/libxl/libxl_types.idl  |    4 +
 4 files changed, 349 insertions(+), 0 deletions(-)

diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index e32881b..416d6e8 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -235,8 +235,58 @@ enum {
     ERROR_NOT_READY = -11,
     ERROR_OSEVENT_REG_FAIL = -12,
     ERROR_BUFFERFULL = -13,
+    ERROR_ASYNC_INPROGRESS = -14,
 };
 
+
+/*
+ * Some libxl operations can take a long time.  These functions take a
+ * parameter to control their concurrency:
+ *     libxl_asyncop_how *ao_how
+ *
+ * If ao_how==NULL, the function will be synchronous.
+ *
+ * If ao_how!=NULL, the function will set the operation going, and
+ * if this is successful will return ERROR_ASYNC_INPROGRESS.
+ *
+ * If ao_how->callback!=NULL, the callback will be called when the
+ * operation completes.  The same rules as for libxl_event_hooks
+ * apply, including the reentrancy rules and the possibility of
+ * "disaster", except that libxl calls ao_how->callback instead of
+ * libxl_event_hooks.event_occurs.
+ *
+ * If ao_how->callback==NULL, a libxl_event will be generated which
+ * can be obtained from libxl_event_wait or libxl_event_check.  The
+ * event will have type OPERATION_COMPLETE (which is not used
+ * elsewhere).
+ *
+ * Note that it is possible for an asynchronous operation which is to
+ * result in a callback to complete during its initiating function
+ * call.  In this case the initiating function will return
+ * ERROR_ASYNC_INPROGRESS, even though by the time it returns the
+ * operation is complete and the callback has already happened.
+ *
+ * The application must set and use ao_how->u.for_event (which will be
+ * copied into libxl_event.for_user) or ao_how->u.for_callback (passed
+ * to the callback) to determine which operation finished, and it must
+ * of course check the rc value for errors.
+ *
+ * *ao_how does not need to remain valid after the initiating function
+ * returns.
+ *
+ * Callbacks may occur on any thread in which the application calls
+ * libxl.
+ */
+
+typedef struct {
+    void (*callback)(libxl_ctx *ctx, int rc, void *for_callback);
+    union {
+        libxl_ev_user for_event; /* used if callback==NULL */
+        void *for_callback; /* passed to callback */
+    } u;
+} libxl_asyncop_how;
+
+
 #define LIBXL_VERSION 0
 
 typedef struct {
diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index 82889f6..b99049a 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -771,10 +771,21 @@ static void egc_run_callbacks(libxl__egc *egc)
 {
     EGC_GC;
     libxl_event *ev, *ev_tmp;
+
     LIBXL_TAILQ_FOREACH_SAFE(ev, &egc->occurred_for_callback, link, ev_tmp) {
         LIBXL_TAILQ_REMOVE(&egc->occurred_for_callback, ev, link);
         CTX->event_hooks->event_occurs(CTX->event_hooks_user, ev);
     }
+
+    libxl__ao *ao, *ao_tmp;
+    LIBXL_TAILQ_FOREACH_SAFE(ao, &egc->aos_for_callback,
+                             entry_for_callback, ao_tmp) {
+        LIBXL_TAILQ_REMOVE(&egc->aos_for_callback, ao, entry_for_callback);
+        ao->how.callback(CTX, ao->rc, ao->how.u.for_callback);
+        ao->notified = 1;
+        if (!ao->in_initiator)
+            libxl__ao__destroy(CTX, ao);
+    }
 }
 
 void libxl__egc_cleanup(libxl__egc *egc)
@@ -1061,6 +1072,178 @@ int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r,
     return rc;
 }
 
+
+
+/*
+ * The two possible state flows of an ao:
+ *
+ * Completion before initiator return:
+ *
+ *     Initiator thread                       Possible other threads
+ *
+ *   * ao_create allocates memory and
+ *     initialises the struct
+ *
+ *   * the initiator function does its
+ *     work, setting up various internal
+ *     asynchronous operations -----------> * asynchronous operations
+ *                                            start to take place and
+ *                                            might cause ao completion
+ *                                                |
+ *   * initiator calls ao_inprogress:             |
+ *     - if synchronous, run event loop           |
+ *       until the ao completes                   |
+ *                              - ao completes on some thread
+ *                              - completing thread releases the lock
+ *                     <--------------'
+ *     - ao_inprogress takes the lock
+ *     - destroy the ao
+ *
+ *
+ * Completion after initiator return (asynch. only):
+ *
+ *
+ *     Initiator thread                       Possible other threads
+ *
+ *   * ao_create allocates memory and
+ *     initialises the struct
+ *
+ *   * the initiator function does its
+ *     work, setting up various internal
+ *     asynchronous operations -----------> * asynchronous operations
+ *                                            start to take place and
+ *                                            might cause ao completion
+ *                                                |
+ *   * initiator calls ao_inprogress:             |
+ *     - observes event not yet done,             |
+ *     - returns to caller                        |
+ *                                                |
+ *                              - ao completes on some thread
+ *                              - generate the event or call the callback
+ *                              - destroy the ao
+ */
+
+void libxl__ao__destroy(libxl_ctx *ctx, libxl__ao *ao) {
+    if (!ao) return;
+    if (ao->poller) libxl__poller_put(ctx, ao->poller);
+    ao->magic = LIBXL__AO_MAGIC_DESTROYED;
+    libxl__free_all(&ao->gc);
+    free(ao);
+}
+
+void libxl__ao_abort(libxl__ao *ao) {
+    AO_GC;
+    assert(ao->magic == LIBXL__AO_MAGIC);
+    assert(ao->in_initiator);
+    assert(!ao->complete);
+    libxl__ao__destroy(CTX, ao);
+}
+
+void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc) {
+    assert(ao->magic == LIBXL__AO_MAGIC);
+    assert(!ao->complete);
+    ao->complete = 1;
+    ao->rc = rc;
+
+    if (ao->poller) {
+        assert(ao->in_initiator);
+        libxl__poller_wakeup(egc, ao->poller);
+    } else if (ao->how.callback) {
+        LIBXL_TAILQ_INSERT_TAIL(&egc->aos_for_callback, ao, entry_for_callback);
+    } else {
+        libxl_event *ev;
+        ev = NEW_EVENT(egc, OPERATION_COMPLETE, ao->domid);
+        if (ev) {
+            ev->for_user = ao->how.u.for_event;
+            ev->u.operation_complete.rc = ao->rc;
+            libxl__event_occurred(egc, ev);
+        }
+        ao->notified = 1;
+    }
+    if (!ao->in_initiator && ao->notified)
+        libxl__ao__destroy(libxl__gc_owner(&egc->gc), ao);
+}
+
+libxl__ao *libxl__ao_create(libxl_ctx *ctx, uint32_t domid,
+                            const libxl_asyncop_how *how) {
+    libxl__ao *ao;
+
+    ao = calloc(sizeof(*ao),1);
+    if (!ao) goto out;
+
+    ao->magic = LIBXL__AO_MAGIC;
+    ao->in_initiator = 1;
+    ao->poller = 0;
+    ao->domid = domid;
+    LIBXL_INIT_GC(ao->gc, ctx);
+
+    if (how) {
+        ao->how = *how;
+    } else {
+        ao->poller = libxl__poller_get(ctx);
+        if (!ao->poller) goto out;
+    }
+    return ao;
+
+ out:
+    if (ao) libxl__ao__destroy(ctx, ao);
+    return NULL;
+}
+
+int libxl__ao_inprogress(libxl__ao *ao) {
+    AO_GC;
+    int rc;
+
+    assert(ao->magic == LIBXL__AO_MAGIC);
+    assert(ao->in_initiator);
+
+    if (ao->poller) {
+        /* Caller wants it done synchronously. */
+        /* We use a fresh gc, so that we can free things
+         * each time round the loop. */
+        libxl__egc egc;
+        LIBXL_INIT_EGC(egc,CTX);
+
+        for (;;) {
+            assert(ao->magic == LIBXL__AO_MAGIC);
+
+            if (ao->complete) {
+                rc = ao->rc;
+                ao->notified = 1;
+                break;
+            }
+
+            rc = eventloop_iteration(&egc,ao->poller);
+            if (rc) {
+                /* Oh dear, this is quite unfortunate. */
+                LIBXL__LOG(CTX, LIBXL__LOG_ERROR, "Error waiting for"
+                           " event during long-running operation (rc=%d)", rc);
+                sleep(1);
+                /* It's either this or return ERROR_I_DONT_KNOW_WHETHER
+                 * _THE_THING_YOU_ASKED_FOR_WILL_BE_DONE_LATER_WHEN
+                 * _YOU_DIDNT_EXPECT_IT, since we don't have any kind of
+                 * cancellation ability. */
+            }
+
+            CTX_UNLOCK;
+            libxl__egc_cleanup(&egc);
+            CTX_LOCK;
+        }
+    } else {
+        rc = ERROR_ASYNC_INPROGRESS;
+    }
+
+    ao->in_initiator = 0;
+
+    if (ao->notified) {
+        assert(ao->complete);
+        libxl__ao__destroy(CTX,ao);
+    }
+
+    return rc;
+}
+
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 53d2462..594b9fb 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -112,6 +112,7 @@ _hidden void libxl__log(libxl_ctx *ctx, xentoollog_level msglevel, int errnoval,
 
 typedef struct libxl__gc libxl__gc;
 typedef struct libxl__egc libxl__egc;
+typedef struct libxl__ao libxl__ao;
 
 typedef struct libxl__ev_fd libxl__ev_fd;
 typedef void libxl__ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev,
@@ -216,6 +217,10 @@ struct libxl__poller {
      * releasing the ctx lock and going into poll; when it comes out
      * of poll it will take the poller off the pollers_event list.
      *
+     * A thread which is waiting for completion of a synchronous ao
+     * will allocate a poller and record it in the ao, so that other
+     * threads can wake it up.
+     *
      * When a thread is done with a poller it should put it onto
      * pollers_idle, where it can be reused later.
      *
@@ -322,6 +327,21 @@ struct libxl__egc {
     /* for event-generating functions only */
     struct libxl__gc gc;
     struct libxl__event_list occurred_for_callback;
+    LIBXL_TAILQ_HEAD(, libxl__ao) aos_for_callback;
+};
+
+#define LIBXL__AO_MAGIC              0xA0FACE00ul
+#define LIBXL__AO_MAGIC_DESTROYED    0xA0DEAD00ul
+
+struct libxl__ao {
+    uint32_t magic;
+    unsigned in_initiator:1, complete:1, notified:1;
+    int rc;
+    libxl__gc gc;
+    libxl_asyncop_how how;
+    libxl__poller *poller;
+    uint32_t domid;
+    LIBXL_TAILQ_ENTRY(libxl__ao) entry_for_callback;
 };
 
 #define LIBXL_INIT_GC(gc,ctx) do{               \
@@ -1106,6 +1126,7 @@ libxl__device_model_version_running(libxl__gc *gc, uint32_t domid);
 #define LIBXL_INIT_EGC(egc,ctx) do{                     \
         LIBXL_INIT_GC((egc).gc,ctx);                    \
         LIBXL_TAILQ_INIT(&(egc).occurred_for_callback); \
+        LIBXL_TAILQ_INIT(&(egc).aos_for_callback);      \
     } while(0)
 
 _hidden void libxl__egc_cleanup(libxl__egc *egc);
@@ -1123,6 +1144,97 @@ _hidden void libxl__egc_cleanup(libxl__egc *egc);
 
 
 /*
+ * Machinery for asynchronous operations ("ao")
+ *
+ * All "slow" functions (includes anything that might block on a
+ * guest or an external script) need to use the asynchronous
+ * operation ("ao") machinery.  The function should take a parameter
+ * const libxl_asyncop_how *ao_how and must start with a call to
+ * AO_CREATE.  These functions MAY NOT be called from
+ * outside libxl, because they can cause reentrancy callbacks.
+ *
+ * No functions called internally within libxl should ever return
+ * ERROR_ASYNC_INPROGRESS.
+ *
+ * Lifecycle of an ao:
+ *
+ * - Created by libxl__ao_create (or the AO_CREATE convenience macro).
+ *
+ * - After creation, can be used by code which implements
+ *   the operation as follows:
+ *      - the ao's gc, for allocating memory for the lifetime
+ *        of the operation (possibly with the help of the AO_GC
+ *        macro to introduce the gc into scope)
+ *      - the ao itself may be passed about to sub-functions
+ *        so that they can stash it away etc.
+ *      - in particular, the ao pointer must be stashed in some
+ *        per-operation structure which is also passed as a user
+ *        pointer to the internal event generation request routines
+ *        libxl__evgen_FOO, so that at some point a CALLBACK will be
+ *        made when the operation is complete.
+ *
+ * - If initiation is successful, the initiating function needs
+ *   to run libxl__ao_inprogress right before unlocking and
+ *   returning, and return whatever it returns (AO_INPROGRESS macro).
+ *
+ * - If the initiation is unsuccessful, the initiating function must
+ *   call libxl__ao_abort before unlocking and returning whatever
+ *   error code is appropriate (AO_ABORT macro).
+ *
+ * - Later, some callback function, whose callback has been requested
+ *   directly or indirectly, should call libxl__ao_complete (with the
+ *   ctx locked, as it will generally already be in any event callback
+ *   function).  This must happen exactly once for each ao (and not if
+ *   the ao has been destroyed, obviously), and it may not happen
+ *   until libxl__ao_inprogress has been called on the ao.
+ *
+ * - Note that during callback functions, two gcs are available:
+ *    - The one in egc, whose lifetime is only this callback
+ *    - The one in ao, whose lifetime is the asynchronous operation
+ *   Usually a callback function should use CONTAINER_OF
+ *   to obtain its own structure, containing a pointer to the ao,
+ *   and then use the gc from that ao.
+ */
+
+#define AO_CREATE(ctx, domid, ao_how)                           \
+    libxl__ctx_lock(ctx); /* ao_create needs the ctx locked */  \
+    libxl__ao *ao = libxl__ao_create(ctx, domid, ao_how);       \
+    if (!ao) { libxl__ctx_unlock(ctx); return ERROR_NOMEM; }    \
+    AO_GC;
+
+#define AO_INPROGRESS do{                                       \
+        libxl_ctx *ao__ctx = libxl__gc_owner(&ao->gc);          \
+        int ao__rc = libxl__ao_inprogress(ao);                  \
+        libxl__ctx_unlock(ao__ctx); /* gc is now invalid */     \
+        return ao__rc;                                          \
+   }while(0)
+        
+
+#define AO_ABORT(rc) do{                                        \
+        libxl_ctx *ao__ctx = libxl__gc_owner(&ao->gc);          \
+        assert(rc);                                             \
+        assert(rc != ERROR_ASYNC_INPROGRESS);                   \
+        libxl__ao_abort(ao);                                    \
+        libxl__ctx_unlock(ao__ctx); /* gc is now invalid */     \
+        return (rc);                                            \
+    }while(0)
+
+#define AO_GC                                   \
+    libxl__gc *const gc = &ao->gc
+
+
+/* All of these MUST be called with the ctx locked.
+ * libxl__ao_inprogress MUST be called with the ctx locked exactly once. */
+_hidden libxl__ao *libxl__ao_create(libxl_ctx*, uint32_t domid,
+                                    const libxl_asyncop_how*);
+_hidden int libxl__ao_inprogress(libxl__ao *ao);
+_hidden void libxl__ao_abort(libxl__ao *ao);
+_hidden void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc);
+
+/* For use by ao machinery ONLY */
+_hidden void libxl__ao__destroy(libxl_ctx*, libxl__ao *ao);
+
+/*
  * Convenience macros.
  */
 
diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
index a6dac79..325bb21 100644
--- a/tools/libxl/libxl_types.idl
+++ b/tools/libxl/libxl_types.idl
@@ -395,6 +395,7 @@ libxl_event_type = Enumeration("event_type", [
     (1, "DOMAIN_SHUTDOWN"),
     (2, "DOMAIN_DESTROY"),
     (3, "DISK_EJECT"),
+    (4, "OPERATION_COMPLETE"),
     ])
 
 libxl_ev_user = UInt(64)
@@ -418,4 +419,7 @@ libxl_event = Struct("event",[
                                         ("vdev", string),
                                         ("disk", libxl_device_disk),
                                  ])),
+           ("operation_complete", Struct(None, [
+                                        ("rc", integer),
+                                 ])),
            ]))])
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 7/9] libxl: New convenience macro CONTAINER_OF
  2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
                   ` (5 preceding siblings ...)
  2012-01-13 19:25 ` [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure Ian Jackson
@ 2012-01-13 19:25 ` Ian Jackson
  2012-01-18 14:04   ` Ian Campbell
  2012-01-13 19:25 ` [PATCH 8/9] libxl: Introduce libxl__ev_devstate Ian Jackson
  2012-01-13 19:25 ` [PATCH 9/9] libxl: Convert to asynchronous: device removal Ian Jackson
  8 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Jackson

Provide a convenient and type-safe wrapper which does the correct
dance to subtract offsetof.  This is very similar to the
"container_of" macro in the Linux kernel, but it has an additional
feature that instead of the type argument you may also pass an
expression of that type; this makes initialising a variable with
CONTAINER_OF easier.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tools/libxl/libxl_internal.h |   29 +++++++++++++++++++++++++++++
 1 files changed, 29 insertions(+), 0 deletions(-)

diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 594b9fb..213b5f9 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -1238,6 +1238,35 @@ _hidden void libxl__ao__destroy(libxl_ctx*, libxl__ao *ao);
  * Convenience macros.
  */
 
+/*
+ * CONTAINER_OF works like this.  Given:
+ *    typedef struct {
+ *      ...
+ *      member_type member_name;
+ *      ...
+ *    } outer_type;
+ *    outer_type outer, *outer_var;
+ *    member_type *inner_ptr = &outer.member_name;
+ *
+ * Then, effectively:
+ *    outer_type *CONTAINER_OF(member_type *inner_ptr,
+ *                             *outer_var, // or type name for outer_type
+ *                             member_name);
+ *
+ * So that:
+ *    CONTAINER_OF(inner_ptr, *outer_var, member_name) == &outer
+ *    CONTAINER_OF(inner_ptr, outer_type, member_name) == &outer
+ */
+#define CONTAINER_OF(inner_ptr, outer, member_name)                     \
+    ({                                                                  \
+        typeof(outer) *container_of_;                                   \
+        container_of_ = (void*)((char*)(inner_ptr) -                    \
+                                offsetof(typeof(outer), member_name));  \
+        (void)(&container_of_->member_name ==                           \
+               (typeof(inner_ptr))0) /* type check */;                  \
+        container_of_;                                                  \
+    })
+
 
 /*
  * All of these assume (or define)
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 8/9] libxl: Introduce libxl__ev_devstate
  2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
                   ` (6 preceding siblings ...)
  2012-01-13 19:25 ` [PATCH 7/9] libxl: New convenience macro CONTAINER_OF Ian Jackson
@ 2012-01-13 19:25 ` Ian Jackson
  2012-01-19 10:54   ` Ian Campbell
  2012-01-13 19:25 ` [PATCH 9/9] libxl: Convert to asynchronous: device removal Ian Jackson
  8 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Jackson

Provide a new-style asynchronous facility for waiting for device
states on xenbus.  This will replace libxl__wait_for_device_state,
after the callers have been updated in later patches.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tools/libxl/libxl_event.c    |   75 ++++++++++++++++++++++++++++++++++++++++++
 tools/libxl/libxl_internal.h |   41 +++++++++++++++++++++++
 2 files changed, 116 insertions(+), 0 deletions(-)

diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
index b99049a..1d271b8 100644
--- a/tools/libxl/libxl_event.c
+++ b/tools/libxl/libxl_event.c
@@ -507,6 +507,81 @@ void libxl__ev_xswatch_deregister(libxl__gc *gc, libxl__ev_xswatch *w)
 }
 
 /*
+ * waiting for device state
+ */
+
+static void devstate_watch_callback(libxl__egc *egc, libxl__ev_xswatch *watch,
+                                const char *watch_path, const char *event_path)
+{
+    EGC_GC;
+    libxl__ev_devstate *ds = CONTAINER_OF(watch, *ds, watch);
+    int rc;
+
+    char *sstate = libxl__xs_read(gc, XBT_NULL, watch_path);
+    if (!sstate) {
+        if (errno == ENOENT) {
+            LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d"
+                       " but it was removed", watch_path, ds->wanted);
+            rc = ERROR_INVAL;
+        } else {
+            LIBXL__LOG_ERRNO(CTX, LIBXL__LOG_ERROR, "backend %s wanted state"
+                             " %d but read failed", watch_path, ds->wanted);
+            rc = ERROR_FAIL;
+        }
+    } else {
+        int got = atoi(sstate);
+        if (got == ds->wanted) {
+            LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d ok",
+                       watch_path, ds->wanted);
+            rc = 0;
+        } else {
+            LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d"
+                       " still waiting state %d", watch_path, ds->wanted, got);
+            return;
+        }
+    }
+    libxl__ev_devstate_cancel(gc, ds);
+    ds->callback(egc, ds, rc);
+}
+
+static void devstate_timeout(libxl__egc *egc, libxl__ev_time *ev,
+                             const struct timeval *requested_abs)
+{
+    EGC_GC;
+    libxl__ev_devstate *ds = CONTAINER_OF(ev, *ds, timeout);
+    LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d "
+               " timed out", ds->watch.path, ds->wanted);
+    libxl__ev_devstate_cancel(gc, ds);
+    ds->callback(egc, ds, ERROR_TIMEDOUT);
+}
+
+int libxl__ev_devstate_wait(libxl__gc *gc, libxl__ev_devstate *ds,
+                            libxl__ev_devstate_callback cb,
+                            const char *state_path, int state, int milliseconds)
+{
+    int rc;
+
+    libxl__ev_time_init(&ds->timeout);
+    libxl__ev_xswatch_init(&ds->watch);
+    ds->wanted = state;
+    ds->callback = cb;
+
+    rc = libxl__ev_time_register_rel(gc, &ds->timeout, devstate_timeout,
+                                     milliseconds);
+    if (rc) goto out;
+
+    rc = libxl__ev_xswatch_register(gc, &ds->watch, devstate_watch_callback,
+                                    state_path);
+    if (rc) goto out;
+
+    return 0;
+
+ out:
+    libxl__ev_devstate_cancel(gc, ds);
+    return rc;
+}
+
+/*
  * osevent poll
  */
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 213b5f9..b7f0f54 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -684,6 +684,47 @@ _hidden int libxl__wait_for_device_state(libxl__gc *gc, struct timeval *tv,
                                          libxl__device_state_handler handler);
 
 /*
+ * libxl__ev_devstate - waits a given time for a device to
+ * reach a given state.  Follows the libxl_ev_* conventions.
+ * Will generate only one event, and after that is automatically
+ * cancelled.
+ */
+typedef struct libxl__ev_devstate libxl__ev_devstate;
+typedef void libxl__ev_devstate_callback(libxl__egc *egc, libxl__ev_devstate*,
+                                         int rc);
+  /* rc will be 0, ERROR_TIMEDOUT, ERROR_INVAL (meaning path was removed),
+   * or ERROR_FAIL if other stuff went wrong (in which latter case, logged) */
+
+struct libxl__ev_devstate {
+    /* read-only for caller, who may read only when waiting: */
+    int wanted;
+    libxl__ev_devstate_callback *callback;
+    /* as for the remainder, read-only public parts may also be
+     * read by the caller (notably, watch.path), but only when waiting: */
+    libxl__ev_xswatch watch;
+    libxl__ev_time timeout;
+};
+
+static inline void libxl__ev_devstate_init(libxl__ev_devstate *ds)
+{
+    libxl__ev_time_init(&ds->timeout);
+    libxl__ev_xswatch_init(&ds->watch);
+}
+
+static inline void libxl__ev_devstate_cancel(libxl__gc *gc,
+                                             libxl__ev_devstate *ds)
+{
+    libxl__ev_time_deregister(gc,&ds->timeout);
+    libxl__ev_xswatch_deregister(gc,&ds->watch);
+}
+
+_hidden int libxl__ev_devstate_wait(libxl__gc *gc, libxl__ev_devstate *ds,
+                                    libxl__ev_devstate_callback cb,
+                                    const char *state_path,
+                                    int state, int milliseconds);
+
+
+/*
  * libxl__try_phy_backend - Check if there's support for the passed
  * type of file using the PHY backend
  * st_mode: mode_t of the file, as returned by stat function
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* [PATCH 9/9] libxl: Convert to asynchronous: device removal
  2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
                   ` (7 preceding siblings ...)
  2012-01-13 19:25 ` [PATCH 8/9] libxl: Introduce libxl__ev_devstate Ian Jackson
@ 2012-01-13 19:25 ` Ian Jackson
  2012-01-19 11:55   ` Ian Campbell
  8 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-13 19:25 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Jackson

Convert libxl_FOO_device_remove, and the function which does the bulk
of the work, libxl__device_remove, to the new async ops scheme.

Adjust all callers.

Also remove libxl__wait_for_device_state which is now obsolete.

Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 tools/libxl/libxl.c          |   60 +++++++++++++--------
 tools/libxl/libxl.h          |   16 ++++--
 tools/libxl/libxl_device.c   |  118 +++++++++++++-----------------------------
 tools/libxl/libxl_internal.h |   30 ++---------
 tools/libxl/xl_cmdimpl.c     |    4 +-
 5 files changed, 93 insertions(+), 135 deletions(-)

diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
index 9890d79..d63da97 100644
--- a/tools/libxl/libxl.c
+++ b/tools/libxl/libxl.c
@@ -1310,19 +1310,23 @@ out:
 }
 
 int libxl_device_disk_remove(libxl_ctx *ctx, uint32_t domid,
-                             libxl_device_disk *disk)
+                             libxl_device_disk *disk,
+                             const libxl_asyncop_how *ao_how)
 {
-    GC_INIT(ctx);
+    AO_CREATE(ctx, domid, ao_how);
     libxl__device device;
     int rc;
 
     rc = libxl__device_from_disk(gc, domid, disk, &device);
     if (rc != 0) goto out;
 
-    rc = libxl__device_remove(gc, &device, 1);
+    rc = libxl__initiate_device_remove(ao, &device);
+    if (rc) goto out;
+
+    AO_INPROGRESS;
+
 out:
-    GC_FREE;
-    return rc;
+    AO_ABORT(rc);
 }
 
 int libxl_device_disk_destroy(libxl_ctx *ctx, uint32_t domid,
@@ -1536,11 +1540,11 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk)
 
     ret = 0;
 
-    libxl_device_disk_remove(ctx, domid, disks + i);
+    libxl_device_disk_remove(ctx, domid, disks + i, 0);
     libxl_device_disk_add(ctx, domid, disk);
     stubdomid = libxl_get_stubdom_id(ctx, domid);
     if (stubdomid) {
-        libxl_device_disk_remove(ctx, stubdomid, disks + i);
+        libxl_device_disk_remove(ctx, stubdomid, disks + i, 0);
         libxl_device_disk_add(ctx, stubdomid, disk);
     }
 out:
@@ -1759,19 +1763,23 @@ out:
 }
 
 int libxl_device_nic_remove(libxl_ctx *ctx, uint32_t domid,
-                            libxl_device_nic *nic)
+                            libxl_device_nic *nic,
+                            const libxl_asyncop_how *ao_how)
 {
-    GC_INIT(ctx);
+    AO_CREATE(ctx, domid, ao_how);
     libxl__device device;
     int rc;
 
     rc = libxl__device_from_nic(gc, domid, nic, &device);
     if (rc != 0) goto out;
 
-    rc = libxl__device_remove(gc, &device, 1);
+    rc = libxl__initiate_device_remove(ao, &device);
+    if (rc) goto out;
+
+    AO_INPROGRESS;
+
 out:
-    GC_FREE;
-    return rc;
+    AO_ABORT(rc);
 }
 
 int libxl_device_nic_destroy(libxl_ctx *ctx, uint32_t domid,
@@ -2099,19 +2107,23 @@ out:
 }
 
 int libxl_device_vkb_remove(libxl_ctx *ctx, uint32_t domid,
-                            libxl_device_vkb *vkb)
+                            libxl_device_vkb *vkb,
+                            const libxl_asyncop_how *ao_how)
 {
-    GC_INIT(ctx);
+    AO_CREATE(ctx, domid, ao_how);
     libxl__device device;
     int rc;
 
     rc = libxl__device_from_vkb(gc, domid, vkb, &device);
     if (rc != 0) goto out;
 
-    rc = libxl__device_remove(gc, &device, 1);
+    rc = libxl__initiate_device_remove(ao, &device);
+    if (rc) goto out;
+
+    AO_INPROGRESS;
+
 out:
-    GC_FREE;
-    return rc;
+    AO_ABORT(rc);
 }
 
 int libxl_device_vkb_destroy(libxl_ctx *ctx, uint32_t domid,
@@ -2216,19 +2228,23 @@ out:
 }
 
 int libxl_device_vfb_remove(libxl_ctx *ctx, uint32_t domid,
-                            libxl_device_vfb *vfb)
+                            libxl_device_vfb *vfb,
+                            const libxl_asyncop_how *ao_how)
 {
-    GC_INIT(ctx);
+    AO_CREATE(ctx, domid, ao_how);
     libxl__device device;
     int rc;
 
     rc = libxl__device_from_vfb(gc, domid, vfb, &device);
     if (rc != 0) goto out;
 
-    rc = libxl__device_remove(gc, &device, 1);
+    rc = libxl__initiate_device_remove(ao, &device);
+    if (rc) goto out;
+
+    AO_INPROGRESS;
+
 out:
-    GC_FREE;
-    return rc;
+    AO_ABORT(rc);
 }
 
 int libxl_device_vfb_destroy(libxl_ctx *ctx, uint32_t domid,
diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
index 416d6e8..602bd01 100644
--- a/tools/libxl/libxl.h
+++ b/tools/libxl/libxl.h
@@ -464,7 +464,9 @@ libxl_vminfo * libxl_list_vm(libxl_ctx *ctx, int *nb_vm);
 /* Disks */
 int libxl_device_disk_init(libxl_ctx *ctx, libxl_device_disk *disk);
 int libxl_device_disk_add(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk);
-int libxl_device_disk_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk);
+int libxl_device_disk_remove(libxl_ctx *ctx, uint32_t domid,
+                             libxl_device_disk *disk,
+                             const libxl_asyncop_how *ao_how);
 int libxl_device_disk_destroy(libxl_ctx *ctx, uint32_t domid,
                               libxl_device_disk *disk);
 
@@ -488,7 +490,9 @@ int libxl_device_disk_local_detach(libxl_ctx *ctx, libxl_device_disk *disk);
 /* Network Interfaces */
 int libxl_device_nic_init(libxl_ctx *ctx, libxl_device_nic *nic);
 int libxl_device_nic_add(libxl_ctx *ctx, uint32_t domid, libxl_device_nic *nic);
-int libxl_device_nic_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_nic *nic);
+int libxl_device_nic_remove(libxl_ctx *ctx, uint32_t domid,
+                            libxl_device_nic *nic,
+                            const libxl_asyncop_how *ao_how);
 int libxl_device_nic_destroy(libxl_ctx *ctx, uint32_t domid, libxl_device_nic *nic);
 
 libxl_device_nic *libxl_device_nic_list(libxl_ctx *ctx, uint32_t domid, int *num);
@@ -498,13 +502,17 @@ int libxl_device_nic_getinfo(libxl_ctx *ctx, uint32_t domid,
 /* Keyboard */
 int libxl_device_vkb_init(libxl_ctx *ctx, libxl_device_vkb *vkb);
 int libxl_device_vkb_add(libxl_ctx *ctx, uint32_t domid, libxl_device_vkb *vkb);
-int libxl_device_vkb_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_vkb *vkb);
+int libxl_device_vkb_remove(libxl_ctx *ctx, uint32_t domid,
+                            libxl_device_vkb *vkb,
+                            const libxl_asyncop_how *ao_how);
 int libxl_device_vkb_destroy(libxl_ctx *ctx, uint32_t domid, libxl_device_vkb *vkb);
 
 /* Framebuffer */
 int libxl_device_vfb_init(libxl_ctx *ctx, libxl_device_vfb *vfb);
 int libxl_device_vfb_add(libxl_ctx *ctx, uint32_t domid, libxl_device_vfb *vfb);
-int libxl_device_vfb_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_vfb *vfb);
+int libxl_device_vfb_remove(libxl_ctx *ctx, uint32_t domid,
+                            libxl_device_vfb *vfb,
+                            const libxl_asyncop_how *ao_how);
 int libxl_device_vfb_destroy(libxl_ctx *ctx, uint32_t domid, libxl_device_vfb *vfb);
 
 /* PCI Passthrough */
diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
index 5d05e90..e905133 100644
--- a/tools/libxl/libxl_device.c
+++ b/tools/libxl/libxl_device.c
@@ -357,85 +357,41 @@ int libxl__device_disk_dev_number(const char *virtpath, int *pdisk,
     return -1;
 }
 
-/*
- * Returns 0 if a device is removed, ERROR_* if an error
- * or timeout occurred.
- */
-int libxl__wait_for_device_state(libxl__gc *gc, struct timeval *tv,
-                                 XenbusState state,
-                                 libxl__device_state_handler handler)
-{
-    libxl_ctx *ctx = libxl__gc_owner(gc);
-    int nfds, rc;
-    unsigned int n;
-    fd_set rfds;
-    char **l1 = NULL;
-
-start:
-    rc = 1;
-    nfds = xs_fileno(ctx->xsh) + 1;
-    FD_ZERO(&rfds);
-    FD_SET(xs_fileno(ctx->xsh), &rfds);
-    switch (select(nfds, &rfds, NULL, NULL, tv)) {
-        case -1:
-            if (errno == EINTR)
-                goto start;
-            rc = ERROR_FAIL;
-            break;
-        case 0:
-            rc = ERROR_TIMEDOUT;
-            break;
-        default:
-            l1 = xs_read_watch(ctx->xsh, &n);
-            if (l1 != NULL) {
-                char *sstate = libxl__xs_read(gc, XBT_NULL,
-                                             l1[XS_WATCH_PATH]);
-                if (!sstate || atoi(sstate) == state) {
-                    /* Call handler function if present */
-                    if (handler)
-                        rc = handler(gc, l1, sstate);
-                } else {
-                    /* State is different than expected, continue waiting... */
-                    goto start;
-                }
-                free(l1);
-            } else {
-                rc = ERROR_FAIL;
-            }
-            break;
-    }
-    return rc;
-}
 
-/*
- * Handler function for device destruction to be passed to
- * libxl__wait_for_device_state
- */
-static int destroy_device(libxl__gc *gc, char **l1, char *state)
-{
-    libxl_ctx *ctx = libxl__gc_owner(gc);
-
-    xs_unwatch(ctx->xsh, l1[0], l1[1]);
-    xs_rm(ctx->xsh, XBT_NULL, l1[XS_WATCH_TOKEN]);
-    LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
-               "Destroyed device backend at %s",
-               l1[XS_WATCH_TOKEN]);
+typedef struct {
+    libxl__ao *ao;
+    libxl__ev_devstate ds;
+} libxl__ao_device_remove;
+
+static void device_remove_cleanup(libxl__gc *gc,
+                                  libxl__ao_device_remove *aorm) {
+    if (!aorm) return;
+    libxl__ev_devstate_cancel(gc, &aorm->ds);
+}
 
-    return 0;
+static void device_remove_callback(libxl__egc *egc, libxl__ev_devstate *ds,
+                                   int rc) {
+    libxl__ao_device_remove *aorm = CONTAINER_OF(ds, *aorm, ds);
+    libxl__gc *gc = &aorm->ao->gc;
+    libxl__ao_complete(egc, aorm->ao, rc);
+    device_remove_cleanup(gc, aorm);
 }
 
-/*
- * Returns 0 (device already destroyed) or 1 (caller must
- * wait_for_dev_destroy) on success, ERROR_* on fail.
- */
-int libxl__device_remove(libxl__gc *gc, libxl__device *dev, int wait)
+int libxl__initiate_device_remove(libxl__ao *ao, libxl__device *dev)
 {
+    /* Arranges that dev will be removed from its guest.  When
+     * this is done, the ao will be completed.  An error
+     * return from libxl__initiate_device_remove means that the ao
+     * will _not_ be completed and the caller must do so.
+     */
+    AO_GC;
     libxl_ctx *ctx = libxl__gc_owner(gc);
     xs_transaction_t t;
     char *be_path = libxl__device_backend_path(gc, dev);
     char *state_path = libxl__sprintf(gc, "%s/state", be_path);
     char *state = libxl__xs_read(gc, XBT_NULL, state_path);
     int rc = 0;
+    libxl__ao_device_remove *aorm = 0;
 
     if (!state)
         goto out;
@@ -458,23 +414,21 @@ retry_transaction:
         }
     }
 
-    xs_watch(ctx->xsh, state_path, be_path);
     libxl__device_destroy_tapdisk(gc, be_path);
 
-    if (wait) {
-        struct timeval tv;
-        tv.tv_sec = LIBXL_DESTROY_TIMEOUT;
-        tv.tv_usec = 0;
-        rc = libxl__wait_for_device_state(gc, &tv, XenbusStateClosed,
-                                          destroy_device);
-        if (rc < 0) /* an error or timeout occurred, clear watches */
-            xs_unwatch(ctx->xsh, state_path, be_path);
-        xs_rm(ctx->xsh, XBT_NULL, libxl__device_frontend_path(gc, dev));
-    } else {
-        rc = 1; /* Caller must wait_for_dev_destroy */
-    }
+    aorm = libxl__zalloc(gc, sizeof(*aorm));
+    aorm->ao = ao;
+    libxl__ev_devstate_init(&aorm->ds);
 
-out:
+    rc = libxl__ev_devstate_wait(gc, &aorm->ds, device_remove_callback,
+                                 state_path, XenbusStateClosed,
+                                 LIBXL_DESTROY_TIMEOUT * 1000);
+    if (rc) goto out;
+
+    return 0;
+
+ out:
+    device_remove_cleanup(gc, aorm);
     return rc;
 }
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index b7f0f54..9920fb9 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -653,35 +653,15 @@ _hidden char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device);
 _hidden char *libxl__device_frontend_path(libxl__gc *gc, libxl__device *device);
 _hidden int libxl__parse_backend_path(libxl__gc *gc, const char *path,
                                       libxl__device *dev);
-_hidden int libxl__device_remove(libxl__gc *gc, libxl__device *dev, int wait);
 _hidden int libxl__device_destroy(libxl__gc *gc, libxl__device *dev);
 _hidden int libxl__devices_destroy(libxl__gc *gc, uint32_t domid);
 _hidden int libxl__wait_for_backend(libxl__gc *gc, char *be_path, char *state);
 
-/* Handler for the libxl__wait_for_device_state callback */
-/*
- * libxl__device_state_handler - Handler for the libxl__wait_for_device_state
- * gc: allocation pool
- * l1: array containing the path and token
- * state: string that contains the state of the device
- *
- * Returns 0 on success, and < 0 on error.
- */
-typedef int libxl__device_state_handler(libxl__gc *gc, char **l1, char *state);
-
-/*
- * libxl__wait_for_device_state - waits a given time for a device to
- * reach a given state
- * gc: allocation pool
- * tv: timeval struct containing the maximum time to wait
- * state: state to wait for (check xen/io/xenbus.h)
- * handler: callback function to execute when state is reached
- *
- * Returns 0 on success, and < 0 on error.
- */
-_hidden int libxl__wait_for_device_state(libxl__gc *gc, struct timeval *tv,
-                                         XenbusState state,
-                                         libxl__device_state_handler handler);
+/* Arranges that dev will be removed from its guest.  When
+ * this is done, the ao will be completed.  An error
+ * return from libxl__initiate_device_remove means that the ao
+ * will _not_ be completed and the caller must do so. */
+_hidden int libxl__initiate_device_remove(libxl__ao*, libxl__device *dev);
 
 /*
  * libxl__ev_devstate - waits a given time for a device to
diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
index c2b7a1e..659a9e6 100644
--- a/tools/libxl/xl_cmdimpl.c
+++ b/tools/libxl/xl_cmdimpl.c
@@ -4624,7 +4624,7 @@ int main_networkdetach(int argc, char **argv)
             return 1;
         }
     }
-    if (libxl_device_nic_remove(ctx, domid, &nic)) {
+    if (libxl_device_nic_remove(ctx, domid, &nic, 0)) {
         fprintf(stderr, "libxl_device_nic_del failed.\n");
         return 1;
     }
@@ -4719,7 +4719,7 @@ int main_blockdetach(int argc, char **argv)
         fprintf(stderr, "Error: Device %s not connected.\n", argv[optind+1]);
         return 1;
     }
-    if (libxl_device_disk_remove(ctx, domid, &disk)) {
+    if (libxl_device_disk_remove(ctx, domid, &disk, 0)) {
         fprintf(stderr, "libxl_device_disk_remove failed.\n");
     }
     return 0;
-- 
1.7.2.5

^ permalink raw reply related	[flat|nested] 31+ messages in thread

* Re: [PATCH 2/9] ocaml, libxl: support "private" fields
  2012-01-13 19:25 ` [PATCH 2/9] ocaml, libxl: support "private" fields Ian Jackson
@ 2012-01-18 14:03   ` Ian Campbell
  0 siblings, 0 replies; 31+ messages in thread
From: Ian Campbell @ 2012-01-18 14:03 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> The changeset
>   24378:b4365e2c2595  libxl: idl: support new "private" type attribute
> is not complete.  Actually using this feature does not work because
> the ocaml idl generator does not know about it.
> 
> So add that support.
> 
> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>

Acked-by: Ian Campbell <ian.campbell@citrix.com>

> ---
>  tools/ocaml/libs/xl/genwrap.py |    8 ++++++++
>  1 files changed, 8 insertions(+), 0 deletions(-)
> 
> diff --git a/tools/ocaml/libs/xl/genwrap.py b/tools/ocaml/libs/xl/genwrap.py
> index 5f8639a..61abecf 100644
> --- a/tools/ocaml/libs/xl/genwrap.py
> +++ b/tools/ocaml/libs/xl/genwrap.py
> @@ -91,6 +91,8 @@ def gen_ocaml_ml(ty, interface, indent=""):
>              s += "\t{\n"
>              
>          for f in ty.fields:
> +            if f.type.private:
> +                continue
>              x = ocaml_instance_of(f.type, f.name)
>              x = x.replace("\n", "\n\t\t")
>              s += "\t\t" + x + ";\n"
> @@ -146,6 +148,8 @@ def c_val(ty, c, o, indent="", parent = None):
>      elif isinstance(ty, libxltypes.Aggregate) and (parent is None):
>          n = 0
>          for f in ty.fields:
> +            if f.type.private:
> +                continue
>              s += "%s\n" % c_val(f.type, "%s->%s" % (c, f.name), "Field(%s, %d)" % (o,n), parent="%s->" % (c))
>              n = n + 1
>      else:
> @@ -210,6 +214,8 @@ def ocaml_Val(ty, o, c, indent="", parent = None):
>          
>          n = 0
>          for f in ty.fields:
> +            if f.type.private:
> +                continue
>              s += "\n"
>              s += "\t%s\n" % ocaml_Val(f.type, "%s_field" % ty.rawname, "%s->%s" % (c,f.name), parent="%s->" % c)
>              s += "\tStore_field(%s, %d, %s);\n" % (o, n, "%s_field" % ty.rawname)
> @@ -288,6 +294,8 @@ if __name__ == '__main__':
>      cinc.write(autogen_header("/*", "*/"))
>  
>      for ty in types:
> +        if ty.private:
> +            continue
>          #sys.stdout.write(" TYPE    %-20s " % ty.rawname)
>          ml.write(gen_ocaml_ml(ty, False))
>          ml.write("\n")

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 7/9] libxl: New convenience macro CONTAINER_OF
  2012-01-13 19:25 ` [PATCH 7/9] libxl: New convenience macro CONTAINER_OF Ian Jackson
@ 2012-01-18 14:04   ` Ian Campbell
  0 siblings, 0 replies; 31+ messages in thread
From: Ian Campbell @ 2012-01-18 14:04 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> Provide a convenient and type-safe wrapper which does the correct
> dance to subtract offsetof.  This is very similar to the
> "container_of" macro in the Linux kernel, but it has an additional
> feature that instead of the type argument you may also pass an
> expression of that type; this makes initialising a variable with
> CONTAINER_OF easier.
> 
> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>

Acked-by: Ian Campbell <ian.campbell@citrix.com>

> ---
>  tools/libxl/libxl_internal.h |   29 +++++++++++++++++++++++++++++
>  1 files changed, 29 insertions(+), 0 deletions(-)
> 
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index 594b9fb..213b5f9 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -1238,6 +1238,35 @@ _hidden void libxl__ao__destroy(libxl_ctx*, libxl__ao *ao);
>   * Convenience macros.
>   */
>  
> +/*
> + * CONTAINER_OF works like this.  Given:
> + *    typedef struct {
> + *      ...
> + *      member_type member_name;
> + *      ...
> + *    } outer_type;
> + *    outer_type outer, *outer_var;
> + *    member_type *inner_ptr = &outer.member_name;
> + *
> + * Then, effectively:
> + *    outer_type *CONTAINER_OF(member_type *inner_ptr,
> + *                             *outer_var, // or type name for outer_type
> + *                             member_name);
> + *
> + * So that:
> + *    CONTAINER_OF(inner_ptr, *outer_var, member_name) == &outer
> + *    CONTAINER_OF(inner_ptr, outer_type, member_name) == &outer
> + */
> +#define CONTAINER_OF(inner_ptr, outer, member_name)                     \
> +    ({                                                                  \
> +        typeof(outer) *container_of_;                                   \
> +        container_of_ = (void*)((char*)(inner_ptr) -                    \
> +                                offsetof(typeof(outer), member_name));  \
> +        (void)(&container_of_->member_name ==                           \
> +               (typeof(inner_ptr))0) /* type check */;                  \
> +        container_of_;                                                  \
> +    })
> +
>  
>  /*
>   * All of these assume (or define)

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/9] libxl: New API for providing OS events to libxl
  2012-01-13 19:25 ` [PATCH 1/9] libxl: New API for providing OS events to libxl Ian Jackson
@ 2012-01-18 16:35   ` Ian Campbell
  2012-01-18 17:06     ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Campbell @ 2012-01-18 16:35 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> We provide a new set of functions and related structures
>   libxl_osevent_*
> which are to be used by event-driven applications to receive
> information from libxl about which fds libxl is interested in, and
> what timeouts libxl is waiting for, and to pass back to libxl
> information about which fds are readable/writeable etc., and which
> timeouts have occurred.  Ie, low-level events.
> 
> In this patch, this new machinery is still all unused.  Callers will
> appear in the next patch in the series, which introduces a new API for
> applications to receive high-level events about actual domains etc.
> 
> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
> ---
>  tools/libxl/Makefile         |    2 +-
>  tools/libxl/libxl.c          |   30 ++
>  tools/libxl/libxl.h          |    6 +
>  tools/libxl/libxl_event.c    |  750 ++++++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_event.h    |  205 ++++++++++++
>  tools/libxl/libxl_internal.h |  277 +++++++++++++++-
>  6 files changed, 1267 insertions(+), 3 deletions(-)
>  create mode 100644 tools/libxl/libxl_event.c
>  create mode 100644 tools/libxl/libxl_event.h

[...]
> @@ -109,6 +110,71 @@ _hidden void libxl__log(libxl_ctx *ctx, xentoollog_level msglevel, int errnoval,
> 
>       /* these functions preserve errno (saving and restoring) */
> 
> +typedef struct libxl__gc libxl__gc;
> +typedef struct libxl__egc libxl__egc;
> +
> +typedef struct libxl__ev_fd libxl__ev_fd;
> +typedef void libxl__ev_fd_callback(libxl__egc *egc, libxl__ev_fd *ev,
> +                                   int fd, short events, short revents);
> +struct libxl__ev_fd {
> +    /* caller should include this in their own struct */
> +    /* read-only for caller, who may read only when registered: */
> +    int fd;
> +    short events;
> +    libxl__ev_fd_callback *func;

Are there actually cases where a caller would want to read these?

The most obvious case would be in the callback but it already gets given
all three there.

Not suggesting we disallow this I'm just curious.

> +    /* remainder is private for libxl__ev_fd... */
> +    LIBXL_LIST_ENTRY(libxl__ev_fd) entry;
> +    void *for_app_reg;
> +};
[...]

> + *   int libxl__ev_KIND_register(libxl__gc *gc, libxl__ev_KIND *GEN,
> + *                              libxl__ev_KIND_callback *FUNC,
> + *                              DETAILS);
> + *      On entry *GEN must be in state Undefined or Idle.
> + *      Returns a libxl error code; on error return *GEN is Idle.
> + *      On successful return *GEN is Active and FUNC wil be

                                                        will

> + *      called by the event machinery in future.  FUNC will
> + *      not be called from within the call to _register.
> + *      FUNC will be called with the context locked (with CTX_LOCK).
[...]

Acked-by: Ian Campbell <ian.campbell@citrix.com>

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 1/9] libxl: New API for providing OS events to libxl
  2012-01-18 16:35   ` Ian Campbell
@ 2012-01-18 17:06     ` Ian Jackson
  0 siblings, 0 replies; 31+ messages in thread
From: Ian Jackson @ 2012-01-18 17:06 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel

Ian Campbell writes ("Re: [Xen-devel] [PATCH 1/9] libxl: New API for providing OS events to libxl"):
> On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> > +struct libxl__ev_fd {
> > +    /* caller should include this in their own struct */
> > +    /* read-only for caller, who may read only when registered: */
> > +    int fd;
> > +    short events;
> > +    libxl__ev_fd_callback *func;
> 
> Are there actually cases where a caller would want to read these?
> 
> The most obvious case would be in the callback but it already gets given
> all three there.
> 
> Not suggesting we disallow this I'm just curious.

This is a change from my previous version of this series.  When
writing the device removal code I found myself wanting to read the
path member of a libxl__ev_xswatch:

+static void devstate_timeout(libxl__egc *egc, libxl__ev_time *ev,
+                             const struct timeval *requested_abs)
+{
+    EGC_GC;
+    libxl__ev_devstate *ds = CONTAINER_OF(ev, *ds, timeout);
+    LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d "
+               " timed out", ds->watch.path, ds->wanted);
                              ^^^^^^^^^^^^^^
+    libxl__ev_devstate_cancel(gc, ds);
+    ds->callback(egc, ds, ERROR_TIMEDOUT);
+}

So my options were:
 0. Not print the path, rendering the message almost useless
 1. Copy the path an extra time, pointlessly
 2. Relax the rules about the contents of libxl__ev_xswatch, making
    a special exception for this particular struct
 3. Relax the rules about the contents of libxl__ev_* generally
 4. Change the API of libxl__ev_* so that the caller always writes
    the fd, path, etc.

Of these, option 3 seemed best.  I considered 4.; but the result would be that
libxl__ev_KIND_register wouldn't take the arguments specifying what to
wait for, and that seemed a step too far.  Particularly since needing
to read inside the struct isn't all that common.

> Acked-by: Ian Campbell <ian.campbell@citrix.com>

Thanks,
Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/9] libxl: New event generation API
  2012-01-13 19:25 ` [PATCH 3/9] libxl: New event generation API Ian Jackson
@ 2012-01-18 17:33   ` Ian Campbell
  2012-01-24 16:23     ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Campbell @ 2012-01-18 17:33 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> Replace the existing API for retrieving high-level events (events
> about domains, etc.) from libxl with a new one.
> 
> This changes the definition and semantics of the `libxl_event'
> structure, and replaces the calls for obtaining information about
> domain death and disk eject events.
> 
> This is an incompatible change, sorry.  The alternative was to try to
> provide both the previous horrid API and the new one, and would also
> involve never using the name `libxl_event' for the new interface.
> 
> The new "libxl_event" structure is blacklisted in the ocaml bindings
> for two reasons:
>   - It has a field name "type" (which is a keyword in ocaml);
>     the ocaml idl generator should massage this field name on
>     output, to "type_" perhaps.
>   - The ocaml idl generator does not support KeyedUnion.
> 
> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
> ---
>  tools/libxl/libxl.c            |  329 +++++++++++++++++++++++++++++-----------
>  tools/libxl/libxl.h            |   55 +------
>  tools/libxl/libxl_event.c      |  236 ++++++++++++++++++++++++++---
>  tools/libxl/libxl_event.h      |  183 ++++++++++++++++++++++-
>  tools/libxl/libxl_internal.h   |   77 +++++++++-
>  tools/libxl/libxl_types.idl    |   34 ++++-
>  tools/libxl/xl_cmdimpl.c       |  270 +++++++++++++++++++--------------
>  tools/ocaml/libs/xl/genwrap.py |    1 +
>  8 files changed, 908 insertions(+), 277 deletions(-)
> 
> diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
> index 413b684..19ff12c 100644
> --- a/tools/libxl/libxl.c
> +++ b/tools/libxl/libxl.c
> @@ -95,6 +115,13 @@ int libxl_ctx_free(libxl_ctx *ctx)
> 
>      /* Deregister all libxl__ev_KINDs: */
> 
> +    free_disable_deaths(gc, &CTX->death_list);
> +    free_disable_deaths(gc, &CTX->death_reported);
> +
> +    libxl_evgen_disk_eject *eject;
> +    while ((eject = LIBXL_LIST_FIRST(&CTX->disk_eject_evgens)))
> +        libxl__evdisable_disk_eject(gc, eject);

Why a helper for deaths but not ejects?

[...]

> diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
> index ec66340..621a7cc 100644
> --- a/tools/libxl/libxl_event.c
> +++ b/tools/libxl/libxl_event.c

> 
>  /*
> diff --git a/tools/libxl/libxl_event.h b/tools/libxl/libxl_event.h
> index 63ef65e..0e83800 100644
> --- a/tools/libxl/libxl_event.h
> +++ b/tools/libxl/libxl_event.h

> +#define LIBXL_EVENTMASK_ALL (~(unsigned long)0)
> +
> +typedef int libxl_event_predicate(const libxl_event*, void *user);
> +  /* Return value is 0 if the event is unwanted or non-0 if it is.
> +   * Predicates are not allowed to fail.
> +   */
> +
> +int libxl_event_check(libxl_ctx *ctx, libxl_event **event_r,
> +                      unsigned long typemask,
> +                      libxl_event_predicate *predicate, void *predicate_user);
> +  /* Searches for an event, already-happened, which matches typemask
> +   * and predicate.  predicate==0 matches any event.
> +   * libxl_event_check returns the event, which must then later be
> +   * freed by the caller using libxl_event_free.
> +   *
> +   * Returns ERROR_NOT_READY if no such event has happened.
> +   */
> +
> +int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r,
> +                     unsigned long typemask,
> +                     libxl_event_predicate *predicate, void *predicate_user);
> +  /* Like libxl_event_check but blocks if no suitable events are
> +   * available, until some are.  Uses libxl_osevent_beforepoll/
> +   * _afterpoll so may be inefficient if very many domains are being
> +   * handled by a single program.
> +   */
> +
> +void libxl_event_free(libxl_ctx *ctx, libxl_event *event);
> +
> +
> +/* Alternatively or additionally, the application may also use this: */
> +
> +typedef struct libxl_event_hooks {
> +    uint64_t event_occurs_mask;

libxl_event_{wait,check} and LIBXL_EVENTMASK_ALL have an unsigned long
mask. Are they not the same set of bits?

[...]

> + * The user value is returned in the generated events and may be
> + * used by the caller for whatever it likes.  The type ev_user is
> + * guaranteed to be an unsigned integer type which is at least
> + * as big as uint64_t and is also guaranteed to be big enough to
> + * contain any intptr_t value.

Does anything actually guarantee that sizeof(uint64_t) >=
sizeof(intptr_t)? I'm sure it's true in practice and I'm happy to rely
on it. Just interested.

> + *[...]

> + * Applications should ensure that they eventually retrieve every
> + * event using libxl_event_check or libxl_event_wait, since events
> + * which occur but are not retreived by the application will be queued

                              retrieved

> + * inside libxl indefinitely.  libxl_event_check/_wait may be O(n)
> + * where n is the number of queued events which do not match the
> + * criteria specified in the arguments to check/wait.
> + */
[...]
> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
> index 574dec7..a6dac79 100644
> --- a/tools/libxl/libxl_types.idl
> +++ b/tools/libxl/libxl_types.idl
> @@ -395,3 +390,32 @@ libxl_sched_sedf = Struct("sched_sedf", [
>      ("extratime", integer),
>      ("weight", integer),
>      ], dispose_fn=None)
> +
> +libxl_event_type = Enumeration("event_type", [
> +    (1, "DOMAIN_SHUTDOWN"),
> +    (2, "DOMAIN_DESTROY"),
> +    (3, "DISK_EJECT"),
> +    ])
> +
> +libxl_ev_user = UInt(64)

The other option here would be Builtin(...) and an entry in the builtin
table in the wrapper generator. 

Arguably the idl could be improved by causing Number() to have a width
field. Currently it has a signedness and width is a property of UInt but
the latter could be pushed up the hierarchy.

You'd still end up with 
	FOO = Number("FOO", width=X)
which isn't really much better.

Or the ocaml generator could handle Number as the biggest int.

Hrm. None of that seems all that much better than what you have. Chalk
it up to potential future work.

> +libxl_ev_link = Builtin("ev_link", passby=PASS_BY_REFERENCE, private=True)
> +
> +libxl_event = Struct("event",[
> +    ("link",     libxl_ev_link,0),

This "0" == "const=False" which is the default. I don't think it is
necessary.

[...]
> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> index 8c30de1..e292bfc 100644
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c

> @@ -1702,92 +1729,106 @@ start:
>      }
>      LOG("Waiting for domain %s (domid %d) to die [pid %ld]",
>          d_config.c_info.name, domid, (long)getpid());
[...]
> +    ret = libxl_evenable_domain_death(ctx, domid, 0, &deathw);
> +    if (ret) goto out;
> 
[...]
> +    if (!diskws) {
> +        diskws = xmalloc(sizeof(*diskws) * d_config.num_disks);

I didn't see this getting freed on the exit path.

> +        for (i = 0; i < d_config.num_disks; i++)
> +            diskws[i] = NULL;
> +    }
> +    for (i = 0; i < d_config.num_disks; i++) {
> +        ret = libxl_evenable_disk_eject(ctx, domid, d_config.disks[i].vdev,
> +                                        0, &diskws[i]);
> +        if (ret) goto out;
> +    }

This is all (I think) safe for num_disks == 0 but why waste the effort?

Incidentally we have libxl_device_disk.removable which might be an
opportunity to optimise? Assuming it is meaningful in that way. I think
in reality only emulated CD-ROM devices ever generate this event but
perhaps exposing that in the API overcomplicates things.

[...]

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 5/9] libxl: Permit multithreaded event waiting
  2012-01-13 19:25 ` [PATCH 5/9] libxl: Permit multithreaded event waiting Ian Jackson
@ 2012-01-19 10:01   ` Ian Campbell
  2012-01-24 16:34     ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Campbell @ 2012-01-19 10:01 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> Previously, the context would be locked whenever we were waiting in
> libxl's own call to poll (waiting for operating system events).
> 
> This would mean that multiple simultaneous calls to libxl_event_wait
> in different threads with different parameters would not work
> properly.
> 
> If we simply unlock the context, it would be possible for another
> thread to discover the occurrence of the event we were waiting for,
> without us even waking up, and we would remain in poll.  So we need a
> way to wake up other threads: a pipe, one for each thread in poll.
> 
> We also need to move some variables from globals in the ctx to be
> per-polling-thread.

I don't think this relates to this patch, just that the mention of
multithreaded waiting brought it to mind. What are the intended
semantics of two calls to libxl_event_wait with overlapping event masks?

Do we expect that the caller must have called the appropriate evenables
twice such that both waits get an event (possibly discriminate via the
predicate)?

Presumably we want to ensure that one of the waits doesn't sleep for
ever.

How does this interact with events generated via the hooks mechanism? Do
we always deliver to the explicit wait in preference?

> 
> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
> ---
>  tools/libxl/libxl.c          |   18 +++-
>  tools/libxl/libxl_event.c    |  196 ++++++++++++++++++++++++++++++++++--------
>  tools/libxl/libxl_internal.h |   50 ++++++++++-
>  3 files changed, 218 insertions(+), 46 deletions(-)
[...]
> diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
> index 621a7cc..82889f6 100644
> --- a/tools/libxl/libxl_event.c
> +++ b/tools/libxl/libxl_event.c

> @@ -542,30 +542,39 @@ static int beforepoll_internal(libxl__gc *gc, int *nfds_io,
>                  maxfd = efd->fd + 1;
>          }
>          /* make sure our array is as big as *nfds_io */
> -        if (CTX->fd_rindex_allocd < maxfd) {
> +        if (poller->fd_rindex_allocd < maxfd) {
>              assert(maxfd < INT_MAX / sizeof(int) / 2);
> -            int *newarray = realloc(CTX->fd_rindex, sizeof(int) * maxfd);
> +            int *newarray = realloc(poller->fd_rindex, sizeof(int) * maxfd);
>              if (!newarray) { rc = ERROR_NOMEM; goto out; }
> -            memset(newarray + CTX->fd_rindex_allocd, 0,
> -                   sizeof(int) * (maxfd - CTX->fd_rindex_allocd));
> -            CTX->fd_rindex = newarray;
> -            CTX->fd_rindex_allocd = maxfd;
> +            memset(newarray + poller->fd_rindex_allocd, 0,
> +                   sizeof(int) * (maxfd - poller->fd_rindex_allocd));
> +            poller->fd_rindex = newarray;
> +            poller->fd_rindex_allocd = maxfd;
>          }
>      }
> 
>      int used = 0;
[...]
> +
> +#define REQUIRE_FD(req_fd, req_events, efd) do{                 \
> +        if ((req_events)) {                                     \
> +            if (used < *nfds_io) {                              \
> +                fds[used].fd = (req_fd);                        \
> +                fds[used].events = (req_events);                \
> +                fds[used].revents = 0;                          \
> +                assert((req_fd) < poller->fd_rindex_allocd);    \
> +                poller->fd_rindex[(req_fd)] = used;             \
> +            }                                                   \
> +            used++;                                             \

Used is expected to be in the calling context? OIC -- this is defined
temporarily within a function, the diff context (which I've now trimmed)
confused me.

Does this actually add anything above doing
	LIBXL_LIST_FOREACH(...) {
		/* the body of require_fd */
	}
?

> +        }                                                       \
> +    }while(0)
> +
> +    LIBXL_LIST_FOREACH(efd, &CTX->efds, entry)
> +        REQUIRE_FD(efd->fd, efd->events, efd);
> +
> +    REQUIRE_FD(poller->wakeup_pipe[0], POLLIN, 0);
> +
> +#undef REQUIRE_FD
> +
>      rc = used <= *nfds_io ? 0 : ERROR_BUFFERFULL;
> 
>      *nfds_io = used;
[...]
> @@ -630,22 +640,31 @@ static int afterpoll_check_fd(libxl_ctx *ctx,
>      return revents;
>  }
> 
> -static void afterpoll_internal(libxl__egc *egc,
> +static void afterpoll_internal(libxl__egc *egc, libxl__poller *poller,
>                                 int nfds, const struct pollfd *fds,
>                                 struct timeval now)
>  {
>      EGC_GC;
>      libxl__ev_fd *efd;
> 
> +
>      LIBXL_LIST_FOREACH(efd, &CTX->efds, entry) {
>          if (!efd->events)
>              continue;
> 
> -        int revents = afterpoll_check_fd(CTX,fds,nfds, efd->fd,efd->events);
> +        int revents = afterpoll_check_fd(poller,fds,nfds, efd->fd,efd->events);
>          if (revents)
>              efd->func(egc, efd, efd->fd, efd->events, revents);
>      }
> 
> +    if (afterpoll_check_fd(poller,fds,nfds, poller->wakeup_pipe[0],POLLIN)) {
> +        char buf[256];

Is it (theoretically) possible to have more than 256 events pending?

> +        int r = read(poller->wakeup_pipe[0], buf, sizeof(buf));
> +        if (r < 0)
> +            if (errno != EINTR && errno != EWOULDBLOCK)
> +                LIBXL__EVENT_DISASTER(egc, "read wakeup", errno, 0);
> +    }
> +
>      for (;;) {
>          libxl__ev_time *etime = LIBXL_TAILQ_FIRST(&CTX->etimes);
>          if (!etime)
[...]
> @@ -858,7 +880,94 @@ int libxl_event_check(libxl_ctx *ctx, libxl_event **event_r,
>      return rc;
>  }
> 
> -static int eventloop_iteration(libxl__egc *egc) {
> +/*
> + * Manipulation of pollers
> + */
> +
> +int libxl__poller_init(libxl_ctx *ctx, libxl__poller *p)
> +{
> +    int r, rc;
> +    p->fd_polls = 0;
> +    p->fd_rindex = 0;
> +
> +    r = pipe(p->wakeup_pipe);
> +    if (r) {
> +        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "cannot create poller pipe");
> +        rc = ERROR_FAIL;
> +        goto out;
> +    }
> +
> +    rc = libxl_fd_set_nonblock(ctx, p->wakeup_pipe[0], 1);
> +    if (rc) goto out;
> +
> +    rc = libxl_fd_set_nonblock(ctx, p->wakeup_pipe[1], 1);
> +    if (rc) goto out;
> +
> +    return 0;
> +
> + out:
> +    libxl__poller_dispose(p);

The dispose function checks for fd > 0 before closing but if you take
the first goto out (pipe failed) then wakeup_pipe[{0,1}] are still
undefined?

> +    return rc;
> +}
> +
> +void libxl__poller_dispose(libxl__poller *p)
> +{
> +    if (p->wakeup_pipe[1] > 0) close(p->wakeup_pipe[1]);
> +    if (p->wakeup_pipe[0] > 0) close(p->wakeup_pipe[0]);

Strictly speaking 0 is a valid value for an open fd.

I once saw a bug (in gzip iirc) where, because stdin had inadvertently
been closed, a dup() of some sort returned 0 but the subsequent checks
were for >0 rather than >=. Slightly unusual case but it took an age
(for someone else...) to debug.

> +    free(p->fd_polls);
> +    free(p->fd_rindex);
> +}
> +
> +libxl__poller *libxl__poller_get(libxl_ctx *ctx)
> +{
> +    /* must be called with ctx locked */
> +    int rc;
> +
> +    libxl__poller *p = LIBXL_LIST_FIRST(&ctx->pollers_idle);
> +    if (p)
> +        return p;
> +
> +    p = malloc(sizeof(*p));
> +    if (!p) {
> +        LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "cannot allocate poller");
> +        return 0;
> +    }
> +    memset(p, 0, sizeof(*p));

Hrm, I guess this is where p->wakeup_pipe gets initialised. Likewise the
call to _init in ctx_alloc is preceded by a memset. Not entirely obvious
but I guess it's ok. (initialising to -1 in _init would solve both this
and the 0-is-a-valid-fd)

> +
> +    rc = libxl__poller_init(ctx, p);
> +    if (rc) return NULL;
> +
> +    return p;
> +}
> +
> +void libxl__poller_put(libxl_ctx *ctx, libxl__poller *p)
> +{
> +    LIBXL_LIST_INSERT_HEAD(&ctx->pollers_idle, p, entry);
> +}
> +
> +void libxl__poller_wakeup(libxl__egc *egc, libxl__poller *p)
> +{
> +    static const char buf[1] = "";
> +
> +    for (;;) {
> +        int r = write(p->wakeup_pipe[1], buf, 1);
> +        if (r==1) return;
> +        assert(r==-1);

There's no possibility of r == 0 here?

> +        if (errno == EINTR) continue;
> +        if (errno == EWOULDBLOCK) return;

write(2) says that both EWOULDBLOCK and EAGAIN are valid returns for a
non-blocking fd and may have different values so apps should check for
both.

> +        LIBXL__EVENT_DISASTER(egc, "cannot poke watch pipe", errno, 0);
> +        return;
> +    }
> +}
> +
> +/*
> + * Main event loop iteration
> + */
> +
> +static int eventloop_iteration(libxl__egc *egc, libxl__poller *poller) {
> +    /* The CTX must be locked EXACTLY ONCE so that this function
> +     * can unlock it when it polls.
> +     */
>      EGC_GC;
>      int rc;
>      struct timeval now;
[...]
> @@ -900,7 +1013,8 @@ static int eventloop_iteration(libxl__egc *egc) {
>      rc = libxl__gettimeofday(gc, &now);
>      if (rc) goto out;
> 
> -    afterpoll_internal(egc, CTX->fd_polls_allocd, CTX->fd_polls, now);
> +    afterpoll_internal(egc, poller,
> +                       poller->fd_polls_allocd, poller->fd_polls, now);

Can this function be simplified to take just (egc, poller, now)?
Likewise beforepoll_internal?

> 
>      CTX_UNLOCK;
> 

> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index edb73eb..53d2462 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -205,6 +205,33 @@ struct libxl__evgen_disk_eject {
>  _hidden void
>  libxl__evdisable_disk_eject(libxl__gc*, libxl_evgen_disk_eject*);
> 
> +typedef struct libxl__poller libxl__poller;
> +struct libxl__poller {
> +    /*
> +     * These are used to allow other threads to wake up a thread which
> +     * may be stuck in poll, because whatever it was waiting for
> +     * hadn't happened yet.  Threads which generate events will write
> +     * a byte to each pipe.  A thread which is waiting will empty its
> +     * own pipe, and put its poller on the pollers_event list, before
> +     * releasing the ctx lock and going into poll; when it comes out
> +     * of poll it will take the poller off the pollers_event list.
> +     *
> +     * When a thread is done with a poller it should put it onto
> +     * pollers_idle, where it can be reused later.
> +     *
> +     * The "poller_app" is never idle, but is sometimes on
> +     * pollers_event.
> +     */
> +    LIBXL_LIST_ENTRY(libxl__poller) entry;
> +
> +    struct pollfd *fd_polls;
> +    int fd_polls_allocd;
> +
> +    int fd_rindex_allocd;
> +    int *fd_rindex; /* see libxl_osevent_beforepoll */
> +
> +    int wakeup_pipe[2]; /* 0 means no fd allocated */

Or does it ;-)
> +};
> 
>  struct libxl__ctx {
>      xentoollog_logger *lg;
> @@ -235,10 +262,9 @@ struct libxl__ctx {
>        /* See the comment for OSEVENT_HOOK_INTERN in libxl_event.c
>         * for restrictions on the use of the osevent fields. */
> 
> -    struct pollfd *fd_polls;
> -    int fd_polls_allocd;
> -    int fd_rindex_allocd;
> -    int *fd_rindex; /* see libxl_osevent_beforepoll */
> +    libxl__poller poller_app; /* libxl_osevent_beforepoll and _afterpoll */

This presumably means that an app can only use before/afterpoll from one
thread at a time. Hardly an onerous requirement but worth noting
perhaps?

Could also check that poller_app.entry is not currently on a list?

> +    LIBXL_LIST_HEAD(, libxl__poller) pollers_event, pollers_idle;
> +
>      LIBXL_LIST_HEAD(, libxl__ev_fd) efds;
>      LIBXL_TAILQ_HEAD(, libxl__ev_time) etimes;
> 
> @@ -524,6 +550,22 @@ _hidden void libxl__event_disaster(libxl__egc*, const char *msg, int errnoval,
>      libxl__event_disaster(egc, msg, errnoval, type, __FILE__,__LINE__,__func__)
> 
> 
> +/* Fills in, or disposes of, the resources held by, a poller whose

That third comma read weirdly to me.

> + * space the caller has allocated.  ctx must be locked. */

init doesn't appear to do anything which needs a lock?

> +int libxl__poller_init(libxl_ctx *ctx, libxl__poller *p);
> +void libxl__poller_dispose(libxl__poller *p);
> +
> +/* Obtain a fresh poller from malloc or the idle list, and put it
> + * away again afterwards.  _get can fail, returning NULL.
> + * ctx must be locked. */
> +libxl__poller *libxl__poller_get(libxl_ctx *ctx);
> +void libxl__poller_put(libxl_ctx *ctx, libxl__poller *p);
> +
> +/* Notifies whoever is polling using p that they should wake up.
> + * ctx must be locked. */
> +void libxl__poller_wakeup(libxl__egc *egc, libxl__poller *p);
> +

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure
  2012-01-13 19:25 ` [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure Ian Jackson
@ 2012-01-19 10:44   ` Ian Campbell
  2012-01-24 17:27     ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Campbell @ 2012-01-19 10:44 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> Provide a new set of machinery for writing public libxl functions
> which may take a long time.  The application gets to decide whether
> they want the function to be synchronous, or whether they'd prefer to
> get a callback, or an event, when the operation is complete.
> 
> User(s) of this machinery will be introduced in later patch(es).

You've done device removal, do you have a list of other things which
should use this? (perhaps with an associated list of people's names...)

> 
> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
> ---
>  tools/libxl/libxl.h          |   50 ++++++++++++
>  tools/libxl/libxl_event.c    |  183 ++++++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_internal.h |  112 ++++++++++++++++++++++++++
>  tools/libxl/libxl_types.idl  |    4 +
>  4 files changed, 349 insertions(+), 0 deletions(-)
> 
> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
> index e32881b..416d6e8 100644
> --- a/tools/libxl/libxl.h
> +++ b/tools/libxl/libxl.h
> @@ -235,8 +235,58 @@ enum {
>      ERROR_NOT_READY = -11,
>      ERROR_OSEVENT_REG_FAIL = -12,
>      ERROR_BUFFERFULL = -13,
> +    ERROR_ASYNC_INPROGRESS = -14,
>  };
> 
> +
> +/*
> + * Some libxl operations can take a long time.  These functions take a
> + * parameter to control their concurrency:
> + *     libxl_asyncop_how *ao_how
> + *
> + * If ao_how==NULL, the function will be synchronous.
> + *
> + * If ao_how!=NULL, the function will set the operation going, and
> + * if this is successful will return ERROR_ASYNCH_INPROGRESS.

There's an extra H here compared with the actual symbol name (I think
the symbol is right).

Is there a possibility that libxl might decide that the operation isn't
actually going to take all that long and do things synchronously,
returning normal success (e.g. 0)? Is that the reason for the separate
return code for this "I did what you asked me" case?

Can we drop the ERROR_ prefix? I know that's inconsistent with the other
return codes but those actually are errors.

> + *
> + * If ao_how->callback!=NULL, the callback will be called when the
> + * operation completes.  The same rules as for libxl_event_hooks
> + * apply, including the reentrancy rules and the possibility of

           ^ (see above/below) -- depending on how these comments end up
relative to each other.

> + * "disaster", except that libxl calls ao_how->callback instead of
> + * libxl_event_hooks.event_occurs.
> + *
> + * If ao_how->callback==NULL, a libxl_event will be generated which
> + * can be obtained from libxl_event_wait or libxl_event_check.

Or be delivered via event_occurs?

>   The
> + * event will have type OPERATION_COMPLETE (which is not used
> + * elsewhere).
> + *
> + * Note that it is possible for an asynchronous operation which is to
> + * result in a callback to complete during its initiating function
> + * call.  In this case the initating function will return

                              initiating

> + * ERROR_ASYNCH_INPROGRESS, even though by the time it returns the

Another stray H.

> + * operation is complete and the callback has already happened.
> + *
> + * The application must set and use ao_how->for_event (which will be
> + * copied into libxl_event.for_user) or ao_how->for_callback (passed
> + * to the callback) to determine which operation finished, and it must
> + * of course check the rc value for errors.
> + *
> + * *ao_how does not need to remain valid after the initiating function
> + * returns.
> + *
> + * Callbacks may occur on any thread in which the application calls
> + * libxl.
> + */
> +
> +typedef struct {
> +    void (*callback)(libxl_ctx *ctx, int rc, void *for_callback);
> +    union {
> +        libxl_ev_user for_event; /* used if callback==NULL */
> +        void *for_callback; /* passed to callback */

Why void * for one bit of "closure" but an explicit uint64_t for the
other? I nearly commented on the use of uint64_t previously -- void *,
or perhaps (u)intptr_t is more normal.

> +    } u;
> +} libxl_asyncop_how;
> +
> +
>  #define LIBXL_VERSION 0
> 
>  typedef struct {
> diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
> index 82889f6..b99049a 100644
> --- a/tools/libxl/libxl_event.c
> +++ b/tools/libxl/libxl_event.c
> @@ -771,10 +771,21 @@ static void egc_run_callbacks(libxl__egc *egc)
>  {
>      EGC_GC;
>      libxl_event *ev, *ev_tmp;
> +
>      LIBXL_TAILQ_FOREACH_SAFE(ev, &egc->occurred_for_callback, link, ev_tmp) {
>          LIBXL_TAILQ_REMOVE(&egc->occurred_for_callback, ev, link);
>          CTX->event_hooks->event_occurs(CTX->event_hooks_user, ev);
>      }
> +
> +    libxl__ao *ao, *ao_tmp;
> +    LIBXL_TAILQ_FOREACH_SAFE(ao, &egc->aos_for_callback,
> +                             entry_for_callback, ao_tmp) {
> +        LIBXL_TAILQ_REMOVE(&egc->aos_for_callback, ao, entry_for_callback);
> +        ao->how.callback(CTX, ao->rc, ao->how.u.for_callback);
> +        ao->notified = 1;
> +        if (!ao->in_initiator)
> +            libxl__ao__destroy(CTX, ao);
> +    }
>  }
> 
>  void libxl__egc_cleanup(libxl__egc *egc)
> @@ -1061,6 +1072,178 @@ int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r,
>      return rc;
>  }
> 
> +
> +
> +/*
> + * The two possible state flow of an ao:
> + *
> + * Completion before initiator return:
> + *
> + *     Initiator thread                       Possible other threads
> + *
> + *   * ao_create allocates memory and
> + *     initialises the struct
> + *
> + *   * the initiator function does its
> + *     work, setting up various internal
> + *     asynchronous operations -----------> * asynchronous operations
> + *                                            start to take place and
> + *                                            might cause ao completion
> + *                                                |
> + *   * initiator calls ao_complete:               |
> + *     - if synchronous, run event loop           |
> + *       until the ao completes                   |
> + *                              - ao completes on some thread
> + *                              - completing thread releases the lock
> + *                     <--------------'
> + *     - ao_complete takes the lock
> + *     - destroy the ao
> + *
> + *
> + * Completion after initiator return (asynch. only):
> + *
> + *
> + *     Initiator thread                       Possible other threads
> + *
> + *   * ao_create allocates memory and
> + *     initialises the struct
> + *
> + *   * the initiator function does its
> + *     work, setting up various internal
> + *     asynchronous operations -----------> * asynchronous operations
> + *                                            start to take place and
> + *                                            might cause ao completion
> + *                                                |
> + *   * initiator calls ao_complete:               |
> + *     - observes event not net done,             |
> + *     - returns to caller                        |
> + *                                                |
> + *                              - ao completes on some thread
> + *                              - generate the event or call the callback
> + *                              - destroy the ao

Where does ao_inprogress fit into these diagrams?

> + */
> +
> +void libxl__ao__destroy(libxl_ctx *ctx, libxl__ao *ao) {

CODING_STYLE wants these braces on the next line (a bunch more follow)

> +    if (!ao) return;
> +    if (ao->poller) libxl__poller_put(ctx, ao->poller);
> +    ao->magic = LIBXL__AO_MAGIC_DESTROYED;
> +    libxl__free_all(&ao->gc);
> +    free(ao);
> +}
> +
> +void libxl__ao_abort(libxl__ao *ao) {
> +    AO_GC;
> +    assert(ao->magic == LIBXL__AO_MAGIC);
> +    assert(ao->in_initiator);
> +    assert(!ao->complete);
> +    libxl__ao__destroy(CTX, ao);
> +}
> +
> +void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc) {
> +    assert(ao->magic == LIBXL__AO_MAGIC);
> +    assert(!ao->complete);
> +    ao->complete = 1;
> +    ao->rc = rc;
> +
> +    if (ao->poller) {
> +        assert(ao->in_initiator);
> +        libxl__poller_wakeup(egc, ao->poller);
> +    } else if (ao->how.callback) {
> +        LIBXL_TAILQ_INSERT_TAIL(&egc->aos_for_callback, ao, entry_for_callback);
> +    } else {
> +        libxl_event *ev;
> +        ev = NEW_EVENT(egc, OPERATION_COMPLETE, ao->domid);
> +        if (ev) {
> +            ev->for_user = ao->how.u.for_event;
> +            ev->u.operation_complete.rc = ao->rc;
> +            libxl__event_occurred(egc, ev);
> +        }
> +        ao->notified = 1;
> +    }
> +    if (!ao->in_initiator && ao->notified)
> +        libxl__ao__destroy(libxl__gc_owner(&egc->gc), ao);

You added a helper for this libxl__gc_owner(&egc..) construct.

> +}
> +
> +libxl__ao *libxl__ao_create(libxl_ctx *ctx, uint32_t domid,
> +                            const libxl_asyncop_how *how) {
> +    libxl__ao *ao;
> +
> +    ao = calloc(sizeof(*ao),1);

calloc is actually (nmemb, size). I'm sure it doesn't really matter
though.


> +    if (!ao) goto out;
> +
> +    ao->magic = LIBXL__AO_MAGIC;
> +    ao->in_initiator = 1;
> +    ao->poller = 0;
> +    ao->domid = domid;
> +    LIBXL_INIT_GC(ao->gc, ctx);
> +
> +    if (how) {
> +        ao->how = *how;
> +    } else {
> +        ao->poller = libxl__poller_get(ctx);
> +        if (!ao->poller) goto out;
> +    }
> +    return ao;
> +
> + out:
> +    if (ao) libxl__ao__destroy(ctx, ao);
> +    return NULL;
> +}
> +
> +int libxl__ao_inprogress(libxl__ao *ao) {
> +    AO_GC;
> +    int rc;
> +
> +    assert(ao->magic == LIBXL__AO_MAGIC);
> +    assert(ao->in_initiator);
> +
> +    if (ao->poller) {
> +        /* Caller wants it done synchronously. */
> +        /* We use a fresh gc, so that we can free things
> +         * each time round the loop. */
> +        libxl__egc egc;
> +        LIBXL_INIT_EGC(egc,CTX);
> +
> +        for (;;) {
> +            assert(ao->magic == LIBXL__AO_MAGIC);
> +
> +            if (ao->complete) {
> +                rc = ao->rc;
> +                ao->notified = 1;
> +                break;
> +            }
> +
> +            rc = eventloop_iteration(&egc,ao->poller);
> +            if (rc) {
> +                /* Oh dear, this is quite unfortunate. */
> +                LIBXL__LOG(CTX, LIBXL__LOG_ERROR, "Error waiting for"
> +                           " event during long-running operation (rc=%d)", rc);
> +                sleep(1);
> +                /* It's either this or return ERROR_I_DONT_KNOW_WHETHER
> +                 * _THE_THING_YOU_ASKED_FOR_WILL_BE_DONE_LATER_WHEN
> +                 * _YOU_DIDNT_EXPECT_IT, since we don't have any kind of
> +                 * cancellation ability. */

Does this constitute a "disaster" (in the special hook sense)?

> +            }
> +
> +            CTX_UNLOCK;
> +            libxl__egc_cleanup(&egc);
> +            CTX_LOCK;
> +        }
> +    } else {
> +        rc = ERROR_ASYNC_INPROGRESS;
> +    }
> +
> +    ao->in_initiator = 0;
> +
> +    if (ao->notified) {
> +        assert(ao->complete);
> +        libxl__ao__destroy(CTX,ao);
> +    }
> +
> +    return rc;
> +}
> +
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index 53d2462..594b9fb 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
[...]
> @@ -1123,6 +1144,97 @@ _hidden void libxl__egc_cleanup(libxl__egc *egc);
> 
> 
>  /*
> + * Machinery for asynchronous operations ("ao")
> + *
> + * All "slow" functions (includes anything that might block on a
> + * guest or an external script) need to use the asynchronous
> + * operation ("ao") machinery.  The function should take a parameter
> + * const libxl_asyncop_how *ao_how and must start with a call to
> + * AO_INITIATOR_ENTRY.  These functions MAY NOT be called from
> + * outside libxl, because they can cause reentrancy callbacks.
> + *
> + * No functions called internally within libxl should ever return
> + * ERROR_ASYNCH_INPROGRESS.

Aitch.

> + *
> + * Lifecycle of an ao:
> + *
> + * - Created by libxl__ao_create (or the AO_CREATE convenience macro).
> + *
> + * - After creation, can be used by code which implements
> + *   the operation as follows:
> + *      - the ao's gc, for allocating memory for the lifetime
> + *        of the operation (possibly with the help of the AO_GC
> + *        macro to introduce the gc into scope)
> + *      - the ao itself may be passed about to sub-functions
> + *        so that they can stash it away etc.
> + *      - in particular, the ao pointer must be stashed in some
> + *        per-operation structure which is also passed as a user
> + *        pointer to the internal event generation request routines
> + *        libxl__evgen_FOO, so that at some point a CALLBACK will be
> + *        made when the operation is complete.
> + *
> + * - If initiation is successful, the initiating function needs
> + *   to run libxl__ao_inprogress right before unlocking and
> + *   returning, and return whatever it returns (AO_INPROGRESS macro).
> + *
> + * - If the initiation is unsuccessful, the initiating function must
> + *   call libxl__ao_abort before unlocking and returning whatever
> + *   error code is appropriate (AO_ABORT macro).
> + *
> + * - Later, some callback function, whose callback has been requested
> + *   directly or indirectly, should call libxl__ao_complete (with the
> + *   ctx locked, as it will generally already be in any event callback
> + *   function).  This must happen exactly once for each ao (and not if
> + *   the ao has been destroyed, obviously), and it may not happen
> + *   until libxl__ao_inprogress has been called on the ao.
> + *
> + * - Note that during callback functions, two gcs are available:
> + *    - The one in egc, whose lifetime is only this callback
> + *    - The one in ao, whose lifetime is the asynchronous operation
> + *   Usually callback function should use GET_CONTAINING_STRUCT

Now called CONTAINER_OF

> + *   to obtain its own structure, containing a pointer to the ao,
> + *   and then use the gc from that ao.
> + */
> +
> +#define AO_CREATE(ctx, domid, ao_how)                           \
> +    libxl__ao *ao = libxl__ao_create(ctx, domid, ao_how);       \
> +    if (!ao) return ERROR_NOMEM;                                \
> +    AO_GC;                                                      \
> +    CTX_LOCK;

Where does the unlock which balances this come from? The only unlock I
see in this patch is the temporary drop in libxl__ao_inprogress which is
matched by another lock.

> +
> +#define AO_INPROGRESS do{                                       \
> +        libxl_ctx *ao__ctx = libxl__gc_owner(&ao->gc);          \
> +        int ao__rc = libxl__ao_inprogress(ao);                  \
> +        libxl__ctx_lock(ao__ctx); /* gc is now invalid */       \

Is this supposed to be unlock answering my question above? Likewise in
ABORT?

> +        return ao__rc;                                          \
> +   }while(0)

Can we arrange for AO_INPROGRESS and AO_ABORT to return the rc? So it
would become
	return AO_INPROGRESS;

Is the ({stuff,stuff,stuff,val}) syntax a gcc-ism?

> +
> +
> +#define AO_ABORT(rc) do{                                        \
> +        libxl_ctx *ao__ctx = libxl__gc_owner(&ao->gc);          \
> +        assert(rc);                                             \
> +        assert(rc != ERROR_ASYNC_INPROGRESS);                   \
> +        libxl__ao_abort(ao);                                    \
> +        libxl__ctx_lock(ao__ctx); /* gc is now invalid */       \
> +        return (rc);                                            \
> +    }while(0)
> +
> +#define AO_GC                                   \
> +    libxl__gc *const gc = &ao->gc
> +
> +
> +/* All of these MUST be called with the ctx locked.

Except libxl__ao_create? at least according to the implementation of
AO_CREATE which takes the lock after.

> + * libxl__ao_inprogress MUST be called with the ctx locked exactly once. */
> +_hidden libxl__ao *libxl__ao_create(libxl_ctx*, uint32_t domid,
> +                                    const libxl_asyncop_how*);
> +_hidden int libxl__ao_inprogress(libxl__ao *ao);
> +_hidden void libxl__ao_abort(libxl__ao *ao);
> +_hidden void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc);
> +
> +/* For use by ao machinery ONLY */
> +_hidden void libxl__ao__destroy(libxl_ctx*, libxl__ao *ao);
> +
> +/*
>   * Convenience macros.
>   */
> 
> diff --git a/tools/libxl/libxl_types.idl b/tools/libxl/libxl_types.idl
> index a6dac79..325bb21 100644
> --- a/tools/libxl/libxl_types.idl
> +++ b/tools/libxl/libxl_types.idl
> @@ -395,6 +395,7 @@ libxl_event_type = Enumeration("event_type", [
>      (1, "DOMAIN_SHUTDOWN"),
>      (2, "DOMAIN_DESTROY"),
>      (3, "DISK_EJECT"),
> +    (4, "OPERATION_COMPLETE"),
>      ])
> 
>  libxl_ev_user = UInt(64)
> @@ -418,4 +419,7 @@ libxl_event = Struct("event",[
>                                          ("vdev", string),
>                                          ("disk", libxl_device_disk),
>                                   ])),
> +           ("operation_complete", Struct(None, [
> +                                        ("rc", integer),
> +                                 ])),
>             ]))])
> --
> 1.7.2.5
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 8/9] libxl: Introduce libxl__ev_devstate
  2012-01-13 19:25 ` [PATCH 8/9] libxl: Introduce libxl__ev_devstate Ian Jackson
@ 2012-01-19 10:54   ` Ian Campbell
  2012-01-24 17:33     ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Campbell @ 2012-01-19 10:54 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> Provide a new-style asynchronous facility for waiting for device
> states on xenbus.  This will replace libxl__wait_for_device_state,
> after the callers have been updated in later patches.

Is event-with-timeout likely to be a useful/common enough pattern to be
worth baking into the infrastructure/helpers rather than implementing
just for this one event type? (If yes, then "I will refactor for the
second user" is a valid response.)

> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
> ---
>  tools/libxl/libxl_event.c    |   75 ++++++++++++++++++++++++++++++++++++++++++
>  tools/libxl/libxl_internal.h |   41 +++++++++++++++++++++++
>  2 files changed, 116 insertions(+), 0 deletions(-)
> 
> diff --git a/tools/libxl/libxl_event.c b/tools/libxl/libxl_event.c
> index b99049a..1d271b8 100644
> --- a/tools/libxl/libxl_event.c
> +++ b/tools/libxl/libxl_event.c
> @@ -507,6 +507,81 @@ void libxl__ev_xswatch_deregister(libxl__gc *gc, libxl__ev_xswatch *w)
[...]
> +    libxl__ev_devstate_cancel(gc, ds);
> +    ds->callback(egc, ds, rc);
> +}
> +
> +static void devstate_timeout(libxl__egc *egc, libxl__ev_time *ev,
> +                             const struct timeval *requested_abs)
> +{
> +    EGC_GC;
> +    libxl__ev_devstate *ds = CONTAINER_OF(ev, *ds, timeout);
> +    LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d "
> +               " timed out", ds->watch.path, ds->wanted);
> +    libxl__ev_devstate_cancel(gc, ds);

What prevents racing here with the watch happening? Might the caller see
two callbacks?

> +    ds->callback(egc, ds, ERROR_TIMEDOUT);
> +}
> +

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 9/9] libxl: Convert to asynchronous: device removal
  2012-01-13 19:25 ` [PATCH 9/9] libxl: Convert to asynchronous: device removal Ian Jackson
@ 2012-01-19 11:55   ` Ian Campbell
  2012-01-24 17:39     ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Campbell @ 2012-01-19 11:55 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> Convert libxl_FOO_device_remove, and the function which does the bulk
> of the work, libxl__device_remove, to the new async ops scheme.
> 
> Adjust all callers.
> 
> Also remove libxl__wait_for_device_state which is now obsolete.
> 
> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
> ---
>  tools/libxl/libxl.c          |   60 +++++++++++++--------
>  tools/libxl/libxl.h          |   16 ++++--
>  tools/libxl/libxl_device.c   |  118 +++++++++++++-----------------------------
>  tools/libxl/libxl_internal.h |   30 ++---------
>  tools/libxl/xl_cmdimpl.c     |    4 +-
>  5 files changed, 93 insertions(+), 135 deletions(-)
> 
> diff --git a/tools/libxl/libxl.c b/tools/libxl/libxl.c
> index 9890d79..d63da97 100644
> --- a/tools/libxl/libxl.c
> +++ b/tools/libxl/libxl.c
> @@ -1310,19 +1310,23 @@ out:
>  }
> 
>  int libxl_device_disk_remove(libxl_ctx *ctx, uint32_t domid,
> -                             libxl_device_disk *disk)
> +                             libxl_device_disk *disk,
> +                             const libxl_asyncop_how *ao_how)
>  {
> -    GC_INIT(ctx);
> +    AO_CREATE(ctx, domid, ao_how);
>      libxl__device device;
>      int rc;
> 
>      rc = libxl__device_from_disk(gc, domid, disk, &device);
>      if (rc != 0) goto out;
> 
> -    rc = libxl__device_remove(gc, &device, 1);
> +    rc = libxl__initiate_device_remove(ao, &device);
> +    if (rc) goto out;
> +
> +    AO_INPROGRESS;
> +
>  out:
> -    GC_FREE;
> -    return rc;
> +    AO_ABORT(rc);
>  }

After all the internal complexity the actual usage is refreshingly
simple. Phew!

> 
>  int libxl_device_disk_destroy(libxl_ctx *ctx, uint32_t domid,
> @@ -1536,11 +1540,11 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk)
> 
>      ret = 0;
> 
> -    libxl_device_disk_remove(ctx, domid, disks + i);
> +    libxl_device_disk_remove(ctx, domid, disks + i, 0);
>      libxl_device_disk_add(ctx, domid, disk);
>      stubdomid = libxl_get_stubdom_id(ctx, domid);
>      if (stubdomid) {
> -        libxl_device_disk_remove(ctx, stubdomid, disks + i);
> +        libxl_device_disk_remove(ctx, stubdomid, disks + i, 0);
>          libxl_device_disk_add(ctx, stubdomid, disk);
>      }
>  out:

The async capability here ought to be propagated to the
libxl_cdrom_insert interface too. I guess that would mean handling
compound asynchronous operations.

...in the fullness of time, of course.


> diff --git a/tools/libxl/libxl.h b/tools/libxl/libxl.h
> index 416d6e8..602bd01 100644
> --- a/tools/libxl/libxl.h
> +++ b/tools/libxl/libxl.h

> diff --git a/tools/libxl/libxl_device.c b/tools/libxl/libxl_device.c
> index 5d05e90..e905133 100644
> --- a/tools/libxl/libxl_device.c
> +++ b/tools/libxl/libxl_device.c
> @@ -357,85 +357,41 @@ int libxl__device_disk_dev_number(const char *virtpath, int *pdisk,
>      return -1;
>  }
> 
[...]
> +typedef struct {
> +    libxl__ao *ao;
> +    libxl__ev_devstate ds;
> +} libxl__ao_device_remove;
> +
> +static void device_remove_cleanup(libxl__gc *gc,
> +                                  libxl__ao_device_remove *aorm) {
> +    if (!aorm) return;
> +    libxl__ev_devstate_cancel(gc, &aorm->ds);
> +}
> 
[...]
> +static void device_remove_callback(libxl__egc *egc, libxl__ev_devstate *ds,
> +                                   int rc) {
> +    libxl__ao_device_remove *aorm = CONTAINER_OF(ds, *aorm, ds);
> +    libxl__gc *gc = &aorm->ao->gc;
> +    libxl__ao_complete(egc, aorm->ao, rc);
> +    device_remove_cleanup(gc, aorm);
>  }
> 
[...]
> +int libxl__initiate_device_remove(libxl__ao *ao, libxl__device *dev)
>  {
> +    /* Arranges that dev will be removed from its guest.  When
> +     * this is done, the ao will be completed.  An error
> +     * return from libxl__device_remove means that the ao
> +     * will _not_ be completed and the caller must do so.

Do you mean aborted or cancelled rather than completed here?

> +     */
> +    AO_GC;
>      libxl_ctx *ctx = libxl__gc_owner(gc);
>      xs_transaction_t t;
>      char *be_path = libxl__device_backend_path(gc, dev);
>      char *state_path = libxl__sprintf(gc, "%s/state", be_path);
>      char *state = libxl__xs_read(gc, XBT_NULL, state_path);
>      int rc = 0;
> +    libxl__ao_device_remove *aorm = 0;
> 
>      if (!state)
>          goto out;
> @@ -458,23 +414,21 @@ retry_transaction:
>          }
>      }
> 
[...]
>      libxl__device_destroy_tapdisk(gc, be_path);
> 
[...]
> +    aorm = libxl__zalloc(gc, sizeof(*aorm));
> +    aorm->ao = ao;
> +    libxl__ev_devstate_init(&aorm->ds);
> 
[...]
> +    rc = libxl__ev_devstate_wait(gc, &aorm->ds, device_remove_callback,
> +                                 state_path, XenbusStateClosed,
> +                                 LIBXL_DESTROY_TIMEOUT * 1000);
> +    if (rc) goto out;
> +
> +    return 0;
> +
> + out:
> +    device_remove_cleanup(gc, aorm);
>      return rc;
>  }
> 
> diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> index b7f0f54..9920fb9 100644
> --- a/tools/libxl/libxl_internal.h
> +++ b/tools/libxl/libxl_internal.h
> @@ -653,35 +653,15 @@ _hidden char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device);
[...]
> +/* Arranges that dev will be removed from its guest.  When
> + * this is done, the ao will be completed.  An error
> + * return from libxl__device_remove means that the ao
> + * will _not_ be completed and the caller must do so. */

This is the same comment as at the head of the implementation so the
same comment re aborting applies. Do we need both comments?

> +_hidden int libxl__initiate_device_remove(libxl__ao*, libxl__device *dev);
> 
>  /*
>   * libxl__ev_devstate - waits a given time for a device to
> diff --git a/tools/libxl/xl_cmdimpl.c b/tools/libxl/xl_cmdimpl.c
> index c2b7a1e..659a9e6 100644
> --- a/tools/libxl/xl_cmdimpl.c
> +++ b/tools/libxl/xl_cmdimpl.c
> @@ -4624,7 +4624,7 @@ int main_networkdetach(int argc, char **argv)
>              return 1;
>          }
>      }
> -    if (libxl_device_nic_remove(ctx, domid, &nic)) {
> +    if (libxl_device_nic_remove(ctx, domid, &nic, 0)) {
>          fprintf(stderr, "libxl_device_nic_del failed.\n");
>          return 1;
>      }

There aren't actually any examples of a caller using the ao-ness in xl
are there?

I know that sync is for the most part ao+wait but I'm a bit concerned
that e.g. several of the paths in libxl__ao_complete probably haven't
been run and one of the flow-charts added to tools/libxl/libxl_event.c
in patch 6/9 has probably never happened either.

IMHO this isn't a blocker for this patch but since xl is, in addition to
being a toolstack, a testbed for libxl perhaps one or more "gratuitously
asynchronous" calls could be made? Perhaps the libxl_cdrom_insert case
would be an interesting one? In particular the case in the
create_domain() event loop (e.g. so that the response to a cdrom eject
does not block shutdown processing).

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/9] libxl: New event generation API
  2012-01-18 17:33   ` Ian Campbell
@ 2012-01-24 16:23     ` Ian Jackson
  2012-01-24 16:38       ` Ian Campbell
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-24 16:23 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, Ian Jackson

Ian Campbell writes ("Re: [Xen-devel] [PATCH 3/9] libxl: New event generation API"):
> On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> > +    free_disable_deaths(gc, &CTX->death_list);
> > +    free_disable_deaths(gc, &CTX->death_reported);
> > +
> > +    libxl_evgen_disk_eject *eject;
> > +    while ((eject = LIBXL_LIST_FIRST(&CTX->disk_eject_evgens)))
> > +        libxl__evdisable_disk_eject(gc, eject);
> 
> Why a helper for deaths but not ejects?

Because the deaths function needs to be called twice, once for each of
two lists.


> > +typedef struct libxl_event_hooks {
> > +    uint64_t event_occurs_mask;
> 
> libxl_event_{wait,check} and LIBXL_EVENTMASK_ALL have an unsigned long
> mask. Are they not the same set of bits?

This is a mistake.  I'll change it to 64 everywhere.


> > + * The user value is returned in the generated events and may be
> > + * used by the caller for whatever it likes.  The type ev_user is
> > + * guaranteed to be an unsigned integer type which is at least
> > + * as big as uint64_t and is also guaranteed to be big enough to
> > + * contain any intptr_t value.
> 
> Does anything actually guarantee that sizeof(uint64_t) >
> sizeof(intptr_t)? I'm sure it's true in practice and I'm happy to rely
> on it. Just interested.

No, nothing does.  If we port this code to a platform where pointers
are more than 64 bits, we will need to change the type of this field
(to make it be an intptr_t or some other type).

> > +libxl_ev_user = UInt(64)
> 
> The other option here would be Builtin(...) and an entry in the builtin
> table in the wrapper generator. 

As I say, that prevents the ocaml idl generator from knowing that the
type is 64 bits and so prevents it from using the right ocaml type.

> Arguably the idl could be improved by causing Number() to have a width
> field. Currently it has a signedness and width is a property of UInt but
> the latter could be pushed up the hierarchy.
> 
> You'd still end up with 
> 	FOO = Number("FOO", width=X)
> which isn't really much better.

Indeed.

> Or the ocaml generate could handle Number as the biggest int.

That's a bit wasteful.

> Hrm. None of that seems all that much better than what you have. Chalk
> it up to potential future work.

Right.


> > +libxl_ev_link = Builtin("ev_link", passby=PASS_BY_REFERENCE, private=True)
> > +
> > +libxl_event = Struct("event",[
> > +    ("link",     libxl_ev_link,0),
> 
> This "0" == "const=False" which is the default. I don't think it is
> necessary.

This is a leftover from when there was an in-idl comment parameter.  I
will remove it.

> [...]
> > +    ret = libxl_evenable_domain_death(ctx, domid, 0, &deathw);
> > +    if (ret) goto out;
> > 
> [...]
> > +    if (!diskws) {
> > +        diskws = xmalloc(sizeof(*diskws) * d_config.num_disks);
> 
> I didn't see this getting freed on the exit path.

This is deliberate.  Why bother freeing things when the process is
about to exit?

> > +        for (i = 0; i < d_config.num_disks; i++)
> > +            diskws[i] = NULL;
> > +    }
> > +    for (i = 0; i < d_config.num_disks; i++) {
> > +        ret = libxl_evenable_disk_eject(ctx, domid, d_config.disks[i].vdev,
> > +                                        0, &diskws[i]);
> > +        if (ret) goto out;
> > +    }
> 
> This is all (I think) safe for num_disks == 0 but why waste the effort?

I'm not sure I follow.  Do you think I should put an if() round it,
testing whether d_config.num_disks is nonzero ?

In which case by "effort" do you mean computer effort ?  Surely you
can't mean that because this performance detail of this code is
entirely irrelevant.  But you can't mean human effort in the code
because adding the test would be additional code to write, read,
understand and maintain ...

> Incidentally we have libxl_device_disk.removable which might be an
> opportunity to optimise? Assuming it is meaningful in that way. I think
> in reality only emulated CD-ROM devices ever generate this event but
> perhaps exposing that in the API overcomplicates things.

Optimise to save on pointless xenstore watches you mean ?

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 5/9] libxl: Permit multithreaded event waiting
  2012-01-19 10:01   ` Ian Campbell
@ 2012-01-24 16:34     ` Ian Jackson
  0 siblings, 0 replies; 31+ messages in thread
From: Ian Jackson @ 2012-01-24 16:34 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel

Ian Campbell writes ("Re: [Xen-devel] [PATCH 5/9] libxl: Permit multithreaded event waiting"):
> On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> > We also need to move some variables from globals in the ctx to be
> > per-polling-thread.
> 
> I don't think this relates to this patch, just that the mention of
> multithreaded waiting brought it to mind. What are the intended
> semantics of two calls to libxl_event_wait with overlapping event masks?

You get each event exactly once, via one of the (possibly several)
suitable libxl_event_wait calls.

> Do we expect that the caller must have called the appropriate evenables
> twice such that both waits get an event (possibly discriminate via the
> predicate)?

No.  Well, I guess the caller could do that by divvying up the ev_user
space between the two calls, but it would be a very perverse thing to
do.

> Presumably we want to ensure that one of the waits doesn't sleep for
> ever.

Yes, that's what the wakeup pipe is for.

> How does this interact with events generated via the hooks mechanism? Do
> we always deliver to the explicit wait in preference?

No.  As the doc comment for libxl_event_register_callbacks says:

   * Arranges that libxl will henceforth call event_occurs for any
   * events whose type is set in event_occurs_mask, rather than
   * queueing the event for retrieval by libxl_event_check/wait.
   * Events whose bit is clear in mask are not affected.

So if you ask for callbacks you don't get the events via wait.

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/9] libxl: New event generation API
  2012-01-24 16:23     ` Ian Jackson
@ 2012-01-24 16:38       ` Ian Campbell
  2012-01-24 18:43         ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Campbell @ 2012-01-24 16:38 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Tue, 2012-01-24 at 16:23 +0000, Ian Jackson wrote:
> Ian Campbell writes ("Re: [Xen-devel] [PATCH 3/9] libxl: New event generation API"):
> > On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:

> > > +libxl_ev_user = UInt(64)
> > 
> > The other option here would be Builtin(...) and an entry in the builtin
> > table in the wrapper generator. 
> 
> As I say, that prevents the ocaml idl generator from knowing that the
> type is 64 bits and so prevents it from using the right ocaml type.

The ocaml type of a "builtin" is supplied by the builtin table in the
generator. However:

[...]
> > Hrm. None of that seems all that much better than what you have. Chalk
> > it up to potential future work.
> 
> Right.

ACK.

> > [...]
> > > +    ret = libxl_evenable_domain_death(ctx, domid, 0, &deathw);
> > > +    if (ret) goto out;
> > > 
> > [...]
> > > +    if (!diskws) {
> > > +        diskws = xmalloc(sizeof(*diskws) * d_config.num_disks);
> > 
> > I didn't see this getting freed on the exit path.
> 
> This is deliberate.  Why bother freeing things when the process is
> about to exit.

Usually I agree.

However xl is intended as a libxl testbed as well as a toolstack in its
own right and it is useful to be able to run tools such as valgrind on
it to detect leaks in the library, but this requires not having too many
"false positives" in xl.

> > > +        for (i = 0; i < d_config.num_disks; i++)
> > > +            diskws[i] = NULL;
> > > +    }
> > > +    for (i = 0; i < d_config.num_disks; i++) {
> > > +        ret = libxl_evenable_disk_eject(ctx, domid, d_config.disks[i].vdev,
> > > +                                        0, &diskws[i]);
> > > +        if (ret) goto out;
> > > +    }
> > 
> > This is all (I think) safe for num_disks == 0 but why waste the effort?
> 
> I'm not sure I follow.  Do you think I should put an if() round it,
> testing whether d_config.num_disks is nonzero ?

I did but then I read the below which is entirely correct.

> In which case by "effort" do you mean computer effort ?  Surely you
> can't mean that because this performance detail of this code is
> entirely irrelevant.  But you can't mean human effort in the code
> because adding the test would be additional code to write, read,
> understand and maintain ...
> 
> > Incidentally we have libxl_device_disk.removable which might be an
> > opportunity to optimise? Assuming it is meaningful in that way. I think
> > in reality only emulated CD-ROM devices ever generate this event but
> > perhaps exposing that in the API overcomplicates things.
> 
> Optimise to save on pointless xenstore watches you mean ?

That and event overhead generally of having the events registered and
being tracked etc. In the common case we probably have 1 disk and 1
cdrom so we've effectively doubled the amount of stuff we're dealing
with -- whether this becomes significant e.g. on a system with 100+ VMs
I'm not sure.

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure
  2012-01-19 10:44   ` Ian Campbell
@ 2012-01-24 17:27     ` Ian Jackson
  2012-01-25 10:48       ` Ian Campbell
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-24 17:27 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel

Thanks for the thorough review.

Ian Campbell writes ("Re: [Xen-devel] [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure"):
> You've done device removal, do you have a list of other things which
> should use this? (perhaps with an associated list of people's names...)

No, I don't have such a list, but offhand:
  domain creation (bootloader, etc.)
  qmp
  device attach
  domain destruction
  bootloader
  vncviewer exec (or vncviewer parameter fetching - api should change)
(these may overlap).


> > + * If ao_how==NULL, the function will be synchronous.
> > + *
> > + * If ao_how!=NULL, the function will set the operation going, and
> > + * if this is successful will return ERROR_ASYNCH_INPROGRESS.
> 
> There's an extra H here compared with the actual symbol name (I think
> the symbol is right).

Fixed everywhere.


> Is there a possibility that libxl might decide that the operation isn't
> actually going to take all the long and do things synchronously,
> returning normal success (e.g. 0)? Is that the reason for the separate
> return code for this "I did what you asked me" case?

No, even if the operation completes quickly, the same exit path is
taken.  So if you ask for a callback or an event, you get that even if
the callback or event happened before your initiating function
returns.  As I say:

 * Note that it is possible for an asynchronous operation which is to
 * result in a callback to complete during its initiating function
 * call.  In this case the initating function will return
 * ERROR_ASYNC_INPROGRESS, even though by the time it returns the
 * operation is complete and the callback has already happened.

If ao_how is non-NULL then these functions cannot return 0.
If it is NULL they cannot return ASYNC_INPROGRESS.

I chose to use a new exit status because it seemed safer but that's a
matter of taste and if you prefer I could return 0 for that case.


> Can we drop the ERROR_ prefix? I know that's inconsistent with the other
> return codes but those actually are errors.

I guess we could but isn't this going to become a proper IDL enum at
some point ?


> > + *
> > + * If ao_how->callback!=NULL, the callback will be called when the
> > + * operation completes.  The same rules as for libxl_event_hooks
> > + * apply, including the reentrancy rules and the possibility of
> 
>            ^ (see above/below) -- depending on how these comments end up
> relative to each other.

It's actually in a different file.  I'll add a cross-reference.


> > + * "disaster", except that libxl calls ao_how->callback instead of
> > + * libxl_event_hooks.event_occurs.
> > + *
> > + * If ao_how->callback==NULL, a libxl_event will be generated which
> > + * can be obtained from libxl_event_wait or libxl_event_check.
> 
> Or be delivered via event_occurs?

Yes, in principle, although that would be a silly thing to ask for.
Why would you want your ao completions delivered via some central
callback function just so that you could split them up again ?



> > + * call.  In this case the initating function will return
> 
>                               initiating

Fixed.


> > +typedef struct {
> > +    void (*callback)(libxl_ctx *ctx, int rc, void *for_callback);
> > +    union {
> > +        libxl_ev_user for_event; /* used if callback==NULL */
> > +        void *for_callback; /* passed to callback */
> 
> Why void * for one bit of "closure" but an explicit uint64_t for the
> other. I nearly commented on the use of uint64_t previously -- void *,
> or perhaps (u)intptr_t is more normal.

The context value in an event needs to be marshallable to a foreign
language or a foreign process.  So it can't be a pointer.  Or at
least, it has to be a type which can contain integers as well as
pointers, and an integer type is better for that.

The context value for a callback function can be a void* because you
don't ever need to "marshal" the "callback", or if you do you can wrap
up the context appropriately.

> > + * Completion after initiator return (asynch. only):
...
> > + *   * initiator calls ao_complete:               |
> > + *     - observes event not net done,             |
> > + *     - returns to caller                        |
> > + *                                                |
> > + *                              - ao completes on some thread
> > + *                              - generate the event or call the callback
> > + *                              - destroy the ao
> 
> Where does ao_inprogress fit into these diagrams?

There's a mistake in the diagrams: where it says on the left
"initiator calls ao_complete" it should read "... ao_inprogress", and
likewise for "ao_complete takes the lock".  I will fix this.


> > + */
> > +
> > +void libxl__ao__destroy(libxl_ctx *ctx, libxl__ao *ao) {
> 
> CODING_STYLE wants these braces on the next line (a bunch more follow)

Oh yes, will fix.


> > +void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc) {
> > +    assert(ao->magic == LIBXL__AO_MAGIC);
> > +    assert(!ao->complete);
> > +    ao->complete = 1;
> > +    ao->rc = rc;
> > +
> > +    if (ao->poller) {
> > +        assert(ao->in_initiator);
> > +        libxl__poller_wakeup(egc, ao->poller);
> > +    } else if (ao->how.callback) {
> > +        LIBXL_TAILQ_INSERT_TAIL(&egc->aos_for_callback, ao, entry_for_callback);
> > +    } else {
> > +        libxl_event *ev;
> > +        ev = NEW_EVENT(egc, OPERATION_COMPLETE, ao->domid);
> > +        if (ev) {
> > +            ev->for_user = ao->how.u.for_event;
> > +            ev->u.operation_complete.rc = ao->rc;
> > +            libxl__event_occurred(egc, ev);
> > +        }
> > +        ao->notified = 1;
> > +    }
> > +    if (!ao->in_initiator && ao->notified)
> > +        libxl__ao__destroy(libxl__gc_owner(&egc->gc), ao);
> 
> You added a helper for this libxl__gc_owner(&egc..) construct.

You mean EGC_GC and CTX.  I don't think that's a good idea here
because it obscures exactly what's going on.  In particular, there are
two gcs here - the ao's and the egc's - and one of them may be about
to evaporate.


> > +    ao = calloc(sizeof(*ao),1);
> 
> calloc is actually (nmemb, size). I'm sure it doesn't really matter
> though.

I'll fix it though.


> > +            rc = eventloop_iteration(&egc,ao->poller);
> > +            if (rc) {
> > +                /* Oh dear, this is quite unfortunate. */
> > +                LIBXL__LOG(CTX, LIBXL__LOG_ERROR, "Error waiting for"
> > +                           " event during long-running operation (rc=%d)", rc);
> > +                sleep(1);
> > +                /* It's either this or return ERROR_I_DONT_KNOW_WHETHER
> > +                 * _THE_THING_YOU_ASKED_FOR_WILL_BE_DONE_LATER_WHEN
> > +                 * _YOU_DIDNT_EXPECT_IT, since we don't have any kind of
> > +                 * cancellation ability. */
> 
> Does this constitute a "disaster" (in the special hook sense)?

No, disaster just lets us say that some events may be lost and an ao
completion might not be an event.  disaster doesn't let us randomly
store up ongoing activity and have it happen when not expected.
For example, a caller asking for a synchronous operation does not expect
to get an error code and then have the operation continue in the
background anyway.


> > + *   Usually callback function should use GET_CONTAINING_STRUCT
> 
> Now called CONTAINER_OF

Fixed.  Sometimes I wish the compiler could look into comments and
spot when I've done this kind of thing.


> > + *   to obtain its own structure, containing a pointer to the ao,
> > + *   and then use the gc from that ao.
> > + */
> > +
> > +#define AO_CREATE(ctx, domid, ao_how)                           \
> > +    libxl__ao *ao = libxl__ao_create(ctx, domid, ao_how);       \
> > +    if (!ao) return ERROR_NOMEM;                                \
> > +    AO_GC;                                                      \
> > +    CTX_LOCK;
> 
> Where does the unlock which balances this come from? The only unlock I
> see in this patch is the temporary drop in libxl__ao_inprogress which is
> matched by another lock.

The AO_INPROGRESS macro is supposed to unlock it, but locks it again
by mistake.  Well spotted.

> > +#define AO_INPROGRESS do{                                       \
> > +        libxl_ctx *ao__ctx = libxl__gc_owner(&ao->gc);          \
> > +        int ao__rc = libxl__ao_inprogress(ao);                  \
> > +        libxl__ctx_lock(ao__ctx); /* gc is now invalid */       \
> 
> Is this supposed to be unlock answering my question above? Likewise in
> ABORT?

Yes.  Indeed the comment above agrees:

 * - If initiation is successful, the initiating function needs
 *   to run libxl__ao_inprogress right before unlocking and
 *   returning, and return whatever it returns (AO_INPROGRESS macro).
 *
 * - If the initiation is unsuccessful, the initiating function must
 *   call libxl__ao_abort before unlocking and returning whatever
 *   error code is appropriate (AO_ABORT macro).


> > +        return ao__rc;                                          \
> > +   }while(0)
> 
> Can we arrange for AO_INPROGRESS and AO_ABORT to return the rc? So it
> would become
>         return AO_INPROGRESS;

That would be possible.  I wasn't sure whether to do it like that.
Note that AO_CREATE already might return; doing it the way I have it
now seems more symmetrical.

But perhaps it would make things clearer to have the return outside
the macro.

> Is the ({stuff,stuff,stuff,val}) syntax a gcc-ism?

Yes.  But I don't think that should stop us if we prefer it.


> > +/* All of these MUST be called with the ctx locked.
> 
> Except libxl__ao_create? at least according to the implementation of
> AO_CREATE which takes the lock after.

libxl__ao_create calls libxl__poller_get which needs to be called with
the lock held.  Well spotted.


Thanks,
Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 8/9] libxl: Introduce libxl__ev_devstate
  2012-01-19 10:54   ` Ian Campbell
@ 2012-01-24 17:33     ` Ian Jackson
  2012-01-25 10:57       ` Ian Campbell
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-24 17:33 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel

Ian Campbell writes ("Re: [Xen-devel] [PATCH 8/9] libxl: Introduce libxl__ev_devstate"):
> On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> > Provide a new-style asynchronous facility for waiting for device
> > states on xenbus.  This will replace libxl__wait_for_device_state,
> > after the callers have been updated in later patches.
> 
> Is event-with-timeout likely to be a useful/common enough pattern to be
> worth baking into the infrastructure/helpers rather than implementing
> just for this one event type? (if yes then, "I will refactor for the
> second user is a valid response").

I'm not convinced.  I thought of this but I think it would result in
flabby code - all the libxl__ev_register functions would gain a new
timeout parameter (and note that the timeout machinery has both
absolute and relative timeouts...)

I think when we have a second user it might be worth seeing if some
commonality could be extracted but TBH I doubt it would make the code
smaller or simpler.

> > +static void devstate_timeout(libxl__egc *egc, libxl__ev_time *ev,
> > +                             const struct timeval *requested_abs)
> > +{
> > +    EGC_GC;
> > +    libxl__ev_devstate *ds = CONTAINER_OF(ev, *ds, timeout);
> > +    LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d "
> > +               " timed out", ds->watch.path, ds->wanted);
> > +    libxl__ev_devstate_cancel(gc, ds);
> 
> What prevents racing here with the watch happening? Might the caller see
> two callbacks?

  static inline void libxl__ev_devstate_cancel(libxl__gc *gc,
                                               libxl__ev_devstate *ds)
  {
      libxl__ev_time_deregister(gc,&ds->timeout);
      libxl__ev_xswatch_deregister(gc,&ds->watch);
  }

So, no.  When the timeout happens, the ev xswatch is deregistered and
can thereafter no longer generate callbacks.  If there are any
xenstore watch events in the pipeline for deregistered ev_xswatch's,
they're discarded by watchfd_callback.

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 9/9] libxl: Convert to asynchronous: device removal
  2012-01-19 11:55   ` Ian Campbell
@ 2012-01-24 17:39     ` Ian Jackson
  0 siblings, 0 replies; 31+ messages in thread
From: Ian Jackson @ 2012-01-24 17:39 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel

Ian Campbell writes ("Re: [Xen-devel] [PATCH 9/9] libxl: Convert to asynchronous: device removal"):
> On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> > Convert libxl_FOO_device_remove, and the function which does the bulk
> > of the work, libxl__device_remove, to the new async ops scheme.
...
> 
> After all the internal complexity the actual usage is refreshingly
> simple. Phew!

Yes, that was my main aim ...

> >  int libxl_device_disk_destroy(libxl_ctx *ctx, uint32_t domid,
> > @@ -1536,11 +1540,11 @@ int libxl_cdrom_insert(libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk)
> >  out:
> 
> The async capability here ought to be propagated to the
> libxl_cdrom_insert interface too. I guess that would mean handling
> compound asynchronous operations.

Yes.  Compound asynchronous operations are already supported by the
current scheme: the code which implements them just sets up an
appropriate series of callbacks.

> > +int libxl__initiate_device_remove(libxl__ao *ao, libxl__device *dev)
> >  {
> > +    /* Arranges that dev will be removed from its guest.  When
> > +     * this is done, the ao will be completed.  An error
> > +     * return from libxl__device_remove means that the ao
> > +     * will _not_ be completed and the caller must do so.
> 
> Do you mean aborted or cancelled rather than completed here?

No.  An ao cannot be aborted or cancelled, only completed (perhaps
with an error code).

I see that the comment is missing "initiate_" from the function name,
which I will fix.

> > diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
> > index b7f0f54..9920fb9 100644
> > --- a/tools/libxl/libxl_internal.h
> > +++ b/tools/libxl/libxl_internal.h
> > @@ -653,35 +653,15 @@ _hidden char *libxl__device_backend_path(libxl__gc *gc, libxl__device *device);
> [...]
> > +/* Arranges that dev will be removed from its guest.  When
> > + * this is done, the ao will be completed.  An error
> > + * return from libxl__device_remove means that the ao
> > + * will _not_ be completed and the caller must do so. */
> 
> This is the same comment as at the head of the implementation so the
> same comment re aborting applies. Do we need both comments?

No.  The comment should be in the header file only.  I'll fix this.

> > -    if (libxl_device_nic_remove(ctx, domid, &nic)) {
> > +    if (libxl_device_nic_remove(ctx, domid, &nic, 0)) {
...
> 
> There aren't actually any examples of a caller using the ao-ness in xl
> are there?

No.

> I know that sync is for the most part ao+wait but I'm a bit concerned
> that e.g. several of the paths in libxl__ao_complete probably haven't
> been run and one of the flow-charts added to tools/libxl/libxl_event.c
> in patch 6/9 has probably never happened either.

Yes.

> IMHO this isn't a blocker for this patch but since xl is, in addition to
> being a toolstack, a testbed for libxl perhaps one or more "gratuitously
> asynchronous" calls could be made? Perhaps the libxl_cdrom_insert case
> would be an interesting one? In particular the case in the
> create_domain() event loop (e.g. so that the response to a cdrom eject
> does not block shutdown processing).

That would definitely be worthwhile.  I'll put it (mentally) on my
todo list.

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 3/9] libxl: New event generation API
  2012-01-24 16:38       ` Ian Campbell
@ 2012-01-24 18:43         ` Ian Jackson
  0 siblings, 0 replies; 31+ messages in thread
From: Ian Jackson @ 2012-01-24 18:43 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel

Ian Campbell writes ("Re: [Xen-devel] [PATCH 3/9] libxl: New event generation API"):
> On Tue, 2012-01-24 at 16:23 +0000, Ian Jackson wrote:
> > Ian Campbell writes ("Re: [Xen-devel] [PATCH 3/9] libxl: New event generation API"):
> > > On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> 
> > > > +    ret = libxl_evenable_domain_death(ctx, domid, 0, &deathw);
...
> > > > +        diskws = xmalloc(sizeof(*diskws) * d_config.num_disks);
> > > 
> > > I didn't see this getting freed on the exit path.
> > 
> > > This is deliberate.  Why bother freeing things when the process is
> > > about to exit?
> 
> Usually I agree.
> 
> However xl is intended as a libxl testbed as well as a toolstack in its
> own right and it is useful to be able to run tools such as valgrind on
> it to detect leaks in the library, but this requires not having too many
> "false positives" in xl.

Hmm, true.  OK, I will clean up these evgens.

> > > Incidentally we have libxl_device_disk.removable which might be an
> > > opportunity to optimise? Assuming it is meaningful in that way. I think
> > > in reality only emulated CD-ROM devices ever generate this event but
> > > perhaps exposing that in the API overcomplicates things.
> > 
> > Optimise to save on pointless xenstore watches you mean ?
> 
> That and event overhead generally of having the events registered and
> being tracked etc. In the common case we probably have 1 disk and 1
> cdrom so we've effectively doubled the amount of stuff we're dealing
> with -- whether this becomes significant e.g. on a system with 100+ VMs
> I'm not sure.

OK, it's an easy enough test to add.  I'll do so.

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure
  2012-01-24 17:27     ` Ian Jackson
@ 2012-01-25 10:48       ` Ian Campbell
  2012-01-25 14:45         ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Campbell @ 2012-01-25 10:48 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Tue, 2012-01-24 at 17:27 +0000, Ian Jackson wrote:
> Thanks for the thorough review.
> 
> Ian Campbell writes ("Re: [Xen-devel] [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure"):
> > You've done device removal, do you have a list of other things which
> > should use this? (perhaps with an associated list of people's names...)
> 
> No, I don't have such a list, but offhand:
>   domain creation (bootloader, etc.)
>   qmp
>   device attach
>   domain destruction
>   bootloader
>   vncviewer exec (or vncviewer parameter fetching - api should change)
> (these may overlap).

Thanks.

> > Is there a possibility that libxl might decide that the operation isn't
> > > actually going to take all that long and do things synchronously,
> > returning normal success (e.g. 0)? Is that the reason for the separate
> > return code for this "I did what you asked me" case?
> 
> No, even if the operation completes quickly, the same exit path is
> taken.  So if you ask for a callback or an event, you get that even if
> the callback or event happened before your initiating function
> returns.  As I say:
> 
>  * Note that it is possible for an asynchronous operation which is to
>  * result in a callback to complete during its initiating function
>  * call.  In this case the initating function will return
>  * ERROR_ASYNC_INPROGRESS, even though by the time it returns the
>  * operation is complete and the callback has already happened.

Thanks, I think I simply hadn't got to that comment when I wrote the
question.

If this is a direct cut-n-paste then presumably there is a patch
somewhere where "initiating" is spelled "initating" as above. Hah, I've
just spotted where I pointed it out last time and you corrected it
below... At least I'm consistent.

> If ao_how is non-NULL then these functions cannot return 0.
> If it is NULL they cannot return ASYNC_INPROGRESS.
> 
> I chose to use a new exit status because it seemed safer but that's a
> matter of taste and if you prefer I could return 0 for that case.

I'm undecided (plus it seems a bit like bikeshedding). I certainly
prefer either 0 or {LIBXL_}ASYNC_IN_PROGRESS to ERROR_ASYNC_IN_PROGRESS
though.

> > Can we drop the ERROR_ prefix? I know that's inconsistent with the other
> > return codes but those actually are errors.
> 
> I guess we could but isn't this going to become a proper IDL enum at
> some point ?

At which point it would become LIBXL_ERROR_{FOOS} and
LIBXL_ASYNC_IN_PROGRESS?

[...]
> > > + * "disaster", except that libxl calls ao_how->callback instead of
> > > + * libxl_event_hooks.event_occurs.
> > > + *
> > > + * If ao_how->callback==NULL, a libxl_event will be generated which
> > > + * can be obtained from libxl_event_wait or libxl_event_check.
> > 
> > Or be delivered via event_occurs?
> 
> Yes, in principle, although that would be a silly thing to ask for.
> Why would you want your ao completions delivered via some central
> callback function just so that you could split them up again ?

True.

> > > + * call.  In this case the initating function will return
> > 
> >                               initiating
> 
> Fixed.
> 
> 
> > > +typedef struct {
> > > +    void (*callback)(libxl_ctx *ctx, int rc, void *for_callback);
> > > +    union {
> > > +        libxl_ev_user for_event; /* used if callback==NULL */
> > > +        void *for_callback; /* passed to callback */
> > 
> > Why void * for one bit of "closure" but an explicit uint64_t for the
> > other. I nearly commented on the use of uint64_t previously -- void *,
> > or perhaps (u)intptr_t is more normal.
> 
> The context value in an event needs to be marshallable to a foreign
> language or a foreign process.  So it can't be a pointer.  Or at
> least, it has to be a type which can contain integers as well as
> pointers, and an integer type is better for that.

Fair enough.

> 
> The context value for a callback function can be a void* because you
> don't ever need to "marshal" the "callback", or if you do you can wrap
> up the context appropriately.
> 
> > > + * Completion after initiator return (asynch. only):
> ...
> > > + *   * initiator calls ao_complete:               |
> > > + *     - observes event not net done,             |
> > > + *     - returns to caller                        |
> > > + *                                                |
> > > + *                              - ao completes on some thread
> > > + *                              - generate the event or call the callback
> > > + *                              - destroy the ao
> > 
> > Where does ao_inprogress fit into these diagrams?
> 
> There's a mistake in the diagrams: where it says on the left
> "initiator calls ao_complete" it should read "... ao_inprogress", and
> likewise for "ao_complete takes the lock".  I will fix this.

Thanks. While rereading with that substitution (and finding that it made
sense) I noticed:
+ *   * initiator calls ao_complete:               |
+ *     - observes event not net done,             |

You want s/net/yet/.

> > > +void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc) {
> > > +    assert(ao->magic == LIBXL__AO_MAGIC);
> > > +    assert(!ao->complete);
> > > +    ao->complete = 1;
> > > +    ao->rc = rc;
> > > +
> > > +    if (ao->poller) {
> > > +        assert(ao->in_initiator);
> > > +        libxl__poller_wakeup(egc, ao->poller);
> > > +    } else if (ao->how.callback) {
> > > +        LIBXL_TAILQ_INSERT_TAIL(&egc->aos_for_callback, ao, entry_for_callback);
> > > +    } else {
> > > +        libxl_event *ev;
> > > +        ev = NEW_EVENT(egc, OPERATION_COMPLETE, ao->domid);
> > > +        if (ev) {
> > > +            ev->for_user = ao->how.u.for_event;
> > > +            ev->u.operation_complete.rc = ao->rc;
> > > +            libxl__event_occurred(egc, ev);
> > > +        }
> > > +        ao->notified = 1;
> > > +    }
> > > +    if (!ao->in_initiator && ao->notified)
> > > +        libxl__ao__destroy(libxl__gc_owner(&egc->gc), ao);
> > 
> > You added a helper for this libxl__gc_owner(&egc..) construct.
> 
> You mean EGC_GC and CTX.  I don't think that's a good idea here
> because it obscures exactly what's going on.  In particular, there are
> two gcs here - the ao's and the egc's - and one of them may be about
> to evaporate.

OK.

> > > +            rc = eventloop_iteration(&egc,ao->poller);
> > > +            if (rc) {
> > > +                /* Oh dear, this is quite unfortunate. */
> > > +                LIBXL__LOG(CTX, LIBXL__LOG_ERROR, "Error waiting for"
> > > +                           " event during long-running operation (rc=%d)", rc);
> > > +                sleep(1);
> > > +                /* It's either this or return ERROR_I_DONT_KNOW_WHETHER
> > > +                 * _THE_THING_YOU_ASKED_FOR_WILL_BE_DONE_LATER_WHEN
> > > +                 * _YOU_DIDNT_EXPECT_IT, since we don't have any kind of
> > > +                 * cancellation ability. */
> > 
> > Does this constitute a "disaster" (in the special hook sense)?
> 
> No, disaster just lets us say that some events may be lost and an ao
> completion might not be an event.  disaster doesn't let us randomly
> store up ongoing activity and have it happen when not expected.
> For example, a caller asking for a synchronous operation does not expect
> to get an error code and then have the operation continue in the
> background anyway.

OK.

> > > + *   Usually callback function should use GET_CONTAINING_STRUCT
> > 
> > Now called CONTAINER_OF
> 
> Fixed.  Sometimes I wish the compiler could look into comments and
> spot when I've done this kind of thing.

Or read the comments and DWIM so we just need to write the comments ;-)

> > > + *   to obtain its own structure, containing a pointer to the ao,
> > > + *   and then use the gc from that ao.
> > > + */
> > > +
> > > +#define AO_CREATE(ctx, domid, ao_how)                           \
> > > +    libxl__ao *ao = libxl__ao_create(ctx, domid, ao_how);       \
> > > +    if (!ao) return ERROR_NOMEM;                                \
> > > +    AO_GC;                                                      \
> > > +    CTX_LOCK;
> > 
> > Where does the unlock which balances this come from? The only unlock I
> > see in this patch is the temporary drop in libxl__ao_inprogress which is
> > matched by another lock.
> 
> The AO_INPROGRESS macro is supposed to unlock it, but locks it again
> by mistake.  Well spotted.
> 
> > > +#define AO_INPROGRESS do{                                       \
> > > +        libxl_ctx *ao__ctx = libxl__gc_owner(&ao->gc);          \
> > > +        int ao__rc = libxl__ao_inprogress(ao);                  \
> > > +        libxl__ctx_lock(ao__ctx); /* gc is now invalid */       \
> > 
> > Is this supposed to be unlock answering my question above? Likewise in
> > ABORT?
> 
> Yes.  Indeed the comment above agrees:
> 
>  * - If initiation is successful, the initiating function needs
>  *   to run libxl__ao_inprogress right before unlocking and
>  *   returning, and return whatever it returns (AO_INPROGRESS macro).
>  *
>  * - If the initiation is unsuccessful, the initiating function must
>  *   call libxl__ao_abort before unlocking and returning whatever
>  *   error code is appropriate (AO_ABORT macro).

Right.

I think this highlights my concern about some code paths not being run
by the in-xl users...

> > > > +        return ao__rc;                                          \
> > > +   }while(0)
> > 
> > Can we arrange for AO_INPROGRESS and AO_ABORT to return the rc? So it
> > would become
> >         return AO_INPROGRESS;
> 
> That would be possible.  I wasn't sure whether to do it like that.
> Note that AO_CREATE already might return; doing it the way I have it
> now seems more symmetrical.
> 
> But perhaps it would make things clearer to have the return outside
> the macro.

I thought there was a general preference for that sort of thing, but I
suppose it depends on the required macro contortions to make it happen.


Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 8/9] libxl: Introduce libxl__ev_devstate
  2012-01-24 17:33     ` Ian Jackson
@ 2012-01-25 10:57       ` Ian Campbell
  2012-01-25 14:49         ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Campbell @ 2012-01-25 10:57 UTC (permalink / raw)
  To: Ian Jackson; +Cc: xen-devel

On Tue, 2012-01-24 at 17:33 +0000, Ian Jackson wrote:
> Ian Campbell writes ("Re: [Xen-devel] [PATCH 8/9] libxl: Introduce libxl__ev_devstate"):
> > On Fri, 2012-01-13 at 19:25 +0000, Ian Jackson wrote:
> > > Provide a new-style asynchronous facility for waiting for device
> > > states on xenbus.  This will replace libxl__wait_for_device_state,
> > > after the callers have been updated in later patches.
> > 
> > Is event-with-timeout likely to be a useful/common enough pattern to be
> > worth baking into the infrastructure/helpers rather than implementing
> > > just for this one event type? (if yes, then "I will refactor for the
> > > second user" is a valid response).
> 
> I'm not convinced.  I thought of this but I think it would result in
> flabby code - all the libxl__ev_register functions would gain a new
> timeout parameter (and note that the timeout machinery has both
> absolute and relative timeouts...)
> 
> I think when we have a second user it might be worth seeing if some
> commonality could be extracted but TBH I doubt it would make the code
> smaller or simpler.

Right, let's leave it then.

> > > +static void devstate_timeout(libxl__egc *egc, libxl__ev_time *ev,
> > > +                             const struct timeval *requested_abs)
> > > +{
> > > +    EGC_GC;
> > > +    libxl__ev_devstate *ds = CONTAINER_OF(ev, *ds, timeout);
> > > +    LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "backend %s wanted state %d "
> > > +               " timed out", ds->watch.path, ds->wanted);
> > > +    libxl__ev_devstate_cancel(gc, ds);
> > 
> > What prevents racing here with the watch happening? Might the caller see
> > two callbacks?
> 
>   static inline void libxl__ev_devstate_cancel(libxl__gc *gc,
>                                                libxl__ev_devstate *ds)
>   {
>       libxl__ev_time_deregister(gc,&ds->timeout);
>       libxl__ev_xswatch_deregister(gc,&ds->watch);
>   }
> 
> So, no.  When the timeout happens, the ev xswatch is deregistered and
> can thereafter no longer generate callbacks.  If there are any
> xenstore watch events in the pipeline for deregistered ev_xswatch's,
> they're discarded by watchfd_callback.

What happens if the watch occurs and is delivered (in a different
thread) e.g. just before devstate_timeout calls
libxl__ev_devstate_cancel? The watch callback will be delivered but we
will unconditionally go on to deliver the cancel callback as well.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure
  2012-01-25 10:48       ` Ian Campbell
@ 2012-01-25 14:45         ` Ian Jackson
  2012-01-25 16:56           ` Ian Jackson
  0 siblings, 1 reply; 31+ messages in thread
From: Ian Jackson @ 2012-01-25 14:45 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel

Ian Campbell writes ("Re: [Xen-devel] [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure"):
> On Tue, 2012-01-24 at 17:27 +0000, Ian Jackson wrote:
> > If ao_how is non-NULL then these functions cannot return 0.
> > If it is NULL they cannot return ASYNC_INPROGRESS.
> > 
> > I chose to use a new exit status because it seemed safer but that's a
> > matter of taste and if you prefer I could return 0 for that case.
> 
> I'm undecided (plus it seems a bit like bikeshedding). I certainly
> prefer either 0 or {LIBXL_}ASYNC_IN_PROGRESS to ERROR_ASYNC_IN_PROGRESS
> though.

OK, I'll rename it.

> > I guess we could but isn't this going to become a proper IDL enum at
> > some point ?
> 
> At which point it would become LIBXL_ERROR_{FOOS} and
> LIBXL_ASYNC_IN_PROGRESS?

I guess so.  Or we could rename it LIBXL_RC_...

> + *   * initiator calls ao_complete:               |
> + *     - observes event not net done,             |
> 
> You want s/net/yet/.

Yes.

> > > Can we arrange for AO_INPROGRESS and AO_ABORT to return the rc? So it
> > > would become
> > >         return AO_INPROGRESS;
> > 
> > That would be possible.  I wasn't sure whether to do it like that.
> > Note that AO_CREATE already might return; doing it the way I have it
> > now seems more symmetrical.
> > 
> > But perhaps it would make things clearer to have the return outside
> > the macro.
> 
> I thought there was a general preference for that sort of thing, but I
> suppose it depends on the required macro contortions to make it happen.

I think this should be easily doable.  I'll sort it out.

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 8/9] libxl: Introduce libxl__ev_devstate
  2012-01-25 10:57       ` Ian Campbell
@ 2012-01-25 14:49         ` Ian Jackson
  0 siblings, 0 replies; 31+ messages in thread
From: Ian Jackson @ 2012-01-25 14:49 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel

Ian Campbell writes ("Re: [Xen-devel] [PATCH 8/9] libxl: Introduce libxl__ev_devstate"):
> On Tue, 2012-01-24 at 17:33 +0000, Ian Jackson wrote:
> > So, no.  When the timeout happens, the ev xswatch is deregistered and
> > can thereafter no longer generate callbacks.  If there are any
> > xenstore watch events in the pipeline for deregistered ev_xswatch's,
> > they're discarded by watchfd_callback.
> 
> What happens if the watch occurs and is delivered (in a different
> thread) e.g. just before devstate_timeout calls
> libxl__ev_devstate_cancel? The watch callback will be delivered but we
> will unconditionally go on to deliver the cancel callback as well.

All of these callback functions are called with the lock held, so
there aren't any races of this kind.

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure
  2012-01-25 14:45         ` Ian Jackson
@ 2012-01-25 16:56           ` Ian Jackson
  0 siblings, 0 replies; 31+ messages in thread
From: Ian Jackson @ 2012-01-25 16:56 UTC (permalink / raw)
  To: Ian Campbell, xen-devel

Ian Jackson writes ("Re: [Xen-devel] [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure"):
> Ian Campbell writes ("Re: [Xen-devel] [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure"):
> > On Tue, 2012-01-24 at 17:27 +0000, Ian Jackson wrote:
> > > If ao_how is non-NULL then these functions cannot return 0.
> > > If it is NULL they cannot return ASYNC_INPROGRESS.
> > > 
> > > I chose to use a new exit status because it seemed safer but that's a
> > > matter of taste and if you prefer I could return 0 for that case.
> > 
> > I'm undecided (plus it seems a bit like bikeshedding). I certainly
> > prefer either 0 or {LIBXL_}ASYNC_IN_PROGRESS to ERROR_ASYNC_IN_PROGRESS
> > though.
> 
> OK, I'll rename it.

Having thought about this some more, and particularly about what
callers would be likely to want, I decided it would be better to
abolish it and return 0 instead.

So I will do that.

Ian.

^ permalink raw reply	[flat|nested] 31+ messages in thread

end of thread, other threads:[~2012-01-25 16:56 UTC | newest]

Thread overview: 31+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-01-13 19:25 [PATCH v7 0/9] libxl: New event API Ian Jackson
2012-01-13 19:25 ` [PATCH 1/9] libxl: New API for providing OS events to libxl Ian Jackson
2012-01-18 16:35   ` Ian Campbell
2012-01-18 17:06     ` Ian Jackson
2012-01-13 19:25 ` [PATCH 2/9] ocaml, libxl: support "private" fields Ian Jackson
2012-01-18 14:03   ` Ian Campbell
2012-01-13 19:25 ` [PATCH 3/9] libxl: New event generation API Ian Jackson
2012-01-18 17:33   ` Ian Campbell
2012-01-24 16:23     ` Ian Jackson
2012-01-24 16:38       ` Ian Campbell
2012-01-24 18:43         ` Ian Jackson
2012-01-13 19:25 ` [PATCH 4/9] libxl: introduce libxl_fd_set_nonblock, rationalise _cloexec Ian Jackson
2012-01-13 19:25 ` [PATCH 5/9] libxl: Permit multithreaded event waiting Ian Jackson
2012-01-19 10:01   ` Ian Campbell
2012-01-24 16:34     ` Ian Jackson
2012-01-13 19:25 ` [PATCH 6/9] libxl: Asynchronous/long-running operation infrastructure Ian Jackson
2012-01-19 10:44   ` Ian Campbell
2012-01-24 17:27     ` Ian Jackson
2012-01-25 10:48       ` Ian Campbell
2012-01-25 14:45         ` Ian Jackson
2012-01-25 16:56           ` Ian Jackson
2012-01-13 19:25 ` [PATCH 7/9] libxl: New convenience macro CONTAINER_OF Ian Jackson
2012-01-18 14:04   ` Ian Campbell
2012-01-13 19:25 ` [PATCH 8/9] libxl: Introduce libxl__ev_devstate Ian Jackson
2012-01-19 10:54   ` Ian Campbell
2012-01-24 17:33     ` Ian Jackson
2012-01-25 10:57       ` Ian Campbell
2012-01-25 14:49         ` Ian Jackson
2012-01-13 19:25 ` [PATCH 9/9] libxl: Convert to asynchronous: device removal Ian Jackson
2012-01-19 11:55   ` Ian Campbell
2012-01-24 17:39     ` Ian Jackson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.