* [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
From: Ashish Mittal @ 2016-10-28  7:44 UTC (permalink / raw)
  To: qemu-devel, pbonzini, kwolf, armbru, berrange, jcody, famz,
	ashish.mittal, stefanha, Rakesh.Ranjan, Buddhi.Madhav
  Cc: Ketan.Nilangekar, Abhijit.Dey, Venkatesha.Mg

This patch adds support for a new block device type called "vxhs".
Source code for the qnio library that this code loads can be downloaded from:
https://github.com/MittalAshish/libqnio.git

Sample command line using the JSON syntax:
./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
-vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
-msg timestamp=on
'json:{"driver":"vxhs","vdisk_id":"{c3e9095a-a5ee-4dce-afeb-2a59fb387410}",
"server":{"host":"172.172.17.4","port":"9999"}}'

Sample command line using the URI syntax:
qemu-img convert -f raw -O raw -n
/var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
vxhs://192.168.0.1:9999/%7Bc6718f6b-0401-441d-a8c3-1f0064d75ee0%7D

Signed-off-by: Ashish Mittal <ashish.mittal@veritas.com>
---
v3 changelog:
(1) Added QAPI schema for the VxHS driver.

v2 changelog:
(1) Changes done in response to v1 comments.

TODO:
(1) Add qemu-iotest
(2) We need to be able to free all resources once we close the last vxhs drive.

 block/Makefile.objs  |   2 +
 block/trace-events   |  21 ++
 block/vxhs.c         | 669 +++++++++++++++++++++++++++++++++++++++++++++++++++
 configure            |  41 ++++
 qapi/block-core.json |  20 +-
 5 files changed, 751 insertions(+), 2 deletions(-)
 create mode 100644 block/vxhs.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 67a036a..58313a2 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -18,6 +18,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
 block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+block-obj-$(CONFIG_VXHS) += vxhs.o
 block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
@@ -38,6 +39,7 @@ rbd.o-cflags       := $(RBD_CFLAGS)
 rbd.o-libs         := $(RBD_LIBS)
 gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
 gluster.o-libs     := $(GLUSTERFS_LIBS)
+vxhs.o-libs        := $(VXHS_LIBS)
 ssh.o-cflags       := $(LIBSSH2_CFLAGS)
 ssh.o-libs         := $(LIBSSH2_LIBS)
 archipelago.o-libs := $(ARCHIPELAGO_LIBS)
diff --git a/block/trace-events b/block/trace-events
index 05fa13c..94249ee 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -114,3 +114,24 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
 qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
 qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
+
+# block/vxhs.c
+vxhs_iio_callback(int error, int reason) "ctx is NULL: error %d, reason %d"
+vxhs_setup_qnio(void *s) "Context to HyperScale IO manager = %p"
+vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
+vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
+vxhs_open_fail(int ret) "Could not open the device. Error = %d"
+vxhs_open_epipe(int ret) "Could not create a pipe for device. Bailing out. Error=%d"
+vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
+vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) for Read/Write = %d size = %"PRIu64" offset = %"PRIu64" ACB = %p. Error = %d, errno = %d"
+vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
+vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %"PRIu64
+vxhs_qnio_iio_open(const char *ip) "Failed to connect to storage agent on host-ip %s"
+vxhs_qnio_iio_devopen(const char *fname) "Failed to open vdisk device: %s"
+vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %"PRIu64
+vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
+vxhs_qemu_init_vdisk(const char *vdisk_id) "vdisk_id from json %s"
+vxhs_parse_uri_hostinfo(int num, char *host, int port) "Host %d: IP %s, Port %d"
+vxhs_qemu_init(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
+vxhs_qemu_init_filename(const char *filename) "Filename passed as %s"
+vxhs_close(char *vdisk_guid) "Closing vdisk %s"
diff --git a/block/vxhs.c b/block/vxhs.c
new file mode 100644
index 0000000..08ad681
--- /dev/null
+++ b/block/vxhs.c
@@ -0,0 +1,669 @@
+/*
+ * QEMU Block driver for Veritas HyperScale (VxHS)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+#include <qnio/qnio_api.h>
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "trace.h"
+#include "qemu/uri.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+#define VDISK_FD_READ               0
+#define VDISK_FD_WRITE              1
+
+#define VXHS_OPT_FILENAME           "filename"
+#define VXHS_OPT_VDISK_ID           "vdisk_id"
+#define VXHS_OPT_SERVER             "server"
+#define VXHS_OPT_HOST               "host"
+#define VXHS_OPT_PORT               "port"
+
+/* qnio client ioapi_ctx */
+static void *global_qnio_ctx;
+
+/* vdisk prefix to pass to qnio */
+static const char vdisk_prefix[] = "/dev/of/vdisk";
+
+typedef enum {
+    VDISK_AIO_READ,
+    VDISK_AIO_WRITE,
+    VDISK_STAT
+} VDISKAIOCmd;
+
+/*
+ * HyperScale AIO callback structure
+ */
+typedef struct VXHSAIOCB {
+    BlockAIOCB common;
+    int err;
+    int direction; /* IO direction (r/w) */
+    size_t io_offset;
+    size_t size;
+    QEMUIOVector *qiov;
+} VXHSAIOCB;
+
+typedef struct VXHSvDiskHostsInfo {
+    int qnio_cfd; /* Channel FD */
+    int vdisk_rfd; /* vDisk remote FD */
+    char *hostip; /* Host's IP addresses */
+    int port; /* Host's port number */
+} VXHSvDiskHostsInfo;
+
+/*
+ * Structure per vDisk maintained for state
+ */
+typedef struct BDRVVXHSState {
+    int fds[2];
+    int64_t vdisk_size;
+    int event_reader_pos;
+    VXHSAIOCB *qnio_event_acb;
+    VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
+    char *vdisk_guid;
+} BDRVVXHSState;
+
+static void vxhs_qnio_iio_close(BDRVVXHSState *s)
+{
+    /*
+     * Close vDisk device
+     */
+    if (s->vdisk_hostinfo.vdisk_rfd >= 0) {
+        iio_devclose(global_qnio_ctx, 0, s->vdisk_hostinfo.vdisk_rfd);
+        s->vdisk_hostinfo.vdisk_rfd = -1;
+    }
+
+    /*
+     * Close QNIO channel against cached channel-fd
+     */
+    if (s->vdisk_hostinfo.qnio_cfd >= 0) {
+        iio_close(global_qnio_ctx, s->vdisk_hostinfo.qnio_cfd);
+        s->vdisk_hostinfo.qnio_cfd = -1;
+    }
+}
+
+static int vxhs_qnio_iio_open(int *cfd, const char *of_vsa_addr,
+                              int *rfd, const char *file_name)
+{
+    /*
+     * Open qnio channel to storage agent if not opened before.
+     */
+    if (*cfd < 0) {
+        *cfd = iio_open(global_qnio_ctx, of_vsa_addr, 0);
+        if (*cfd < 0) {
+            trace_vxhs_qnio_iio_open(of_vsa_addr);
+            return -ENODEV;
+        }
+    }
+
+    /*
+     * Open vdisk device
+     */
+    *rfd = iio_devopen(global_qnio_ctx, *cfd, file_name, 0);
+    if (*rfd < 0) {
+        if (*cfd >= 0) {
+            iio_close(global_qnio_ctx, *cfd);
+            *cfd = -1;
+            *rfd = -1;
+        }
+
+        trace_vxhs_qnio_iio_devopen(file_name);
+        return -ENODEV;
+    }
+
+    return 0;
+}
+
+static void vxhs_iio_callback(int32_t rfd, uint32_t reason, void *ctx,
+                              uint32_t error, uint32_t opcode)
+{
+    VXHSAIOCB *acb = NULL;
+    BDRVVXHSState *s = NULL;
+    ssize_t ret;
+
+    switch (opcode) {
+    case IRP_READ_REQUEST:
+    case IRP_WRITE_REQUEST:
+
+        /*
+         * ctx is VXHSAIOCB*
+         * ctx is NULL if error is QNIOERROR_CHANNEL_HUP or
+         * reason is IIO_REASON_HUP
+         */
+        if (ctx) {
+            acb = ctx;
+            s = acb->common.bs->opaque;
+        } else {
+            trace_vxhs_iio_callback(error, reason);
+            goto out;
+        }
+
+        if (error) {
+            if (!acb->err) {
+                acb->err = error;
+            }
+            trace_vxhs_iio_callback(error, reason);
+        }
+
+        ret = qemu_write_full(s->fds[VDISK_FD_WRITE], &acb, sizeof(acb));
+        g_assert(ret == sizeof(acb));
+        break;
+
+    default:
+        if (error == QNIOERROR_CHANNEL_HUP) {
+            /*
+             * Channel failed, spontaneous notification,
+             * not in response to I/O
+             */
+            trace_vxhs_iio_callback_chnfail(error, errno);
+        } else {
+            trace_vxhs_iio_callback_unknwn(opcode, error);
+        }
+        break;
+    }
+out:
+    return;
+}
+
+static void vxhs_complete_aio(VXHSAIOCB *acb, BDRVVXHSState *s)
+{
+    BlockCompletionFunc *cb = acb->common.cb;
+    void *opaque = acb->common.opaque;
+    int ret = 0;
+
+    if (acb->err != 0) {
+        trace_vxhs_complete_aio(acb, acb->err);
+        /*
+         * We mask all the IO errors generically as EIO for upper layers
+         * Right now our IO Manager uses non standard error codes. Instead
+         * of confusing upper layers with incorrect interpretation we are
+         * doing this workaround.
+         */
+        ret = (-EIO);
+    }
+
+    qemu_aio_unref(acb);
+    cb(opaque, ret);
+}
+
+/*
+ * This is the HyperScale event handler registered with QEMU. It is
+ * invoked when an IO completes and its ACB pointer has been written to
+ * the pipe by the callback running in QNIO thread context. It then
+ * marks the AIO as completed and releases the HyperScale AIO callback.
+ */
+static void vxhs_aio_event_reader(void *opaque)
+{
+    BDRVVXHSState *s = opaque;
+    char *p;
+    ssize_t ret;
+
+    do {
+        p = (char *)&s->qnio_event_acb;
+        ret = read(s->fds[VDISK_FD_READ], p + s->event_reader_pos,
+                   sizeof(s->qnio_event_acb) - s->event_reader_pos);
+        if (ret > 0) {
+            s->event_reader_pos += ret;
+            if (s->event_reader_pos == sizeof(s->qnio_event_acb)) {
+                s->event_reader_pos = 0;
+                vxhs_complete_aio(s->qnio_event_acb, s);
+            }
+        }
+    } while (ret < 0 && errno == EINTR);
+}
+
+/*
+ * Call QNIO operation to create channels to do IO on vDisk.
+ */
+
+static void *vxhs_setup_qnio(void)
+{
+    void *qnio_ctx = NULL;
+
+    qnio_ctx = iio_init(vxhs_iio_callback);
+    if (qnio_ctx != NULL) {
+        trace_vxhs_setup_qnio(qnio_ctx);
+    }
+    return qnio_ctx;
+}
+
+static QemuOptsList runtime_opts = {
+    .name = "vxhs",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_FILENAME,
+            .type = QEMU_OPT_STRING,
+            .help = "URI to the Veritas HyperScale image",
+        },
+        {
+            .name = VXHS_OPT_VDISK_ID,
+            .type = QEMU_OPT_STRING,
+            .help = "UUID of the VxHS vdisk",
+        },
+        { /* end of list */ }
+    },
+};
+
+static QemuOptsList runtime_tcp_opts = {
+    .name = "vxhs_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+    .desc = {
+        {
+            .name = VXHS_OPT_HOST,
+            .type = QEMU_OPT_STRING,
+            .help = "host address (ipv4 addresses)",
+        },
+        {
+            .name = VXHS_OPT_PORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "port number on which VxHSD is listening (default 9999)",
+            .def_value_str = "9999"
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Parse the incoming URI and populate *options with the host information.
+ * The URI syntax supports only a single host. To pass information for
+ * multiple hosts, use the JSON syntax instead.
+ */
+static int vxhs_parse_uri(const char *filename, QDict *options)
+{
+    URI *uri = NULL;
+    char *hoststr, *portstr;
+    char *port;
+    int ret = 0;
+
+    trace_vxhs_parse_uri_filename(filename);
+    uri = uri_parse(filename);
+    if (!uri || !uri->server || !uri->path) {
+        uri_free(uri);
+        return -EINVAL;
+    }
+
+    hoststr = g_strdup(VXHS_OPT_SERVER".host");
+    qdict_put(options, hoststr, qstring_from_str(uri->server));
+    g_free(hoststr);
+
+    portstr = g_strdup(VXHS_OPT_SERVER".port");
+    if (uri->port) {
+        port = g_strdup_printf("%d", uri->port);
+        qdict_put(options, portstr, qstring_from_str(port));
+        g_free(port);
+    }
+    g_free(portstr);
+
+    if (strstr(uri->path, "vxhs") == NULL) {
+        qdict_put(options, "vdisk_id", qstring_from_str(uri->path));
+    }
+
+    trace_vxhs_parse_uri_hostinfo(1, uri->server, uri->port);
+    uri_free(uri);
+
+    return ret;
+}
+
+static void vxhs_parse_filename(const char *filename, QDict *options,
+                                Error **errp)
+{
+    if (qdict_haskey(options, "vdisk_id") || qdict_haskey(options, "server")) {
+        error_setg(errp, "vdisk_id/server and a file name may not be specified "
+                         "at the same time");
+        return;
+    }
+
+    if (strstr(filename, "://")) {
+        int ret = vxhs_parse_uri(filename, options);
+        if (ret < 0) {
+            error_setg(errp, "Invalid URI. URI should be of the form "
+                       "vxhs://<host_ip>:<port>/{<vdisk_id>}");
+        }
+    }
+}
+
+static int vxhs_qemu_init(QDict *options, BDRVVXHSState *s,
+                          int *cfd, int *rfd, Error **errp)
+{
+    QDict *backing_options = NULL;
+    QemuOpts *opts, *tcp_opts;
+    const char *vxhs_filename;
+    char *of_vsa_addr = NULL;
+    Error *local_err = NULL;
+    const char *vdisk_id_opt;
+    const char *server_host_opt;
+    char *file_name = NULL;
+    char *str = NULL;
+    int ret = 0;
+
+    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    vxhs_filename = qemu_opt_get(opts, VXHS_OPT_FILENAME);
+    if (vxhs_filename) {
+        trace_vxhs_qemu_init_filename(vxhs_filename);
+    }
+
+    vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
+    if (!vdisk_id_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
+        ret = -EINVAL;
+        goto out;
+    }
+    s->vdisk_guid = g_strdup(vdisk_id_opt);
+    trace_vxhs_qemu_init_vdisk(vdisk_id_opt);
+
+    str = g_strdup_printf(VXHS_OPT_SERVER".");
+    qdict_extract_subqdict(options, &backing_options, str);
+
+    /* Create opts info from runtime_tcp_opts list */
+    tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
+    if (local_err) {
+        qdict_del(backing_options, str);
+        qemu_opts_del(tcp_opts);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
+    if (!server_host_opt) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER,
+                   VXHS_OPT_SERVER"."VXHS_OPT_HOST);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    s->vdisk_hostinfo.hostip = g_strdup(server_host_opt);
+
+    s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
+                                                          VXHS_OPT_PORT),
+                                                          NULL, 0);
+
+    s->vdisk_hostinfo.qnio_cfd = -1;
+    s->vdisk_hostinfo.vdisk_rfd = -1;
+    trace_vxhs_qemu_init(s->vdisk_hostinfo.hostip,
+                         s->vdisk_hostinfo.port);
+
+    qdict_del(backing_options, str);
+    qemu_opts_del(tcp_opts);
+
+    file_name = g_strdup_printf("%s%s", vdisk_prefix, s->vdisk_guid);
+    of_vsa_addr = g_strdup_printf("of://%s:%d",
+                                s->vdisk_hostinfo.hostip,
+                                s->vdisk_hostinfo.port);
+
+    /*
+     * .bdrv_open() and .bdrv_create() run under the QEMU global mutex.
+     */
+    if (global_qnio_ctx == NULL) {
+        global_qnio_ctx = vxhs_setup_qnio();
+        if (global_qnio_ctx == NULL) {
+            error_setg(&local_err, "Failed vxhs_setup_qnio");
+            ret = -EINVAL;
+            goto out;
+        }
+    }
+
+    ret = vxhs_qnio_iio_open(cfd, of_vsa_addr, rfd, file_name);
+    if (ret) {
+        error_setg(&local_err, "Failed qnio_iio_open");
+        ret = -EIO;
+    }
+
+out:
+    g_free(str);
+    g_free(file_name);
+    g_free(of_vsa_addr);
+    qemu_opts_del(opts);
+
+    if (ret < 0) {
+        error_propagate(errp, local_err);
+        g_free(s->vdisk_hostinfo.hostip);
+        g_free(s->vdisk_guid);
+        s->vdisk_guid = NULL;
+        errno = -ret;
+    }
+
+    return ret;
+}
+
+static int vxhs_open(BlockDriverState *bs, QDict *options,
+                     int bdrv_flags, Error **errp)
+{
+    BDRVVXHSState *s = bs->opaque;
+    AioContext *aio_context;
+    int qemu_qnio_cfd = -1;
+    int qemu_rfd = -1;
+    int ret = 0;
+
+    ret = vxhs_qemu_init(options, s, &qemu_qnio_cfd, &qemu_rfd, errp);
+    if (ret < 0) {
+        trace_vxhs_open_fail(ret);
+        return ret;
+    }
+
+    s->vdisk_hostinfo.qnio_cfd = qemu_qnio_cfd;
+    s->vdisk_hostinfo.vdisk_rfd = qemu_rfd;
+    s->vdisk_size = -1;
+
+    /*
+     * Create a pipe for communicating between threads running in
+     * different contexts. Set a handler for the read event, which is
+     * triggered when an IO completion is signaled from a non-QEMU context.
+     */
+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        trace_vxhs_open_epipe(ret);
+        ret = -errno;
+        goto errout;
+    }
+    fcntl(s->fds[VDISK_FD_READ], F_SETFL, O_NONBLOCK);
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_set_fd_handler(aio_context, s->fds[VDISK_FD_READ],
+                       false, vxhs_aio_event_reader, NULL, s);
+    return 0;
+
+errout:
+    /*
+     * Close remote vDisk device if it was opened earlier
+     */
+    vxhs_qnio_iio_close(s);
+    trace_vxhs_open_fail(ret);
+    return ret;
+}
+
+static const AIOCBInfo vxhs_aiocb_info = {
+    .aiocb_size = sizeof(VXHSAIOCB)
+};
+
+/*
+ * This allocates a QEMU-VXHS callback for each IO request
+ * and passes it to QNIO. When QNIO completes the work,
+ * the result is passed back through the callback.
+ */
+static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
+                               QEMUIOVector *qiov, int nb_sectors,
+                               BlockCompletionFunc *cb, void *opaque, int iodir)
+{
+    VXHSAIOCB *acb = NULL;
+    BDRVVXHSState *s = bs->opaque;
+    size_t size;
+    uint64_t offset;
+    int iio_flags = 0;
+    int ret = 0;
+    uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
+    /*
+     * Set up and initialize the VXHSAIOCB.
+     * Every single field must be initialized,
+     * since the acb is picked up from a slab
+     * without being zero-initialized first.
+     */
+    acb->io_offset = offset;
+    acb->size = size;
+    acb->err = 0;
+    acb->qiov = qiov;
+    acb->direction = iodir;
+
+    iio_flags = (IIO_FLAG_DONE | IIO_FLAG_ASYNC);
+
+    switch (iodir) {
+    case VDISK_AIO_WRITE:
+            ret = iio_writev(global_qnio_ctx, rfd, qiov->iov, qiov->niov,
+                             offset, (uint64_t)size, (void *)acb, iio_flags);
+            break;
+    case VDISK_AIO_READ:
+            ret = iio_readv(global_qnio_ctx, rfd, qiov->iov, qiov->niov,
+                            offset, (uint64_t)size, (void *)acb, iio_flags);
+            break;
+    default:
+            trace_vxhs_aio_rw_invalid(iodir);
+            goto errout;
+    }
+
+    if (ret != 0) {
+        trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
+                                acb, ret, errno);
+        goto errout;
+    }
+    return &acb->common;
+
+errout:
+    qemu_aio_unref(acb);
+    return NULL;
+}
+
+static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
+                       opaque, VDISK_AIO_READ);
+}
+
+static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
+                                   int64_t sector_num, QEMUIOVector *qiov,
+                                   int nb_sectors,
+                                   BlockCompletionFunc *cb, void *opaque)
+{
+    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
+                       cb, opaque, VDISK_AIO_WRITE);
+}
+
+static void vxhs_close(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+
+    trace_vxhs_close(s->vdisk_guid);
+    close(s->fds[VDISK_FD_READ]);
+    close(s->fds[VDISK_FD_WRITE]);
+
+    /*
+     * Clear the read event handler registered with QEMU for this vDisk
+     */
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
+                       false, NULL, NULL, NULL);
+    g_free(s->vdisk_guid);
+    s->vdisk_guid = NULL;
+    vxhs_qnio_iio_close(s);
+
+    /*
+     * Free the dynamically allocated hostip string
+     */
+    g_free(s->vdisk_hostinfo.hostip);
+    s->vdisk_hostinfo.hostip = NULL;
+    s->vdisk_hostinfo.port = 0;
+}
+
+static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
+{
+    int64_t vdisk_size = -1;
+    int ret = 0;
+    uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
+
+    ret = iio_ioctl(global_qnio_ctx, rfd, IOR_VDISK_STAT, &vdisk_size, NULL, 0);
+    if (ret < 0) {
+        trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
+        return -EIO;
+    }
+
+    trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
+    return vdisk_size;
+}
+
+/*
+ * Returns the size of the vDisk in bytes. This is required
+ * by the QEMU upper block layer so that the size is visible
+ * to the guest.
+ */
+static int64_t vxhs_getlength(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+    int64_t vdisk_size;
+
+    if (s->vdisk_size <= 0) {
+        vdisk_size = vxhs_get_vdisk_stat(s);
+        if (vdisk_size < 0) {
+            return -EIO;
+        }
+        s->vdisk_size = vdisk_size;
+    }
+
+    return s->vdisk_size;
+}
+
+static void vxhs_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVVXHSState *s = bs->opaque;
+
+    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
+                       false, NULL, NULL, NULL);
+
+}
+
+static void vxhs_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    BDRVVXHSState *s = bs->opaque;
+
+    aio_set_fd_handler(new_context, s->fds[VDISK_FD_READ],
+                       false, vxhs_aio_event_reader, NULL, s);
+}
+
+static BlockDriver bdrv_vxhs = {
+    .format_name                  = "vxhs",
+    .protocol_name                = "vxhs",
+    .instance_size                = sizeof(BDRVVXHSState),
+    .bdrv_file_open               = vxhs_open,
+    .bdrv_parse_filename          = vxhs_parse_filename,
+    .bdrv_close                   = vxhs_close,
+    .bdrv_getlength               = vxhs_getlength,
+    .bdrv_aio_readv               = vxhs_aio_readv,
+    .bdrv_aio_writev              = vxhs_aio_writev,
+    .bdrv_detach_aio_context      = vxhs_detach_aio_context,
+    .bdrv_attach_aio_context      = vxhs_attach_aio_context,
+};
+
+static void bdrv_vxhs_init(void)
+{
+    bdrv_register(&bdrv_vxhs);
+}
+
+block_init(bdrv_vxhs_init);
diff --git a/configure b/configure
index 8e10059..eeef5d6 100755
--- a/configure
+++ b/configure
@@ -321,6 +321,7 @@ numa=""
 tcmalloc="no"
 jemalloc="no"
 replication="yes"
+vxhs=""
 
 # parse CC options first
 for opt do
@@ -1162,6 +1163,11 @@ for opt do
   ;;
   --enable-replication) replication="yes"
   ;;
+  --disable-vxhs) vxhs="no"
+  ;;
+  --enable-vxhs) vxhs="yes"
+  ;;
+
   *)
       echo "ERROR: unknown option $opt"
       echo "Try '$0 --help' for more information"
@@ -1394,6 +1400,7 @@ disabled with --disable-FEATURE, default is enabled if available:
   tcmalloc        tcmalloc support
   jemalloc        jemalloc support
   replication     replication support
+  vxhs            Veritas HyperScale vDisk backend support
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -4677,6 +4684,33 @@ if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO $TMPO; then
 fi
 
 ##########################################
+# Veritas HyperScale block driver VxHS
+# Check if libqnio is installed
+
+if test "$vxhs" != "no" ; then
+  cat > $TMPC <<EOF
+#include <stdint.h>
+#include <qnio/qnio_api.h>
+
+void *vxhs_callback;
+
+int main(void) {
+    iio_init(vxhs_callback);
+    return 0;
+}
+EOF
+  vxhs_libs="-lqnio"
+  if compile_prog "" "$vxhs_libs" ; then
+    vxhs=yes
+  else
+    if test "$vxhs" = "yes" ; then
+      feature_not_found "vxhs block device" "Install libqnio. See github"
+    fi
+    vxhs=no
+  fi
+fi
+
+##########################################
 # End of CC checks
 # After here, no more $cc or $ld runs
 
@@ -5042,6 +5076,7 @@ echo "tcmalloc support  $tcmalloc"
 echo "jemalloc support  $jemalloc"
 echo "avx2 optimization $avx2_opt"
 echo "replication support $replication"
+echo "VxHS block device $vxhs"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5650,6 +5685,12 @@ if test "$pthread_setname_np" = "yes" ; then
   echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak
 fi
 
+if test "$vxhs" = "yes" ; then
+  echo "CONFIG_VXHS=y" >> $config_host_mak
+  echo "VXHS_CFLAGS=$vxhs_cflags" >> $config_host_mak
+  echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
+fi
+
 if test "$tcg_interpreter" = "yes"; then
   QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
 elif test "$ARCH" = "sparc64" ; then
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 97b1205..d5407a4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1708,7 +1708,7 @@
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
-            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
+            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'vxhs',
             'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',
             'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw',
 	    'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
@@ -2220,6 +2220,21 @@
   'data': { 'filename': 'str' } }
 
 ##
+# @BlockdevOptionsVxHS
+#
+# Driver specific block device options for VxHS
+#
+# @vdisk_id:    UUID of VxHS volume
+#
+# @server:      vxhs server IP, port
+#
+# Since: 2.7
+##
+{ 'struct': 'BlockdevOptionsVxHS',
+  'data': { 'vdisk_id': 'str',
+            'server': 'InetSocketAddress' } }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.  Many options are available for all
@@ -2283,7 +2298,8 @@
       'vhdx':       'BlockdevOptionsGenericFormat',
       'vmdk':       'BlockdevOptionsGenericCOWFormat',
       'vpc':        'BlockdevOptionsGenericFormat',
-      'vvfat':      'BlockdevOptionsVVFAT'
+      'vvfat':      'BlockdevOptionsVVFAT',
+      'vxhs':       'BlockdevOptionsVxHS'
   } }
 
 ##
-- 
2.5.5

* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
From: Eric Blake @ 2016-10-28 14:44 UTC (permalink / raw)
  To: Ashish Mittal, qemu-devel, pbonzini, kwolf, armbru, berrange,
	jcody, famz, ashish.mittal, stefanha, Rakesh.Ranjan,
	Buddhi.Madhav
  Cc: Ketan.Nilangekar, Abhijit.Dey, Venkatesha.Mg

On 10/28/2016 02:44 AM, Ashish Mittal wrote:
> This patch adds support for a new block device type called "vxhs".
> Source code for the qnio library that this code loads can be downloaded from:
> https://github.com/MittalAshish/libqnio.git
> 
> Sample command line using the JSON syntax:
> ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
> -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
> -msg timestamp=on
> 'json:{"driver":"vxhs","vdisk_id":"{c3e9095a-a5ee-4dce-afeb-2a59fb387410}",
> "server":{"host":"172.172.17.4","port":"9999"}}'

Are the {} really part of vdisk_id?  That's not a usual portion of a
UUID.  And it should probably be spelled vdisk-id.
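
If the braces are only decoration around the UUID, a small normalization
helper in the driver would let users pass either form. A minimal sketch
(vxhs_strip_braces is an illustrative name, not something in this patch):

/*
 * Illustrative helper (not in the patch): accept the vdisk UUID with or
 * without a surrounding {...} wrapper, returning a bare, newly allocated
 * copy either way.
 */
static char *vxhs_strip_braces(const char *vdisk_id)
{
    size_t len = strlen(vdisk_id);

    if (len >= 2 && vdisk_id[0] == '{' && vdisk_id[len - 1] == '}') {
        return g_strndup(vdisk_id + 1, len - 2);
    }
    return g_strdup(vdisk_id);
}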

It's a shame that port is a string and not an int, but it matches
existing practice, so that part is correct.

> 
> Sample command line using the URI syntax:
> qemu-img convert -f raw -O raw -n
> /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
> vxhs://192.168.0.1:9999/%7Bc6718f6b-0401-441d-a8c3-1f0064d75ee0%7D
> 

The URI syntax is potentially harder to use when compared to structured
usage (Kevin's work to add -blockdev, which depends on the QAPI definition).

> Signed-off-by: Ashish Mittal <ashish.mittal@veritas.com>
> ---
> v3 changelog:
> (1) Added QAPI schema for the VxHS driver.
> 

For this email, I'm focusing just on the QAPI.

> +++ b/qapi/block-core.json
> @@ -1708,7 +1708,7 @@
>  ##
>  { 'enum': 'BlockdevDriver',
>    'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
> -            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
> +            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'vxhs',
>              'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',

Missing mention of the new enum value in the comments above.

>              'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw',
>  	    'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
> @@ -2220,6 +2220,21 @@
>    'data': { 'filename': 'str' } }
>  
>  ##
> +# @BlockdevOptionsVxHS
> +#
> +# Driver specific block device options for VxHS
> +#
> +# @vdisk_id:    UUID of VxHS volume

Again, this should be vdisk-id.

> +#
> +# @server:      vxhs server IP, port
> +#
> +# Since: 2.7

You missed 2.7; this should be 2.8 (if you make soft freeze) or even 2.9.

> +##
> +{ 'struct': 'BlockdevOptionsVxHS',
> +  'data': { 'vdisk_id': 'str',
> +            'server': 'InetSocketAddress' } }

Is there any way to use a Unix socket, or is this server ONLY accessible
over IPv4/IPv6?


> +
> +##
>  # @BlockdevOptions
>  #
>  # Options for creating a block device.  Many options are available for all
> @@ -2283,7 +2298,8 @@
>        'vhdx':       'BlockdevOptionsGenericFormat',
>        'vmdk':       'BlockdevOptionsGenericCOWFormat',
>        'vpc':        'BlockdevOptionsGenericFormat',
> -      'vvfat':      'BlockdevOptionsVVFAT'
> +      'vvfat':      'BlockdevOptionsVVFAT',
> +      'vxhs':       'BlockdevOptionsVxHS'
>    } }
>  
>  ##
> 

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
From: Jeff Cody @ 2016-10-28 19:03 UTC (permalink / raw)
  To: Ashish Mittal
  Cc: qemu-devel, pbonzini, kwolf, armbru, berrange, famz,
	ashish.mittal, stefanha, Rakesh.Ranjan, Buddhi.Madhav,
	Ketan.Nilangekar, Abhijit.Dey, Venkatesha.Mg

On Fri, Oct 28, 2016 at 12:44:27AM -0700, Ashish Mittal wrote:
> This patch adds support for a new block device type called "vxhs".
> Source code for the qnio library that this code loads can be downloaded from:
> https://github.com/MittalAshish/libqnio.git
> 
[...]
> 
> +/* qnio client ioapi_ctx */
> +static void *global_qnio_ctx;

This is still never freed anywhere, is it?  Once all drives are done with
the global context, we need to free the resources allocated in it.

You'll need something like a ref counter to track usage of the global
instance, and then free the resources when the last drive is closed.

Coincidentally, there was very recently a patch on list to do a similar
thing for gluster.  If you look at "block/gluster: memory usage: use one
glfs instance per volume", you can see the basic idea I am talking about
with regards to wrapping the global ctx in a struct, and tracking refs and
unrefs to it.
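
Something along these lines would do it; a minimal sketch, assuming
everything stays under the QEMU global mutex (iio_fini() is a hypothetical
teardown counterpart to iio_init(); substitute whatever cleanup entry
point libqnio actually provides):

typedef struct VXHSQnioCtx {
    void *ioapi_ctx;   /* context returned by iio_init() */
    int refcnt;        /* number of open vxhs drives using it */
} VXHSQnioCtx;

static VXHSQnioCtx vxhs_global_ctx;

/* Called from vxhs_open(), under the QEMU global mutex */
static void *vxhs_qnio_ctx_ref(void)
{
    if (vxhs_global_ctx.refcnt == 0) {
        vxhs_global_ctx.ioapi_ctx = iio_init(vxhs_iio_callback);
        if (vxhs_global_ctx.ioapi_ctx == NULL) {
            return NULL;
        }
    }
    vxhs_global_ctx.refcnt++;
    return vxhs_global_ctx.ioapi_ctx;
}

/* Called from vxhs_close(); tears the context down with the last drive */
static void vxhs_qnio_ctx_unref(void)
{
    assert(vxhs_global_ctx.refcnt > 0);
    if (--vxhs_global_ctx.refcnt == 0) {
        iio_fini(vxhs_global_ctx.ioapi_ctx);   /* hypothetical teardown */
        vxhs_global_ctx.ioapi_ctx = NULL;
    }
}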


> +
> +/* vdisk prefix to pass to qnio */
> +static const char vdisk_prefix[] = "/dev/of/vdisk";
> +
> +typedef enum {
> +    VDISK_AIO_READ,
> +    VDISK_AIO_WRITE,
> +    VDISK_STAT
> +} VDISKAIOCmd;
> +
> +/*
> + * HyperScale AIO callbacks structure
> + */
> +typedef struct VXHSAIOCB {
> +    BlockAIOCB common;
> +    int err;
> +    int direction; /* IO direction (r/w) */
> +    size_t io_offset;
> +    size_t size;
> +    QEMUIOVector *qiov;
> +} VXHSAIOCB;
> +
> +typedef struct VXHSvDiskHostsInfo {
> +    int qnio_cfd; /* Channel FD */
> +    int vdisk_rfd; /* vDisk remote FD */
> +    char *hostip; /* Host's IP addresses */
> +    int port; /* Host's port number */
> +} VXHSvDiskHostsInfo;
> +
> +/*
> + * Structure per vDisk maintained for state
> + */
> +typedef struct BDRVVXHSState {
> +    int fds[2];
> +    int64_t vdisk_size;
> +    int event_reader_pos;
> +    VXHSAIOCB *qnio_event_acb;
> +    VXHSvDiskHostsInfo vdisk_hostinfo; /* Per host info */
> +    char *vdisk_guid;
> +} BDRVVXHSState;
> +
> +static void vxhs_qnio_iio_close(BDRVVXHSState *s)
> +{
> +    /*
> +     * Close vDisk device
> +     */
> +    if (s->vdisk_hostinfo.vdisk_rfd >= 0) {
> +        iio_devclose(global_qnio_ctx, 0, s->vdisk_hostinfo.vdisk_rfd);
> +        s->vdisk_hostinfo.vdisk_rfd = -1;
> +    }
> +
> +    /*
> +     * Close QNIO channel against cached channel-fd
> +     */
> +    if (s->vdisk_hostinfo.qnio_cfd >= 0) {
> +        iio_close(global_qnio_ctx, s->vdisk_hostinfo.qnio_cfd);
> +        s->vdisk_hostinfo.qnio_cfd = -1;
> +    }
> +}
> +
> +static int vxhs_qnio_iio_open(int *cfd, const char *of_vsa_addr,
> +                              int *rfd, const char *file_name)
> +{
> +    /*
> +     * Open qnio channel to storage agent if not opened before.
> +     */
> +    if (*cfd < 0) {
> +        *cfd = iio_open(global_qnio_ctx, of_vsa_addr, 0);
> +        if (*cfd < 0) {
> +            trace_vxhs_qnio_iio_open(of_vsa_addr);
> +            return -ENODEV;
> +        }
> +    }
> +
> +    /*
> +     * Open vdisk device
> +     */
> +    *rfd = iio_devopen(global_qnio_ctx, *cfd, file_name, 0);
> +    if (*rfd < 0) {
> +        if (*cfd >= 0) {
> +            iio_close(global_qnio_ctx, *cfd);
> +            *cfd = -1;
> +            *rfd = -1;
> +        }
> +
> +        trace_vxhs_qnio_iio_devopen(file_name);
> +        return -ENODEV;
> +    }
> +
> +    return 0;
> +}
> +
> +static void vxhs_iio_callback(int32_t rfd, uint32_t reason, void *ctx,
> +                              uint32_t error, uint32_t opcode)
> +{
> +    VXHSAIOCB *acb = NULL;
> +    BDRVVXHSState *s = NULL;
> +    ssize_t ret;
> +
> +    switch (opcode) {
> +    case IRP_READ_REQUEST:
> +    case IRP_WRITE_REQUEST:
> +
> +        /*
> +         * ctx is VXHSAIOCB*
> +         * ctx is NULL if error is QNIOERROR_CHANNEL_HUP or
> +         * reason is IIO_REASON_HUP
> +         */
> +        if (ctx) {
> +            acb = ctx;
> +            s = acb->common.bs->opaque;
> +        } else {
> +            trace_vxhs_iio_callback(error, reason);
> +            goto out;
> +        }
> +
> +        if (error) {
> +            if (!acb->err) {
> +                acb->err = error;
> +            }
> +            trace_vxhs_iio_callback(error, reason);
> +        }
> +
> +        ret = qemu_write_full(s->fds[VDISK_FD_WRITE], &acb, sizeof(acb));
> +        g_assert(ret == sizeof(acb));
> +        break;
> +
> +    default:
> +        if (error == QNIOERROR_CHANNEL_HUP) {
> +            /*
> +             * Channel failed, spontaneous notification,
> +             * not in response to I/O
> +             */
> +            trace_vxhs_iio_callback_chnfail(error, errno);
> +        } else {
> +            trace_vxhs_iio_callback_unknwn(opcode, error);
> +        }
> +        break;
> +    }
> +out:
> +    return;
> +}
> +
> +static void vxhs_complete_aio(VXHSAIOCB *acb, BDRVVXHSState *s)
> +{
> +    BlockCompletionFunc *cb = acb->common.cb;
> +    void *opaque = acb->common.opaque;
> +    int ret = 0;
> +
> +    if (acb->err != 0) {
> +        trace_vxhs_complete_aio(acb, acb->err);
> +        /*
> +         * We mask all the IO errors generically as EIO for upper layers
> +         * Right now our IO Manager uses non standard error codes. Instead
> +         * of confusing upper layers with incorrect interpretation we are
> +         * doing this workaround.
> +         */
> +        ret = (-EIO);
> +    }
> +
> +    qemu_aio_unref(acb);
> +    cb(opaque, ret);
> +}
> +
> +/*
> + * This is the HyperScale event handler registered to QEMU.
> + * It is invoked when any IO gets completed and written on pipe
> + * by callback called from QNIO thread context. Then it marks
> + * the AIO as completed, and releases HyperScale AIO callbacks.
> + */
> +static void vxhs_aio_event_reader(void *opaque)
> +{
> +    BDRVVXHSState *s = opaque;
> +    char *p;
> +    ssize_t ret;
> +
> +    do {
> +        p = (char *)&s->qnio_event_acb;
> +        ret = read(s->fds[VDISK_FD_READ], p + s->event_reader_pos,
> +                   sizeof(s->qnio_event_acb) - s->event_reader_pos);
> +        if (ret > 0) {
> +            s->event_reader_pos += ret;
> +            if (s->event_reader_pos == sizeof(s->qnio_event_acb)) {
> +                s->event_reader_pos = 0;
> +                vxhs_complete_aio(s->qnio_event_acb, s);
> +            }
> +        }
> +    } while (ret < 0 && errno == EINTR);
> +}
> +
> +/*
> + * Call QNIO operation to create channels to do IO on vDisk.
> + */
> +
> +static void *vxhs_setup_qnio(void)
> +{
> +    void *qnio_ctx = NULL;
> +
> +    qnio_ctx = iio_init(vxhs_iio_callback);
> +    if (qnio_ctx != NULL) {
> +        trace_vxhs_setup_qnio(qnio_ctx);
> +    }
> +    return qnio_ctx;
> +}
> +
> +static QemuOptsList runtime_opts = {
> +    .name = "vxhs",
> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
> +    .desc = {
> +        {
> +            .name = VXHS_OPT_FILENAME,
> +            .type = QEMU_OPT_STRING,
> +            .help = "URI to the Veritas HyperScale image",
> +        },
> +        {
> +            .name = VXHS_OPT_VDISK_ID,
> +            .type = QEMU_OPT_STRING,
> +            .help = "UUID of the VxHS vdisk",
> +        },
> +        { /* end of list */ }
> +    },
> +};
> +
> +static QemuOptsList runtime_tcp_opts = {
> +    .name = "vxhs_tcp",
> +    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
> +    .desc = {
> +        {
> +            .name = VXHS_OPT_HOST,
> +            .type = QEMU_OPT_STRING,
> +            .help = "host address (ipv4 addresses)",
> +        },
> +        {
> +            .name = VXHS_OPT_PORT,
> +            .type = QEMU_OPT_NUMBER,
> +            .help = "port number on which VxHSD is listening (default 9999)",
> +            .def_value_str = "9999"
> +        },
> +        { /* end of list */ }
> +    },
> +};
> +
> +/*
> + * Parse the incoming URI and populate *options with the host information.
> + * URI syntax has the limitation of supporting only one host info.
> + * To pass multiple host information, use the JSON syntax.
> + */
> +static int vxhs_parse_uri(const char *filename, QDict *options)
> +{
> +    URI *uri = NULL;
> +    char *hoststr, *portstr;
> +    char *port;
> +    int ret = 0;
> +
> +    trace_vxhs_parse_uri_filename(filename);
> +    uri = uri_parse(filename);
> +    if (!uri || !uri->server || !uri->path) {
> +        uri_free(uri);
> +        return -EINVAL;
> +    }
> +
> +    hoststr = g_strdup(VXHS_OPT_SERVER".host");
> +    qdict_put(options, hoststr, qstring_from_str(uri->server));
> +    g_free(hoststr);
> +
> +    portstr = g_strdup(VXHS_OPT_SERVER".port");
> +    if (uri->port) {
> +        port = g_strdup_printf("%d", uri->port);
> +        qdict_put(options, portstr, qstring_from_str(port));
> +        g_free(port);
> +    }
> +    g_free(portstr);
> +
> +    if (strstr(uri->path, "vxhs") == NULL) {
> +        qdict_put(options, "vdisk_id", qstring_from_str(uri->path));
> +    }
> +
> +    trace_vxhs_parse_uri_hostinfo(1, uri->server, uri->port);
> +    uri_free(uri);
> +
> +    return ret;
> +}
> +
> +static void vxhs_parse_filename(const char *filename, QDict *options,
> +                                Error **errp)
> +{
> +    if (qdict_haskey(options, "vdisk_id") || qdict_haskey(options, "server")) {
> +        error_setg(errp, "vdisk_id/server and a file name may not be specified "
> +                         "at the same time");
> +        return;
> +    }
> +
> +    if (strstr(filename, "://")) {
> +        int ret = vxhs_parse_uri(filename, options);
> +        if (ret < 0) {
> +            error_setg(errp, "Invalid URI. URI should be of the form "
> +                       "  vxhs://<host_ip>:<port>/{<vdisk_id>}");
> +        }
> +    }
> +}
> +
> +static int vxhs_qemu_init(QDict *options, BDRVVXHSState *s,
> +                          int *cfd, int *rfd, Error **errp)
> +{
> +    QDict *backing_options = NULL;
> +    QemuOpts *opts, *tcp_opts;
> +    const char *vxhs_filename;
> +    char *of_vsa_addr = NULL;
> +    Error *local_err = NULL;
> +    const char *vdisk_id_opt;
> +    const char *server_host_opt;
> +    char *file_name = NULL;
> +    char *str = NULL;
> +    int ret = 0;
> +
> +    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
> +    qemu_opts_absorb_qdict(opts, options, &local_err);
> +    if (local_err) {
> +        ret = -EINVAL;
> +        goto out;
> +    }
> +
> +    vxhs_filename = qemu_opt_get(opts, VXHS_OPT_FILENAME);
> +    if (vxhs_filename) {
> +        trace_vxhs_qemu_init_filename(vxhs_filename);
> +    }
> +
> +    vdisk_id_opt = qemu_opt_get(opts, VXHS_OPT_VDISK_ID);
> +    if (!vdisk_id_opt) {
> +        error_setg(&local_err, QERR_MISSING_PARAMETER, VXHS_OPT_VDISK_ID);
> +        ret = -EINVAL;
> +        goto out;
> +    }
> +    s->vdisk_guid = g_strdup(vdisk_id_opt);
> +    trace_vxhs_qemu_init_vdisk(vdisk_id_opt);
> +
> +    str = g_strdup_printf(VXHS_OPT_SERVER".");
> +    qdict_extract_subqdict(options, &backing_options, str);
> +
> +    /* Create opts info from runtime_tcp_opts list */
> +    tcp_opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
> +    qemu_opts_absorb_qdict(tcp_opts, backing_options, &local_err);
> +    if (local_err) {
> +        qdict_del(backing_options, str);
> +        qemu_opts_del(tcp_opts);
> +        ret = -EINVAL;
> +        goto out;
> +    }
> +
> +    server_host_opt = qemu_opt_get(tcp_opts, VXHS_OPT_HOST);
> +    if (!server_host_opt) {
> +        error_setg(&local_err, QERR_MISSING_PARAMETER,
> +                   VXHS_OPT_SERVER"."VXHS_OPT_HOST);
> +        ret = -EINVAL;
> +        goto out;
> +    }
> +
> +    s->vdisk_hostinfo.hostip = g_strdup(server_host_opt);
> +
> +    s->vdisk_hostinfo.port = g_ascii_strtoll(qemu_opt_get(tcp_opts,
> +                                                          VXHS_OPT_PORT),
> +                                                          NULL, 0);
> +
> +    s->vdisk_hostinfo.qnio_cfd = -1;
> +    s->vdisk_hostinfo.vdisk_rfd = -1;
> +    trace_vxhs_qemu_init(s->vdisk_hostinfo.hostip,
> +                         s->vdisk_hostinfo.port);
> +
> +    qdict_del(backing_options, str);
> +    qemu_opts_del(tcp_opts);
> +
> +    file_name = g_strdup_printf("%s%s", vdisk_prefix, s->vdisk_guid);
> +    of_vsa_addr = g_strdup_printf("of://%s:%d",
> +                                s->vdisk_hostinfo.hostip,
> +                                s->vdisk_hostinfo.port);
> +
> +    /*
> +     * .bdrv_open() and .bdrv_create() run under the QEMU global mutex.
> +     */
> +    if (global_qnio_ctx == NULL) {
> +        global_qnio_ctx = vxhs_setup_qnio();
> +        if (global_qnio_ctx == NULL) {
> +            error_setg(&local_err, "Failed vxhs_setup_qnio");
> +            ret = -EINVAL;
> +            goto out;
> +        }
> +    }
> +
> +    ret = vxhs_qnio_iio_open(cfd, of_vsa_addr, rfd, file_name);
> +    if (ret) {
> +        error_setg(&local_err, "Failed qnio_iio_open");
> +        ret = -EIO;
> +    }
> +
> +out:
> +    g_free(str);
> +    g_free(file_name);
> +    g_free(of_vsa_addr);
> +    qemu_opts_del(opts);
> +
> +    if (ret < 0) {
> +        error_propagate(errp, local_err);
> +        g_free(s->vdisk_hostinfo.hostip);
> +        g_free(s->vdisk_guid);
> +        s->vdisk_guid = NULL;
> +        errno = -ret;
> +    }
> +
> +    return ret;
> +}
> +
> +static int vxhs_open(BlockDriverState *bs, QDict *options,
> +                     int bdrv_flags, Error **errp)
> +{
> +    BDRVVXHSState *s = bs->opaque;
> +    AioContext *aio_context;
> +    int qemu_qnio_cfd = -1;
> +    int qemu_rfd = -1;
> +    int ret = 0;
> +
> +    ret = vxhs_qemu_init(options, s, &qemu_qnio_cfd, &qemu_rfd, errp);
> +    if (ret < 0) {
> +        trace_vxhs_open_fail(ret);
> +        return ret;
> +    }
> +
> +    s->vdisk_hostinfo.qnio_cfd = qemu_qnio_cfd;
> +    s->vdisk_hostinfo.vdisk_rfd = qemu_rfd;
> +    s->vdisk_size = -1;
> +
> +    /*
> +     * Create a pipe for communicating between two threads in different
> +     * context. Set handler for read event, which gets triggered when
> +     * IO completion is done by non-QEMU context.
> +     */
> +    ret = qemu_pipe(s->fds);
> +    if (ret < 0) {
> +        trace_vxhs_open_epipe(ret);
> +        ret = -errno;
> +        goto errout;
> +    }
> +    fcntl(s->fds[VDISK_FD_READ], F_SETFL, O_NONBLOCK);
> +
> +    aio_context = bdrv_get_aio_context(bs);
> +    aio_set_fd_handler(aio_context, s->fds[VDISK_FD_READ],
> +                       false, vxhs_aio_event_reader, NULL, s);
> +    return 0;
> +
> +errout:
> +    /*
> +     * Close remote vDisk device if it was opened earlier
> +     */
> +    vxhs_qnio_iio_close(s);
> +    trace_vxhs_open_fail(ret);
> +    return ret;
> +}
> +
> +static const AIOCBInfo vxhs_aiocb_info = {
> +    .aiocb_size = sizeof(VXHSAIOCB)
> +};
> +
> +/*
> + * This allocates QEMU-VXHS callback for each IO
> + * and is passed to QNIO. When QNIO completes the work,
> + * it will be passed back through the callback.
> + */
> +static BlockAIOCB *vxhs_aio_rw(BlockDriverState *bs, int64_t sector_num,
> +                               QEMUIOVector *qiov, int nb_sectors,
> +                               BlockCompletionFunc *cb, void *opaque, int iodir)
> +{
> +    VXHSAIOCB *acb = NULL;
> +    BDRVVXHSState *s = bs->opaque;
> +    size_t size;
> +    uint64_t offset;
> +    int iio_flags = 0;
> +    int ret = 0;
> +    uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
> +
> +    offset = sector_num * BDRV_SECTOR_SIZE;
> +    size = nb_sectors * BDRV_SECTOR_SIZE;
> +    acb = qemu_aio_get(&vxhs_aiocb_info, bs, cb, opaque);
> +    /*
> +     * Setup or initialize VXHSAIOCB.
> +     * Every single field should be initialized since
> +     * acb will be picked up from the slab without
> +     * initializing with zero.
> +     */
> +    acb->io_offset = offset;
> +    acb->size = size;
> +    acb->err = 0;
> +    acb->qiov = qiov;
> +    acb->direction = iodir;
> +
> +    iio_flags = (IIO_FLAG_DONE | IIO_FLAG_ASYNC);
> +
> +    switch (iodir) {
> +    case VDISK_AIO_WRITE:
> +            ret = iio_writev(global_qnio_ctx, rfd, qiov->iov, qiov->niov,
> +                             offset, (uint64_t)size, (void *)acb, iio_flags);
> +            break;
> +    case VDISK_AIO_READ:
> +            ret = iio_readv(global_qnio_ctx, rfd, qiov->iov, qiov->niov,
> +                            offset, (uint64_t)size, (void *)acb, iio_flags);
> +            break;
> +    default:
> +            trace_vxhs_aio_rw_invalid(iodir);
> +            goto errout;
> +    }
> +
> +    if (ret != 0) {
> +        trace_vxhs_aio_rw_ioerr(s->vdisk_guid, iodir, size, offset,
> +                                acb, ret, errno);
> +        goto errout;
> +    }
> +    return &acb->common;
> +
> +errout:
> +    qemu_aio_unref(acb);
> +    return NULL;
> +}
> +
> +static BlockAIOCB *vxhs_aio_readv(BlockDriverState *bs,
> +                                   int64_t sector_num, QEMUIOVector *qiov,
> +                                   int nb_sectors,
> +                                   BlockCompletionFunc *cb, void *opaque)
> +{
> +    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
> +                       opaque, VDISK_AIO_READ);
> +}
> +
> +static BlockAIOCB *vxhs_aio_writev(BlockDriverState *bs,
> +                                   int64_t sector_num, QEMUIOVector *qiov,
> +                                   int nb_sectors,
> +                                   BlockCompletionFunc *cb, void *opaque)
> +{
> +    return vxhs_aio_rw(bs, sector_num, qiov, nb_sectors,
> +                       cb, opaque, VDISK_AIO_WRITE);
> +}
> +
> +static void vxhs_close(BlockDriverState *bs)
> +{
> +    BDRVVXHSState *s = bs->opaque;
> +
> +    trace_vxhs_close(s->vdisk_guid);
> +    close(s->fds[VDISK_FD_READ]);
> +    close(s->fds[VDISK_FD_WRITE]);
> +
> +    /*
> +     * Clearing all the event handlers for oflame registered to QEMU
> +     */
> +    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
> +                       false, NULL, NULL, NULL);
> +    g_free(s->vdisk_guid);
> +    s->vdisk_guid = NULL;
> +    vxhs_qnio_iio_close(s);
> +
> +    /*
> +     * Free the dynamically allocated hostip string
> +     */
> +    g_free(s->vdisk_hostinfo.hostip);
> +    s->vdisk_hostinfo.hostip = NULL;
> +    s->vdisk_hostinfo.port = 0;
> +}
> +
> +static int64_t vxhs_get_vdisk_stat(BDRVVXHSState *s)
> +{
> +    int64_t vdisk_size = -1;
> +    int ret = 0;
> +    uint32_t rfd = s->vdisk_hostinfo.vdisk_rfd;
> +
> +    ret = iio_ioctl(global_qnio_ctx, rfd, IOR_VDISK_STAT, &vdisk_size, NULL, 0);
> +    if (ret < 0) {
> +        trace_vxhs_get_vdisk_stat_err(s->vdisk_guid, ret, errno);
> +        return -EIO;
> +    }
> +
> +    trace_vxhs_get_vdisk_stat(s->vdisk_guid, vdisk_size);
> +    return vdisk_size;
> +}
> +
> +/*
> + * Return the size of the vDisk in bytes. The QEMU block layer
> + * needs this so that the disk size is visible to the guest.
> + */
> +static int64_t vxhs_getlength(BlockDriverState *bs)
> +{
> +    BDRVVXHSState *s = bs->opaque;
> +    int64_t vdisk_size;
> +
> +    if (s->vdisk_size <= 0) {
> +        vdisk_size = vxhs_get_vdisk_stat(s);
> +        if (vdisk_size < 0) {
> +            return -EIO;
> +        }
> +        s->vdisk_size = vdisk_size;
> +    }
> +
> +    return s->vdisk_size;
> +}
> +
> +static void vxhs_detach_aio_context(BlockDriverState *bs)
> +{
> +    BDRVVXHSState *s = bs->opaque;
> +
> +    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fds[VDISK_FD_READ],
> +                       false, NULL, NULL, NULL);
> +}
> +
> +static void vxhs_attach_aio_context(BlockDriverState *bs,
> +                                   AioContext *new_context)
> +{
> +    BDRVVXHSState *s = bs->opaque;
> +
> +    aio_set_fd_handler(new_context, s->fds[VDISK_FD_READ],
> +                       false, vxhs_aio_event_reader, NULL, s);
> +}
> +
> +static BlockDriver bdrv_vxhs = {
> +    .format_name                  = "vxhs",
> +    .protocol_name                = "vxhs",
> +    .instance_size                = sizeof(BDRVVXHSState),
> +    .bdrv_file_open               = vxhs_open,
> +    .bdrv_parse_filename          = vxhs_parse_filename,
> +    .bdrv_close                   = vxhs_close,
> +    .bdrv_getlength               = vxhs_getlength,
> +    .bdrv_aio_readv               = vxhs_aio_readv,
> +    .bdrv_aio_writev              = vxhs_aio_writev,
> +    .bdrv_detach_aio_context      = vxhs_detach_aio_context,
> +    .bdrv_attach_aio_context      = vxhs_attach_aio_context,
> +};
> +
> +static void bdrv_vxhs_init(void)
> +{
> +    bdrv_register(&bdrv_vxhs);
> +}
> +
> +block_init(bdrv_vxhs_init);
> diff --git a/configure b/configure
> index 8e10059..eeef5d6 100755
> --- a/configure
> +++ b/configure
> @@ -321,6 +321,7 @@ numa=""
>  tcmalloc="no"
>  jemalloc="no"
>  replication="yes"
> +vxhs=""
>  
>  # parse CC options first
>  for opt do
> @@ -1162,6 +1163,11 @@ for opt do
>    ;;
>    --enable-replication) replication="yes"
>    ;;
> +  --disable-vxhs) vxhs="no"
> +  ;;
> +  --enable-vxhs) vxhs="yes"
> +  ;;
>    *)
>        echo "ERROR: unknown option $opt"
>        echo "Try '$0 --help' for more information"
> @@ -1394,6 +1400,7 @@ disabled with --disable-FEATURE, default is enabled if available:
>    tcmalloc        tcmalloc support
>    jemalloc        jemalloc support
>    replication     replication support
> +  vxhs            Veritas HyperScale vDisk backend support
>  
>  NOTE: The object files are built at the place where configure is launched
>  EOF
> @@ -4677,6 +4684,33 @@ if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO $TMPO; then
>  fi
>  
>  ##########################################
> +# Veritas HyperScale block driver VxHS
> +# Check if libqnio is installed
> +
> +if test "$vxhs" != "no" ; then
> +  cat > $TMPC <<EOF
> +#include <stdint.h>
> +#include <qnio/qnio_api.h>
> +
> +void *vxhs_callback;
> +
> +int main(void) {
> +    iio_init(vxhs_callback);
> +    return 0;
> +}
> +EOF
> +  vxhs_libs="-lqnio"
> +  if compile_prog "" "$vxhs_libs" ; then
> +    vxhs=yes
> +  else
> +    if test "$vxhs" = "yes" ; then
> +      feature_not_found "vxhs block device" "Install libqnio (https://github.com/MittalAshish/libqnio)"
> +    fi
> +    vxhs=no
> +  fi
> +fi
> +
> +##########################################
>  # End of CC checks
>  # After here, no more $cc or $ld runs
>  
> @@ -5042,6 +5076,7 @@ echo "tcmalloc support  $tcmalloc"
>  echo "jemalloc support  $jemalloc"
>  echo "avx2 optimization $avx2_opt"
>  echo "replication support $replication"
> +echo "VxHS block device $vxhs"
>  
>  if test "$sdl_too_old" = "yes"; then
>  echo "-> Your SDL version is too old - please upgrade to have SDL support"
> @@ -5650,6 +5685,12 @@ if test "$pthread_setname_np" = "yes" ; then
>    echo "CONFIG_PTHREAD_SETNAME_NP=y" >> $config_host_mak
>  fi
>  
> +if test "$vxhs" = "yes" ; then
> +  echo "CONFIG_VXHS=y" >> $config_host_mak
> +  echo "VXHS_CFLAGS=$vxhs_cflags" >> $config_host_mak
> +  echo "VXHS_LIBS=$vxhs_libs" >> $config_host_mak
> +fi
> +
>  if test "$tcg_interpreter" = "yes"; then
>    QEMU_INCLUDES="-I\$(SRC_PATH)/tcg/tci $QEMU_INCLUDES"
>  elif test "$ARCH" = "sparc64" ; then
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 97b1205..d5407a4 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -1708,7 +1708,7 @@
>  ##
>  { 'enum': 'BlockdevDriver',
>    'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
> -            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
> +            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'vxhs',
>              'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',
>              'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw',
>  	    'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
> @@ -2220,6 +2220,21 @@
>    'data': { 'filename': 'str' } }
>  
>  ##
> +# @BlockdevOptionsVxHS
> +#
> +# Driver specific block device options for VxHS
> +#
> +# @vdisk_id:    UUID of VxHS volume
> +#
> +# @server:      vxhs server IP, port
> +#
> +# Since: 2.7
> +##
> +{ 'struct': 'BlockdevOptionsVxHS',
> +  'data': { 'vdisk_id': 'str',
> +            'server': 'InetSocketAddress' } }
> +
> +##
>  # @BlockdevOptions
>  #
>  # Options for creating a block device.  Many options are available for all
> @@ -2283,7 +2298,8 @@
>        'vhdx':       'BlockdevOptionsGenericFormat',
>        'vmdk':       'BlockdevOptionsGenericCOWFormat',
>        'vpc':        'BlockdevOptionsGenericFormat',
> -      'vvfat':      'BlockdevOptionsVVFAT'
> +      'vvfat':      'BlockdevOptionsVVFAT',
> +      'vxhs':       'BlockdevOptionsVxHS'
>    } }
>  
>  ##
> -- 
> 2.5.5
> 


* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
  2016-10-28 14:44 ` Eric Blake
@ 2016-10-28 21:30   ` ashish mittal
  2016-10-31 10:55     ` Daniel P. Berrange
  0 siblings, 1 reply; 11+ messages in thread
From: ashish mittal @ 2016-10-28 21:30 UTC (permalink / raw)
  To: Eric Blake
  Cc: qemu-devel, Paolo Bonzini, Kevin Wolf, Markus Armbruster,
	Daniel P. Berrange, Jeff Cody, famz, Ashish Mittal,
	Stefan Hajnoczi, Rakesh Ranjan, Buddhi.Madhav, Ketan.Nilangekar,
	Abhijit.Dey, Venkatesha.Mg

Hi Eric,

On Fri, Oct 28, 2016 at 7:44 AM, Eric Blake <eblake@redhat.com> wrote:
> On 10/28/2016 02:44 AM, Ashish Mittal wrote:
>> This patch adds support for a new block device type called "vxhs".
>> Source code for the qnio library that this code loads can be downloaded from:
>> https://github.com/MittalAshish/libqnio.git
>>
>> Sample command line using the JSON syntax:
>> ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
>> -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
>> -msg timestamp=on
>> 'json:{"driver":"vxhs","vdisk_id":"{c3e9095a-a5ee-4dce-afeb-2a59fb387410}",
>> "server":{"host":"172.172.17.4","port":"9999"}}'
>
> Are the {} really part of vdisk_id?  That's not a usual portion of a
> UUID.  And it should probably be spelled vdisk-id.
>

The {} are not required by qemu/libqnio or the test server; they are
just an artifact that our OpenStack components need. I will remove
them from the sample command syntax in v4.
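
The v4 sample syntax will then read:

'json:{"driver":"vxhs","vdisk_id":"c3e9095a-a5ee-4dce-afeb-2a59fb387410",
"server":{"host":"172.172.17.4","port":"9999"}}'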

> It's a shame that port is a string and not an int, but it matches
> existing practice, so that part is correct.
>
>>
>> Sample command line using the URI syntax:
>> qemu-img convert -f raw -O raw -n
>> /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
>> vxhs://192.168.0.1:9999/%7Bc6718f6b-0401-441d-a8c3-1f0064d75ee0%7D
>>
>
> The URI syntax is potentially harder to use when compared to structured
> usage (Kevin's work to add -blockdev, which depends on the QAPI definition).
>

We decided to keep both after discussion with other reviewers. Do the
qemu-img and qemu-io binaries support the JSON syntax the same way
qemu-system-x86_64 does?

>> Signed-off-by: Ashish Mittal <ashish.mittal@veritas.com>
>> ---
>> v3 changelog:
>> (1) Added QAPI schema for the VxHS driver.
>>
>
> For this email, I'm focusing just on the QAPI.
>
>> +++ b/qapi/block-core.json
>> @@ -1708,7 +1708,7 @@
>>  ##
>>  { 'enum': 'BlockdevDriver',
>>    'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
>> -            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
>> +            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'vxhs',
>>              'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',
>
> Missing mention of the new enum value in the comments above.
>

Will fix in v4.

>>              'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw',
>>           'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
>> @@ -2220,6 +2220,21 @@
>>    'data': { 'filename': 'str' } }
>>
>>  ##
>> +# @BlockdevOptionsVxHS
>> +#
>> +# Driver specific block device options for VxHS
>> +#
>> +# @vdisk_id:    UUID of VxHS volume
>
> Again, this should be vdisk-id.
>
Will fix in v4.

>> +#
>> +# @server:      vxhs server IP, port
>> +#
>> +# Since: 2.7
>
> You missed 2.7; this should be 2.8 (if you make soft freeze) or even 2.9.
>
Will fix in v4.

>> +##
>> +{ 'struct': 'BlockdevOptionsVxHS',
>> +  'data': { 'vdisk_id': 'str',
>> +            'server': 'InetSocketAddress' } }
>
> Is there any way to use a Unix socket, or is this server ONLY accessible
> over IPv4/IPv6?
>

Right now we support IPv4 only.

>
>> +
>> +##
>>  # @BlockdevOptions
>>  #
>>  # Options for creating a block device.  Many options are available for all
>> @@ -2283,7 +2298,8 @@
>>        'vhdx':       'BlockdevOptionsGenericFormat',
>>        'vmdk':       'BlockdevOptionsGenericCOWFormat',
>>        'vpc':        'BlockdevOptionsGenericFormat',
>> -      'vvfat':      'BlockdevOptionsVVFAT'
>> +      'vvfat':      'BlockdevOptionsVVFAT',
>> +      'vxhs':       'BlockdevOptionsVxHS'
>>    } }
>>
>>  ##
>>
>
> --
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
>


* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
  2016-10-28 19:03 ` Jeff Cody
@ 2016-10-28 21:58   ` Buddhi Madhav
  0 siblings, 0 replies; 11+ messages in thread
From: Buddhi Madhav @ 2016-10-28 21:58 UTC (permalink / raw)
  To: Jeff Cody, Ashish Mittal
  Cc: qemu-devel, pbonzini, kwolf, armbru, berrange, famz,
	Ashish Mittal, stefanha, Rakesh Ranjan, Ketan Nilangekar,
	Abhijit Dey, Venkatesha M.G.



On 10/28/16, 12:03 PM, "Jeff Cody" <jcody@redhat.com> wrote:

>On Fri, Oct 28, 2016 at 12:44:27AM -0700, Ashish Mittal wrote:
>> This patch adds support for a new block device type called "vxhs".
>> Source code for the qnio library that this code loads can be downloaded from:
>> https://github.com/MittalAshish/libqnio.git
>> 
>> Sample command line using the JSON syntax:
>> ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
>> -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
>> -msg timestamp=on
>> 'json:{"driver":"vxhs","vdisk_id":"{c3e9095a-a5ee-4dce-afeb-2a59fb387410}",
>> "server":{"host":"172.172.17.4","port":"9999"}}'
>> 
>> Sample command line using the URI syntax:
>> qemu-img convert -f raw -O raw -n
>> /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
>> vxhs://192.168.0.1:9999/%7Bc6718f6b-0401-441d-a8c3-1f0064d75ee0%7D
>> 
>> Signed-off-by: Ashish Mittal <ashish.mittal@veritas.com>
>> ---
>> v3 changelog:
>> (1) Added QAPI schema for the VxHS driver.
>> 
>> v2 changelog:
>> (1) Changes done in response to v1 comments.
>> 
>> TODO:
>> (1) Add qemu-iotest
>> (2) We need to be able to free all resources once we close the last vxhs drive.
>> 
>>  block/Makefile.objs  |   2 +
>>  block/trace-events   |  21 ++
>>  block/vxhs.c         | 669 +++++++++++++++++++++++++++++++++++++++++++++++++++
>>  configure            |  41 ++++
>>  qapi/block-core.json |  20 +-
>>  5 files changed, 751 insertions(+), 2 deletions(-)
>>  create mode 100644 block/vxhs.c
>> 
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index 67a036a..58313a2 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -18,6 +18,7 @@ block-obj-$(CONFIG_LIBNFS) += nfs.o
>>  block-obj-$(CONFIG_CURL) += curl.o
>>  block-obj-$(CONFIG_RBD) += rbd.o
>>  block-obj-$(CONFIG_GLUSTERFS) += gluster.o
>> +block-obj-$(CONFIG_VXHS) += vxhs.o
>>  block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
>>  block-obj-$(CONFIG_LIBSSH2) += ssh.o
>>  block-obj-y += accounting.o dirty-bitmap.o
>> @@ -38,6 +39,7 @@ rbd.o-cflags       := $(RBD_CFLAGS)
>>  rbd.o-libs         := $(RBD_LIBS)
>>  gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
>>  gluster.o-libs     := $(GLUSTERFS_LIBS)
>> +vxhs.o-libs        := $(VXHS_LIBS)
>>  ssh.o-cflags       := $(LIBSSH2_CFLAGS)
>>  ssh.o-libs         := $(LIBSSH2_LIBS)
>>  archipelago.o-libs := $(ARCHIPELAGO_LIBS)
>> diff --git a/block/trace-events b/block/trace-events
>> index 05fa13c..94249ee 100644
>> --- a/block/trace-events
>> +++ b/block/trace-events
>> @@ -114,3 +114,24 @@ qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s
>>  qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
>>  qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
>>  qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
>> +
>> +# block/vxhs.c
>> +vxhs_iio_callback(int error, int reason) "ctx is NULL: error %d, reason %d"
>> +vxhs_setup_qnio(void *s) "Context to HyperScale IO manager = %p"
>> +vxhs_iio_callback_chnfail(int err, int error) "QNIO channel failed, no i/o %d, %d"
>> +vxhs_iio_callback_unknwn(int opcode, int err) "unexpected opcode %d, errno %d"
>> +vxhs_open_fail(int ret) "Could not open the device. Error = %d"
>> +vxhs_open_epipe(int ret) "Could not create a pipe for device. Bailing out. Error=%d"
>> +vxhs_aio_rw_invalid(int req) "Invalid I/O request iodir %d"
>> +vxhs_aio_rw_ioerr(char *guid, int iodir, uint64_t size, uint64_t off, void *acb, int ret, int err) "IO ERROR (vDisk %s) FOR : Read/Write = %d size = %lu offset = %lu ACB = %p. Error = %d, errno = %d"
>> +vxhs_get_vdisk_stat_err(char *guid, int ret, int err) "vDisk (%s) stat ioctl failed, ret = %d, errno = %d"
>> +vxhs_get_vdisk_stat(char *vdisk_guid, uint64_t vdisk_size) "vDisk %s stat ioctl returned size %lu"
>> +vxhs_qnio_iio_open(const char *ip) "Failed to connect to storage agent on host-ip %s"
>> +vxhs_qnio_iio_devopen(const char *fname) "Failed to open vdisk device: %s"
>> +vxhs_complete_aio(void *acb, uint64_t ret) "aio failed acb %p ret %ld"
>> +vxhs_parse_uri_filename(const char *filename) "URI passed via bdrv_parse_filename %s"
>> +vxhs_qemu_init_vdisk(const char *vdisk_id) "vdisk_id from json %s"
>> +vxhs_parse_uri_hostinfo(int num, char *host, int port) "Host %d: IP %s, Port %d"
>> +vxhs_qemu_init(char *of_vsa_addr, int port) "Adding host %s:%d to BDRVVXHSState"
>> +vxhs_qemu_init_filename(const char *filename) "Filename passed as %s"
>> +vxhs_close(char *vdisk_guid) "Closing vdisk %s"
>> diff --git a/block/vxhs.c b/block/vxhs.c
>> new file mode 100644
>> index 0000000..08ad681
>> --- /dev/null
>> +++ b/block/vxhs.c
>> @@ -0,0 +1,669 @@
>> +/*
>> + * QEMU Block driver for Veritas HyperScale (VxHS)
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "block/block_int.h"
>> +#include <qnio/qnio_api.h>
>> +#include "qapi/qmp/qerror.h"
>> +#include "qapi/qmp/qdict.h"
>> +#include "qapi/qmp/qstring.h"
>> +#include "trace.h"
>> +#include "qemu/uri.h"
>> +#include "qapi/error.h"
>> +#include "qemu/error-report.h"
>> +
>> +#define VDISK_FD_READ               0
>> +#define VDISK_FD_WRITE              1
>> +
>> +#define VXHS_OPT_FILENAME           "filename"
>> +#define VXHS_OPT_VDISK_ID           "vdisk_id"
>> +#define VXHS_OPT_SERVER             "server"
>> +#define VXHS_OPT_HOST               "host"
>> +#define VXHS_OPT_PORT               "port"
>> +
>> +/* qnio client ioapi_ctx */
>> +static void *global_qnio_ctx;
>
>This is still never freed anywhere, is it?  Once all drives are done with
>the global context, we need to free the resources allocated in it.
>
>You'll need something like a ref counter to track usage of the global
>instance, and then free the resources when the last drive is closed.
>
>Coincidentally, there was very recently a patch on list to do a similar
>thing for gluster.  If you look at "block/gluster: memory usage: use one
>glfs instance per volume", you can see the basic idea I am talking about
>with regards to wrapping the global ctx in a struct, and tracking refs and
>unrefs to it.
>

The next patch (v4) will contain the fix for this. We have added
refcounting for device open/close; on the last device close, the
library resources are freed via a new library API, iio_fini().
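
Roughly, the new logic is shaped like this (a sketch only; the final
names and signatures may differ in the v4 patch):

    static int vxhs_ref_count;

    static void *vxhs_setup_qnio(void)
    {
        if (vxhs_ref_count++ == 0) {
            /* first open: create the global qnio library context */
            global_qnio_ctx = iio_init(vxhs_iio_callback);
        }
        return global_qnio_ctx;
    }

    static void vxhs_teardown_qnio(void)
    {
        if (--vxhs_ref_count == 0) {
            /* last close: release all qnio library resources */
            iio_fini();
            global_qnio_ctx = NULL;
        }
    }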


* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
  2016-10-28 21:30   ` ashish mittal
@ 2016-10-31 10:55     ` Daniel P. Berrange
  2016-11-01  4:03       ` ashish mittal
  0 siblings, 1 reply; 11+ messages in thread
From: Daniel P. Berrange @ 2016-10-31 10:55 UTC (permalink / raw)
  To: ashish mittal
  Cc: Eric Blake, qemu-devel, Paolo Bonzini, Kevin Wolf,
	Markus Armbruster, Jeff Cody, famz, Ashish Mittal,
	Stefan Hajnoczi, Rakesh Ranjan, Buddhi.Madhav, Ketan.Nilangekar,
	Abhijit.Dey, Venkatesha.Mg

On Fri, Oct 28, 2016 at 02:30:47PM -0700, ashish mittal wrote:
> Hi Eric,
> 
> On Fri, Oct 28, 2016 at 7:44 AM, Eric Blake <eblake@redhat.com> wrote:
> > On 10/28/2016 02:44 AM, Ashish Mittal wrote:
> >> This patch adds support for a new block device type called "vxhs".
> >> Source code for the qnio library that this code loads can be downloaded from:
> >> https://github.com/MittalAshish/libqnio.git
> >>
> >> Sample command line using the JSON syntax:
> >> ./qemu-system-x86_64 -name instance-00000008 -S -vnc 0.0.0.0:0 -k en-us
> >> -vga cirrus -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5
> >> -msg timestamp=on
> >> 'json:{"driver":"vxhs","vdisk_id":"{c3e9095a-a5ee-4dce-afeb-2a59fb387410}",
> >> "server":{"host":"172.172.17.4","port":"9999"}}'
> >
> > Are the {} really part of vdisk_id?  That's not a usual portion of a
> > UUID.  And it should probably be spelled vdisk-id.
> >
> 
> The {} are not required for qemu/libqnio or the test server. It is
> just some artifact that our openstack components need. Then test
> server does not need {} at all. I will remove it from the sample
> command syntax in v4.
> 
> > It's a shame that port is a string and not an int, but it matches
> > existing practice, so that part is correct.
> >
> >>
> >> Sample command line using the URI syntax:
> >> qemu-img convert -f raw -O raw -n
> >> /var/lib/nova/instances/_base/0c5eacd5ebea5ed914b6a3e7b18f1ce734c386ad
> >> vxhs://192.168.0.1:9999/%7Bc6718f6b-0401-441d-a8c3-1f0064d75ee0%7D
> >>
> >
> > The URI syntax is potentially harder to use when compared to structured
> > usage (Kevin's work to add -blockdev, which depends on the QAPI definition).
> >
> 
> We decided to keep both after discussion with other reviewers. Do
> qemu-img and qemu-io binaries support json syntax similar to
> qemu-system-x86_64?
> 
> >> Signed-off-by: Ashish Mittal <ashish.mittal@veritas.com>
> >> ---
> >> v3 changelog:
> >> (1) Added QAPI schema for the VxHS driver.
> >>
> >
> > For this email, I'm focusing just on the QAPI.
> >
> >> +++ b/qapi/block-core.json
> >> @@ -1708,7 +1708,7 @@
> >>  ##
> >>  { 'enum': 'BlockdevDriver',
> >>    'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
> >> -            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
> >> +            'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', 'vxhs',
> >>              'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',
> >
> > Missing mention of the new enum value in the comments above.
> >
> 
> Will fix in v4.
> 
> >>              'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw',
> >>           'replication', 'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
> >> @@ -2220,6 +2220,21 @@
> >>    'data': { 'filename': 'str' } }
> >>
> >>  ##
> >> +# @BlockdevOptionsVxHS
> >> +#
> >> +# Driver specific block device options for VxHS
> >> +#
> >> +# @vdisk_id:    UUID of VxHS volume
> >
> > Again, this should be vdisk-id.
> >
> Will fix in v4.
> 
> >> +#
> >> +# @server:      vxhs server IP, port
> >> +#
> >> +# Since: 2.7
> >
> > You missed 2.7; this should be 2.8 (if you make soft freeze) or even 2.9.
> >
> Will fix in v4.
> 
> >> +##
> >> +{ 'struct': 'BlockdevOptionsVxHS',
> >> +  'data': { 'vdisk_id': 'str',
> >> +            'server': 'InetSocketAddress' } }
> >
> > Is there any way to use a Unix socket, or is this server ONLY accessible
> > over IPv4/IPv6?
> >
> 
> Right now we support IPv4 only.

IMHO it should allow use of UNIX sockets, as it's possible to have
SSH set up a tunnel to an IP server and expose the endpoint via a
UNIX socket. So even if your reference server only supports IPv4,
users can conceivably connect with any socket protocol.
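
For example, with OpenSSH 6.7 or newer you can forward a local UNIX
socket to a remote TCP service (the paths and hosts below are just
placeholders):

  ssh -N -L /tmp/vxhs.sock:192.168.0.1:9999 user@gateway

and then hand /tmp/vxhs.sock to the client.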


Regards,
Daniel
-- 
|: http://berrange.com      -o-    http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org              -o-             http://virt-manager.org :|
|: http://entangle-photo.org       -o-    http://search.cpan.org/~danberr/ :|


* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
  2016-10-31 10:55     ` Daniel P. Berrange
@ 2016-11-01  4:03       ` ashish mittal
  2016-11-02  9:57         ` Kevin Wolf
  0 siblings, 1 reply; 11+ messages in thread
From: ashish mittal @ 2016-11-01  4:03 UTC (permalink / raw)
  To: Daniel P. Berrange
  Cc: Eric Blake, qemu-devel, Paolo Bonzini, Kevin Wolf,
	Markus Armbruster, Jeff Cody, famz, Ashish Mittal,
	Stefan Hajnoczi, Rakesh Ranjan, Buddhi.Madhav, Ketan.Nilangekar,
	Abhijit.Dey, Venkatesha.Mg

>> >> +##
>> >> +{ 'struct': 'BlockdevOptionsVxHS',
>> >> +  'data': { 'vdisk_id': 'str',
>> >> +            'server': 'InetSocketAddress' } }
>> >
>> > Is there any way to use a Unix socket, or is this server ONLY accessible
>> > over IPv4/IPv6?
>> >
>>
>> Right now we support IPv4 only.
>
> IMHO it should allow use of UNIX sockets, as it's possible to have
> SSH set up a tunnel to an IP server and expose the endpoint via a
> UNIX socket. So even if your reference server only supports IPv4,
> users can conceivably connect with any socket protocol.
>

This is not a use-case that we have in mind right now, but a very fair
point! Kind of like accessing the audio/video streams remotely from my
Raspberry Pi over ssh. Is it OK if we target this in a future patch
after proper review/testing?

-Ashish


* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
  2016-11-01  4:03       ` ashish mittal
@ 2016-11-02  9:57         ` Kevin Wolf
  2016-11-02 14:13           ` Eric Blake
  0 siblings, 1 reply; 11+ messages in thread
From: Kevin Wolf @ 2016-11-02  9:57 UTC (permalink / raw)
  To: ashish mittal
  Cc: Daniel P. Berrange, Eric Blake, qemu-devel, Paolo Bonzini,
	Markus Armbruster, Jeff Cody, famz, Ashish Mittal,
	Stefan Hajnoczi, Rakesh Ranjan, Buddhi.Madhav, Ketan.Nilangekar,
	Abhijit.Dey, Venkatesha.Mg

Am 01.11.2016 um 05:03 hat ashish mittal geschrieben:
> >> >> +##
> >> >> +{ 'struct': 'BlockdevOptionsVxHS',
> >> >> +  'data': { 'vdisk_id': 'str',
> >> >> +            'server': 'InetSocketAddress' } }
> >> >
> >> > Is there any way to use a Unix socket, or is this server ONLY accessible
> >> > over IPv4/IPv6?
> >> >
> >>
> >> Right now we support IPv4 only.
> >
> > IMHO it should allow use of UNIX sockets, as it's possible to have
> > SSH set up a tunnel to an IP server and expose the endpoint via a
> > UNIX socket. So even if your reference server only supports IPv4,
> > users can conceivably connect with any socket protocol.
> >
> 
> This is not a use-case that we have in mind right now, but a very fair
> point! Kind of like accessing the audio/video streams remotely from my
> Raspberry Pi over ssh. Is it OK if we target this in a future patch
> after proper review/testing?

No, going from InetSocketAddress to SocketAddress changes the API in an
incompatible way (previously working blockdev-add commands would stop
working), so we must decide now before the API is introduced.

Kevin


* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
  2016-11-02  9:57         ` Kevin Wolf
@ 2016-11-02 14:13           ` Eric Blake
  2016-11-02 15:02             ` Kevin Wolf
  2016-11-02 16:41             ` ashish mittal
  0 siblings, 2 replies; 11+ messages in thread
From: Eric Blake @ 2016-11-02 14:13 UTC (permalink / raw)
  To: Kevin Wolf, ashish mittal
  Cc: Daniel P. Berrange, qemu-devel, Paolo Bonzini, Markus Armbruster,
	Jeff Cody, famz, Ashish Mittal, Stefan Hajnoczi, Rakesh Ranjan,
	Buddhi.Madhav, Ketan.Nilangekar, Abhijit.Dey, Venkatesha.Mg

On 11/02/2016 04:57 AM, Kevin Wolf wrote:
>>> IMHO it should allow use of UNIX sockets, as it's possible to have
>>> SSH set up a tunnel to an IP server and expose the endpoint via a
>>> UNIX socket. So even if your reference server only supports IPv4,
>>> users can conceivably connect with any socket protocol.
>>>
>>
>> This is not a use-case that we have in mind right now, but a very fair
>> point! Kind of like accessing the audio/video streams remotely from my
>> Raspberry Pi over ssh. Is it OK if we target this in a future patch
>> after proper review/testing?
> 
> No, going from InetSocketAddress to SocketAddress changes the API in an
> incompatible way (previously working blockdev-add commands would stop
> working), so we must decide now before the API is introduced.

Going from InetSocketAddress to SocketAddress is a pain because
SocketAddress is a 'simple union' (which really means extra nesting on
the wire).  I'd really rather that blockdev-add move towards the
GlusterServer style of a 'flat union', which CAN be done in a
backwards-compatible manner.  Or, with examples:

InetSocketAddress alone:
{ "host": "foo", "port": "1000" }

SocketAddress:
{ "type": "inet", "data": { "host": "foo", "port": "1000" } }

GlusterServer:
{ "type": "tcp", "host": "foo", "port": "1000" }

For comparison, we wrote NFSServer with an eye towards possible
expansion into a flat version (more like GlusterServer than SocketAddress).

NFSServer:
{ "type": "inet", "host": "foo" }


[side note: Uggh - why did we let gluster go into 2.7 with 'tcp' instead
of 'inet' :( I'm wondering if we want gluster in 2.8 to recognize 'inet'
as a synonym for 'tcp'; I could propose a patch if we think it is
worthwhile]

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org




* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
  2016-11-02 14:13           ` Eric Blake
@ 2016-11-02 15:02             ` Kevin Wolf
  2016-11-02 16:41             ` ashish mittal
  1 sibling, 0 replies; 11+ messages in thread
From: Kevin Wolf @ 2016-11-02 15:02 UTC (permalink / raw)
  To: Eric Blake
  Cc: ashish mittal, Daniel P. Berrange, qemu-devel, Paolo Bonzini,
	Markus Armbruster, Jeff Cody, famz, Ashish Mittal,
	Stefan Hajnoczi, Rakesh Ranjan, Buddhi.Madhav, Ketan.Nilangekar,
	Abhijit.Dey, Venkatesha.Mg


Am 02.11.2016 um 15:13 hat Eric Blake geschrieben:
> On 11/02/2016 04:57 AM, Kevin Wolf wrote:
> >>> IMHO it should allow use of UNIX sockets, as it's possible to have
> >>> SSH set up a tunnel to an IP server and expose the endpoint via a
> >>> UNIX socket. So even if your reference server only supports IPv4,
> >>> users can conceivably connect with any socket protocol.
> >>>
> >>
> >> This is not a use-case that we have in mind right now, but a very fair
> >> point! Kind of like accessing the audio/video streams remotely from my
> >> Raspberry Pi over ssh. Is it OK if we target this in a future patch
> >> after proper review/testing?
> > 
> > No, going from InetSocketAddress to SocketAddress changes the API in an
> > incompatible way (previously working blockdev-add commands would stop
> > working), so we must decide now before the API is introduced.
> 
> Going from InetSocketAddress to SocketAddress is a pain because
> SocketAddress is a 'simple union' (which really means extra nesting on
> the wire).  I'd really rather that blockdev-add move towards the
> GlusterServer style of a 'flat union', which CAN be done in a
> backwards-compatible manner.

However, the actual networking functions in qemu take a SocketAddress or
InetSocketAddress pointer, not whatever else we would invent for the
block layer API, so we would have to add conversion functions. The whole
point of using these types was to have a consistent API with other
subsystems. :-/

gluster and NFS can use something different only because they don't
use the qemu networking functions; they only pass on whatever limited
set of options their external libraries support.
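
Every flat server type would also need an adapter before calling into
the common socket code, something along these lines (rough sketch
against a hypothetical flat VxHSServer union):

    static InetSocketAddress *vxhs_server_to_inet(VxHSServer *srv)
    {
        /* copy the flat union's inet branch into the type that
         * qemu's socket helpers actually consume */
        InetSocketAddress *inet = g_new0(InetSocketAddress, 1);

        inet->host = g_strdup(srv->u.inet.host);
        inet->port = g_strdup(srv->u.inet.port);
        return inet;
    }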

> [side note: Uggh - why did we let gluster go into 2.7 with 'tcp' instead
> of 'inet' :( I'm wondering if we want gluster in 2.8 to recognize 'inet'
> as a synonym for 'tcp'; I could propose a patch if we think it is
> worthwhile]

Fine with me. Remove 'tcp' from the schema then as long as blockdev-add
is still experimental. We only need to keep it working on the command
line.

Kevin



* Re: [Qemu-devel] [PATCH v3] block/vxhs: Add Veritas HyperScale VxHS block device support
  2016-11-02 14:13           ` Eric Blake
  2016-11-02 15:02             ` Kevin Wolf
@ 2016-11-02 16:41             ` ashish mittal
  1 sibling, 0 replies; 11+ messages in thread
From: ashish mittal @ 2016-11-02 16:41 UTC (permalink / raw)
  To: Eric Blake
  Cc: Kevin Wolf, Daniel P. Berrange, qemu-devel, Paolo Bonzini,
	Markus Armbruster, Jeff Cody, famz, Ashish Mittal,
	Stefan Hajnoczi, Rakesh Ranjan, Buddhi.Madhav, Ketan.Nilangekar,
	Abhijit.Dey, Venkatesha.Mg

On Wed, Nov 2, 2016 at 7:13 AM, Eric Blake <eblake@redhat.com> wrote:
> On 11/02/2016 04:57 AM, Kevin Wolf wrote:
>>>> IMHO it should allow use of UNIX sockets, as it's possible to have
>>>> SSH set up a tunnel to an IP server and expose the endpoint via a
>>>> UNIX socket. So even if your reference server only supports IPv4,
>>>> users can conceivably connect with any socket protocol.
>>>>
>>>
>>> This is not a use-case that we have in mind right now, but a very fair
>>> point! Kind of like accessing the audio/video streams remotely from my
>>> Raspberry Pi over ssh. Is it OK if we target this in a future patch
>>> after proper review/testing?
>>
>> No, going from InetSocketAddress to SocketAddress changes the API in an
>> incompatible way (previously working blockdev-add commands would stop
>> working), so we must decide now before the API is introduced.
>
> Going from InetSocketAddress to SocketAddress is a pain because
> SocketAddress is a 'simple union' (which really means extra nesting on
> the wire).  I'd really rather that blockdev-add move towards the
> GlusterServer style of a 'flat union', which CAN be done in a
> backwards-compatible manner.  Or, with examples:
>
> InetSocketAddress alone:
> { "host": "foo", "port": "1000" }
>
> SocketAddress:
> { "type": "inet", "data": { "host": "foo", "port": "1000" } }
>
> GlusterServer:
> { "type": "tcp", "host": "foo", "port": "1000" }
>
> For comparison, we wrote NFSServer with an eye towards possible
> expansion into a flat version (more like GlusterServer than SocketAddress).
>
> NFSServer:
> { "type": "inet", "host": "foo" }
>

So do I still need to add UNIX socket support (i.e. go from
InetSocketAddress to SocketAddress)?
If yes, would it be OK to implement just the interface for now (not
the backend) and issue a message like "UNIX sockets are not yet
supported"?

>
> [side note: Uggh - why did we let gluster go into 2.7 with 'tcp' instead
> of 'inet' :( I'm wondering if we want gluster in 2.8 to recognize 'inet'
> as a synonym for 'tcp'; I could propose a patch if we think it is
> worthwhile]
>
> --
> Eric Blake   eblake redhat com    +1-919-301-3266
> Libvirt virtualization library http://libvirt.org
>

