From mboxrd@z Thu Jan  1 00:00:00 1970
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Subject: Re: [PATCH] libxl: new xlu_disk_parse function
Date: Mon, 28 Mar 2011 19:13:38 +0100
Message-ID: <alpine.DEB.2.00.1103281909590.5516@kaball-desktop>
References: <1300988989-18305-1-git-send-email-ian.jackson@eu.citrix.com>
	<1300988989-18305-2-git-send-email-ian.jackson@eu.citrix.com>
	<1301333477.1691.7.camel@qabil.uk.xensource.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="US-ASCII"
Return-path: <xen-devel-bounces@lists.xensource.com>
In-Reply-To: <1301333477.1691.7.camel@qabil.uk.xensource.com>
List-Unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xensource.com>
List-Help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-Subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
Sender: xen-devel-bounces@lists.xensource.com
Errors-To: xen-devel-bounces@lists.xensource.com
To: Gianni Tedesco <gianni.tedesco@citrix.com>
Cc: Ian@lonpmailmx01.citrite.net, "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>, Ian Jackson <Ian.Jackson@eu.citrix.com>, Jackson <ijackson@chiark.greenend.org.uk>
List-Id: xen-devel@lists.xenproject.org

On Mon, 28 Mar 2011, Gianni Tedesco wrote:
> On Thu, 2011-03-24 at 17:49 +0000, Ian Jackson wrote:
> > From: Ian Jackson <ijackson@chiark.greenend.org.uk>
> >
> > Introduce new flex/regexp-based parser for disk configuration strings.
> >
> > Callers will be updated in following patches.
> >
> > Signed-off-by: Ian Jackson <ijackson@chiark.greenend.org.uk>
> > ---
> >  config/StdGNU.mk              |    1 +
> >  tools/libxl/Makefile          |    9 +-
> >  tools/libxl/libxlu_cfg.c      |   42 +++++++---
> >  tools/libxl/libxlu_disk.c     |  164 +++++++++++++++++++++++++++++++++++++++++
> >  tools/libxl/libxlu_disk_i.h   |   19 +++++
> >  tools/libxl/libxlu_disk_l.l   |   54 ++++++++++++++
> >  tools/libxl/libxlu_internal.h |   20 +++++
> >  tools/libxl/libxlutil.h       |   12 +++
> >  8 files changed, 304 insertions(+), 17 deletions(-)
> >  create mode 100644 tools/libxl/libxlu_disk.c
> >  create mode 100644 tools/libxl/libxlu_disk_i.h
> >  create mode 100644 tools/libxl/libxlu_disk_l.l
> >
> > diff --git a/config/StdGNU.mk b/config/StdGNU.mk
> > index 25aeb4d..9b331f0 100644
> > --- a/config/StdGNU.mk
> > +++ b/config/StdGNU.mk
> > @@ -67,6 +67,7 @@ CURSES_LIBS = -lncurses
> >  PTHREAD_LIBS = -lpthread
> >  UTIL_LIBS = -lutil
> >  DLOPEN_LIBS = -ldl
> > +PCRE_LIBS = -lpcre
> >
> >  SONAME_LDFLAG = -soname
> >  SHLIB_LDFLAGS = -shared
> > diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile
> > index a7b1d51..f0c473f 100644
> > --- a/tools/libxl/Makefile
> > +++ b/tools/libxl/Makefile
> > @@ -22,7 +22,7 @@ endif
> >  LIBXL_LIBS =
> >  LIBXL_LIBS = $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(LDLIBS_libblktapctl) $(UTIL_LIBS) $(LIBUUID_LIBS)
> >
> > -LIBXLU_LIBS =
> > +LIBXLU_LIBS = $(PCRE_LIBS)
> >
> >  LIBXL_OBJS-y = osdeps.o libxl_paths.o libxl_bootloader.o flexarray.o
> >  ifeq ($(LIBXL_BLKTAP),y)
> > @@ -38,9 +38,10 @@ LIBXL_OBJS = flexarray.o libxl.o libxl_create.o libxl_dm.o libxl_pci.o \
> >                         libxl_internal.o libxl_utils.o $(LIBXL_OBJS-y)
> >  LIBXL_OBJS += _libxl_types.o
> >
> > -AUTOINCS= libxlu_cfg_y.h libxlu_cfg_l.h
> > -AUTOSRCS= libxlu_cfg_y.c libxlu_cfg_l.c
> > -LIBXLU_OBJS = libxlu_cfg_y.o libxlu_cfg_l.o libxlu_cfg.o
> > +AUTOINCS= libxlu_cfg_y.h libxlu_cfg_l.h libxlu_disk_l.h
> > +AUTOSRCS= libxlu_cfg_y.c libxlu_cfg_l.c libxlu_disk_l.c
> > +LIBXLU_OBJS = libxlu_cfg_y.o libxlu_cfg_l.o libxlu_cfg.o \
> > +       libxlu_disk_l.o libxlu_disk.o
> >
> >  CLIENTS = xl
> >
> > diff --git a/tools/libxl/libxlu_cfg.c b/tools/libxl/libxlu_cfg.c
> > index f947c21..fd3e102 100644
> > --- a/tools/libxl/libxlu_cfg.c
> > +++ b/tools/libxl/libxlu_cfg.c
> > @@ -15,6 +15,7 @@ XLU_Config *xlu_cfg_init(FILE *report, const char *report_filename) {
> >      if (!cfg->filename) { free(cfg); return 0; }
> >
> >      cfg->settings= 0;
> > +    cfg->disk_re= 0;
> >      return cfg;
> >  }
> >
> > @@ -38,6 +39,29 @@ static int ctx_prep(CfgParseContext *ctx, XLU_Config *cfg) {
> >
> >  static void ctx_dispose(CfgParseContext *ctx) {
> >      if (ctx->scanner) xlu__cfg_yylex_destroy(ctx->scanner);
> > +    if (ctx->disk_re) pcre_free(ctx->disk_re);
> > +}
> > +
> > +int xlu__scanner_string_prep(CfgParseContext *ctx, YY_BUFFER_STATE *buf,
> > +                            XLU_Config *cfg, const char *data, int length) {
> > +    int e;
> > +
> > +    e= ctx_prep(ctx, cfg);
> > +    if (e) return e;
> > +
> > +    *buf = xlu__cfg_yy_scan_bytes(data, length, ctx->scanner);
> > +    if (!*buf) {
> > +        fprintf(cfg->report,"%s: unable to allocate scanner buffer\n",
> > +                cfg->filename);
> > +        return ENOMEM;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +void xlu__scanner_string_dispose(CfgParseContext *ctx, YY_BUFFER_STATE *buf) {
> > +    if (*buf) xlu__cfg_yy_delete_buffer(*buf, ctx->scanner);
> > +    ctx_dispose(ctx);
> >  }
> >
> >  static void parse(CfgParseContext *ctx) {
> > @@ -87,25 +111,17 @@ int xlu_cfg_readfile(XLU_Config *cfg, const char *real_filename) {
> >
> >  int xlu_cfg_readdata(XLU_Config *cfg, const char *data, int length) {
> >      int e;
> > -    YY_BUFFER_STATE buf= 0;
> > -
> > +    YY_BUFFER_STATE buf=0;
> >      CfgParseContext ctx;
> > -    e= ctx_prep(&ctx, cfg);
> > -    if (e) { ctx.err= e; goto xe; }
> > +    ctx.scanner=0;
> >
> > -    buf = xlu__cfg_yy_scan_bytes(data, length, ctx.scanner);
> > -    if (!buf) {
> > -        fprintf(cfg->report,"%s: unable to allocate scanner buffer\n",
> > -                cfg->filename);
> > -        ctx.err= ENOMEM;
> > -        goto xe;
> > -    }
> > +    e = xlu__scanner_string_prep(&ctx,&buf, cfg,data,length);
> > +    if (e) { ctx.err = e; goto xe; }
> >
> >      parse(&ctx);
> >
> >   xe:
> > -    if (buf) xlu__cfg_yy_delete_buffer(buf, ctx.scanner);
> > -    ctx_dispose(&ctx);
> > +    xlu__scanner_string_dispose(&ctx,&buf);
> >
> >      return ctx.err;
> >  }
> > diff --git a/tools/libxl/libxlu_disk.c b/tools/libxl/libxlu_disk.c
> > new file mode 100644
> > index 0000000..be523f7
> > --- /dev/null
> > +++ b/tools/libxl/libxlu_disk.c
> > @@ -0,0 +1,164 @@
> > +
> > +/* Parsing the disk spec a tricky problem, because the target
> > + * string might contain "," which is the delimiter we use for
> > + * stripping off things on the RHS, and ":", which is the delimiter
> > + * we use for stripping off things on the LHS.
> > + *
> > + * For the LHS we use a flex scanner, see libxlu_disk_l.l.
> > + * That chops prefixes like "phy:" from the front of the string,
> > + * accumulating information into the disk structure.
> > + *
> > + * For the RHS, we use a regexp using pcre.
> > + */
> > +
> > +#include "libxlu_internal.h"
> > +#include "libxlu_disk_l.h"
> > +#include "libxlu_disk_i.h"
> > +#include "libxlu_cfg_i.h"
> > +
> > +static void err_core(DiskParseContext *dpc, const char *message,
> > +                    const char *in, int inlen) {
> > +    fprintf(dpc->ctx.cfg->report,
> > +           "%s: config parsing error in disk specification:"
> > +           " %s: near `%s' in `%.*s'\n",
> > +           dpc->ctx.cfg->filename, message, dpc->spec, inlen, in);
> > +    if (!dpc->ctx.err) dpc->ctx.err= EINVAL;
> > +}
> > +
> > +void xlu__disk_err(DiskParseContext *dpc, const char *message) {
> > +    err_core(dpc, message,
> > +            xlu__disk_yyget_text(dpc->ctx.scanner),
> > +            xlu__disk_yyget_leng(dpc->ctx.scanner));
> > +}
> > +
> > +void xlu__disk_rhs(DiskParseContext *dpc) {
> > +
> > +    /* variables used by pcre */
> > +#define WORKSPACE_SIZE 20 /* wtf no manifest constant from pcre */
> > +#define OVECTOR_SIZE 10
> > +    int ovector[OVECTOR_SIZE];
> > +    int workspace[WORKSPACE_SIZE];
> > +
> > +    int rc, len;
> > +    const char *text;
> > +    libxl_device_disk *disk = dpc->disk;
> > +
> > +    static const char *re_text=
> > +       "(.*),(?:/dev/)?([0-9a-zA-Z]*)(:[0-9a-z]+)?(,[0-9a-z]*)?"
> > +       /* 1:target     2:vdev         3:type        4:attrib (r/w) */;
> > +
> > +    /* compile the regexp if we haven't got one already */
> > +    if (!dpc->ctx.disk_re) {
> > +       const char *re_errstr;
> > +       int re_errcode, re_erroffset;
> > +
> > +       dpc->ctx.disk_re = pcre_compile2(re_text, PCRE_DOTALL,
> > +                                        &re_errcode, &re_errstr, &re_erroffset,
> > +                                        0);
> > +       if (!dpc->ctx.disk_re) {
> > +           if (re_errcode == 21 /* wtf, no manifest constant */) {
> > +               dpc->ctx.err = ENOMEM;
> > +               return;
> > +           }
> > +           fprintf(dpc->ctx.cfg->report, "config parsing:"
> > +                   " unable to compile regexp: %s [%d] (at `%s')",
> > +                   re_errstr, re_errcode, re_text+re_erroffset);
> > +           dpc->ctx.err = EIO;
> > +           return;
> > +       }
> > +    }
> > +
> > +    /* actually run the regexp match */
> > +
> > +    text = xlu__disk_yyget_text(dpc->ctx.scanner);
> > +    len = xlu__disk_yyget_leng(dpc->ctx.scanner);
> > +
> > +    rc = pcre_dfa_exec(dpc->ctx.disk_re, 0,
> > +                      text, len, 0,
> > +                      PCRE_ANCHORED,
> > +                      ovector, OVECTOR_SIZE,
> > +                      workspace, WORKSPACE_SIZE);
> > +    switch (rc) {
> > +    case PCRE_ERROR_NOMATCH:
> > +       xlu__disk_err(dpc, "bad syntax for target, or missing vdev");
> > +       return;
> 
>        case 1: ??
> 
> > +    case 2:
> > +    case 3:
> > +    case 4:
> > +       break;
> 
> Or does it belong here? In which case aborting on a parse error is bad
> juju.
>        case 1:
> 
> > +    default:
> > +       abort();
> > +    }
> > +
> > +    /* macros for processing info from capturing parens; see pcreapi(3) */
> > +
> > +#define CAPTURED(cap) (START((cap)) >= 0)
> > +#define START(cap) (ovector[(cap)*2])
> > +#define END(cap)   (ovector[(cap)*2+1])
> > +#define LEN(cap)   (END((cap)) - START((cap)))
> > +#define TEXT(cap)  (text + START((cap)))
> > +
> > +#define STORE(cap, member) do{                                 \
> > +    assert(CAPTURED((cap)));                                   \
> > +    free(disk->member);                                                \
> > +    disk->member = malloc(LEN((cap)) + 1);                     \
> > +    if (!disk->member) { dpc->ctx.err = ENOMEM; return; }      \
> > +    memcpy(disk->member, TEXT((cap)), LEN((cap)));             \
> > +    disk->member[LEN((cap))] = 0;                              \
> > +}while(0)
> > +
> > +#define EXACTLY(cap, string)                           \
> > +    (CAPTURED((cap)) &&                                        \
> > +     LEN((cap))==strlen((string)) &&                   \
> > +     !memcmp(TEXT((cap)), (string), LEN((cap))))
> > +
> > +#define ERR(cap, msg) \
> > +    err_core(dpc, (msg), TEXT((cap)), LEN((cap)))
> > +
> > +    /* actually process the four capturing parens in our regexp */
> > +
> > +    STORE(1, pdev_path);
> > +
> > +    STORE(2, vdev);
> > +
> > +    if (!CAPTURED(3) || EXACTLY(3, ":")) {
> > +       disk->is_cdrom = 0;
> > +    } else if (EXACTLY(3, ":cdrom")) {
> > +       disk->is_cdrom = 1;
> > +    } else {
> > +       ERR(3, "unknown type (only `:' and `:cdrom' supported)");
> > +    }
> > +
> > +    if (!CAPTURED(4) || EXACTLY(4, ",w") || EXACTLY(4, ",")) {
> > +       disk->readwrite = 1;
> > +    } else if (EXACTLY(4, ",r")) {
> > +       disk->readwrite = 0;
> > +    } else {
> > +       ERR(4, "unknown read/write attribute");
> > +    }
> > +}
> 
> Hmm, I'm not sure this is nicer than the code it's replacing, it's
> certainly a lot longer. I don't like the idea of it being full of things
> comparing for ",w" or ":cdrom" etc, rather than tokenising it fully and
> evaluating what the tokens are separately.

I definitely agree.
Besides when doing refactoring the code produced should be either shorter or
at least easier to read but this code is neither of them.