From mboxrd@z Thu Jan 1 00:00:00 1970 From: Stefano Stabellini Subject: Re: [PATCH] libxl: new xlu_disk_parse function Date: Mon, 28 Mar 2011 19:13:38 +0100 Message-ID: References: <1300988989-18305-1-git-send-email-ian.jackson@eu.citrix.com> <1300988989-18305-2-git-send-email-ian.jackson@eu.citrix.com> <1301333477.1691.7.camel@qabil.uk.xensource.com> Mime-Version: 1.0 Content-Type: text/plain; charset="US-ASCII" Return-path: In-Reply-To: <1301333477.1691.7.camel@qabil.uk.xensource.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Gianni Tedesco Cc: Ian@lonpmailmx01.citrite.net, "xen-devel@lists.xensource.com" , Ian Jackson , Jackson List-Id: xen-devel@lists.xenproject.org On Mon, 28 Mar 2011, Gianni Tedesco wrote: > On Thu, 2011-03-24 at 17:49 +0000, Ian Jackson wrote: > > From: Ian Jackson > > > > Introduce new flex/regexp-based parser for disk configuration strings. > > > > Callers will be updated in following patches. > > > > Signed-off-by: Ian Jackson > > --- > > config/StdGNU.mk | 1 + > > tools/libxl/Makefile | 9 +- > > tools/libxl/libxlu_cfg.c | 42 +++++++--- > > tools/libxl/libxlu_disk.c | 164 +++++++++++++++++++++++++++++++++++++++++ > > tools/libxl/libxlu_disk_i.h | 19 +++++ > > tools/libxl/libxlu_disk_l.l | 54 ++++++++++++++ > > tools/libxl/libxlu_internal.h | 20 +++++ > > tools/libxl/libxlutil.h | 12 +++ > > 8 files changed, 304 insertions(+), 17 deletions(-) > > create mode 100644 tools/libxl/libxlu_disk.c > > create mode 100644 tools/libxl/libxlu_disk_i.h > > create mode 100644 tools/libxl/libxlu_disk_l.l > > > > diff --git a/config/StdGNU.mk b/config/StdGNU.mk > > index 25aeb4d..9b331f0 100644 > > --- a/config/StdGNU.mk > > +++ b/config/StdGNU.mk > > @@ -67,6 +67,7 @@ CURSES_LIBS = -lncurses > > PTHREAD_LIBS = -lpthread > > UTIL_LIBS = -lutil > > DLOPEN_LIBS = -ldl > > +PCRE_LIBS = -lpcre > > > > SONAME_LDFLAG = -soname > > SHLIB_LDFLAGS = -shared > > diff --git a/tools/libxl/Makefile b/tools/libxl/Makefile > > index a7b1d51..f0c473f 100644 > > --- a/tools/libxl/Makefile > > +++ b/tools/libxl/Makefile > > @@ -22,7 +22,7 @@ endif > > LIBXL_LIBS = > > LIBXL_LIBS = $(LDLIBS_libxenctrl) $(LDLIBS_libxenguest) $(LDLIBS_libxenstore) $(LDLIBS_libblktapctl) $(UTIL_LIBS) $(LIBUUID_LIBS) > > > > -LIBXLU_LIBS = > > +LIBXLU_LIBS = $(PCRE_LIBS) > > > > LIBXL_OBJS-y = osdeps.o libxl_paths.o libxl_bootloader.o flexarray.o > > ifeq ($(LIBXL_BLKTAP),y) > > @@ -38,9 +38,10 @@ LIBXL_OBJS = flexarray.o libxl.o libxl_create.o libxl_dm.o libxl_pci.o \ > > libxl_internal.o libxl_utils.o $(LIBXL_OBJS-y) > > LIBXL_OBJS += _libxl_types.o > > > > -AUTOINCS= libxlu_cfg_y.h libxlu_cfg_l.h > > -AUTOSRCS= libxlu_cfg_y.c libxlu_cfg_l.c > > -LIBXLU_OBJS = libxlu_cfg_y.o libxlu_cfg_l.o libxlu_cfg.o > > +AUTOINCS= libxlu_cfg_y.h libxlu_cfg_l.h libxlu_disk_l.h > > +AUTOSRCS= libxlu_cfg_y.c libxlu_cfg_l.c libxlu_disk_l.c > > +LIBXLU_OBJS = libxlu_cfg_y.o libxlu_cfg_l.o libxlu_cfg.o \ > > + libxlu_disk_l.o libxlu_disk.o > > > > CLIENTS = xl > > > > diff --git a/tools/libxl/libxlu_cfg.c b/tools/libxl/libxlu_cfg.c > > index f947c21..fd3e102 100644 > > --- a/tools/libxl/libxlu_cfg.c > > +++ b/tools/libxl/libxlu_cfg.c > > @@ -15,6 +15,7 @@ XLU_Config *xlu_cfg_init(FILE *report, const char *report_filename) { > > if (!cfg->filename) { free(cfg); return 0; } > > > > cfg->settings= 0; > > + cfg->disk_re= 0; > > return cfg; > > } > > > > @@ -38,6 +39,29 @@ static int ctx_prep(CfgParseContext *ctx, XLU_Config *cfg) { > > > > static void ctx_dispose(CfgParseContext *ctx) { > > if (ctx->scanner) xlu__cfg_yylex_destroy(ctx->scanner); > > + if (ctx->disk_re) pcre_free(ctx->disk_re); > > +} > > + > > +int xlu__scanner_string_prep(CfgParseContext *ctx, YY_BUFFER_STATE *buf, > > + XLU_Config *cfg, const char *data, int length) { > > + int e; > > + > > + e= ctx_prep(ctx, cfg); > > + if (e) return e; > > + > > + *buf = xlu__cfg_yy_scan_bytes(data, length, ctx->scanner); > > + if (!*buf) { > > + fprintf(cfg->report,"%s: unable to allocate scanner buffer\n", > > + cfg->filename); > > + return ENOMEM; > > + } > > + > > + return 0; > > +} > > + > > +void xlu__scanner_string_dispose(CfgParseContext *ctx, YY_BUFFER_STATE *buf) { > > + if (*buf) xlu__cfg_yy_delete_buffer(*buf, ctx->scanner); > > + ctx_dispose(ctx); > > } > > > > static void parse(CfgParseContext *ctx) { > > @@ -87,25 +111,17 @@ int xlu_cfg_readfile(XLU_Config *cfg, const char *real_filename) { > > > > int xlu_cfg_readdata(XLU_Config *cfg, const char *data, int length) { > > int e; > > - YY_BUFFER_STATE buf= 0; > > - > > + YY_BUFFER_STATE buf=0; > > CfgParseContext ctx; > > - e= ctx_prep(&ctx, cfg); > > - if (e) { ctx.err= e; goto xe; } > > + ctx.scanner=0; > > > > - buf = xlu__cfg_yy_scan_bytes(data, length, ctx.scanner); > > - if (!buf) { > > - fprintf(cfg->report,"%s: unable to allocate scanner buffer\n", > > - cfg->filename); > > - ctx.err= ENOMEM; > > - goto xe; > > - } > > + e = xlu__scanner_string_prep(&ctx,&buf, cfg,data,length); > > + if (e) { ctx.err = e; goto xe; } > > > > parse(&ctx); > > > > xe: > > - if (buf) xlu__cfg_yy_delete_buffer(buf, ctx.scanner); > > - ctx_dispose(&ctx); > > + xlu__scanner_string_dispose(&ctx,&buf); > > > > return ctx.err; > > } > > diff --git a/tools/libxl/libxlu_disk.c b/tools/libxl/libxlu_disk.c > > new file mode 100644 > > index 0000000..be523f7 > > --- /dev/null > > +++ b/tools/libxl/libxlu_disk.c > > @@ -0,0 +1,164 @@ > > + > > +/* Parsing the disk spec a tricky problem, because the target > > + * string might contain "," which is the delimiter we use for > > + * stripping off things on the RHS, and ":", which is the delimiter > > + * we use for stripping off things on the LHS. > > + * > > + * For the LHS we use a flex scanner, see libxlu_disk_l.l. > > + * That chops prefixes like "phy:" from the front of the string, > > + * accumulating information into the disk structure. > > + * > > + * For the RHS, we use a regexp using pcre. > > + */ > > + > > +#include "libxlu_internal.h" > > +#include "libxlu_disk_l.h" > > +#include "libxlu_disk_i.h" > > +#include "libxlu_cfg_i.h" > > + > > +static void err_core(DiskParseContext *dpc, const char *message, > > + const char *in, int inlen) { > > + fprintf(dpc->ctx.cfg->report, > > + "%s: config parsing error in disk specification:" > > + " %s: near `%s' in `%.*s'\n", > > + dpc->ctx.cfg->filename, message, dpc->spec, inlen, in); > > + if (!dpc->ctx.err) dpc->ctx.err= EINVAL; > > +} > > + > > +void xlu__disk_err(DiskParseContext *dpc, const char *message) { > > + err_core(dpc, message, > > + xlu__disk_yyget_text(dpc->ctx.scanner), > > + xlu__disk_yyget_leng(dpc->ctx.scanner)); > > +} > > + > > +void xlu__disk_rhs(DiskParseContext *dpc) { > > + > > + /* variables used by pcre */ > > +#define WORKSPACE_SIZE 20 /* wtf no manifest constant from pcre */ > > +#define OVECTOR_SIZE 10 > > + int ovector[OVECTOR_SIZE]; > > + int workspace[WORKSPACE_SIZE]; > > + > > + int rc, len; > > + const char *text; > > + libxl_device_disk *disk = dpc->disk; > > + > > + static const char *re_text= > > + "(.*),(?:/dev/)?([0-9a-zA-Z]*)(:[0-9a-z]+)?(,[0-9a-z]*)?" > > + /* 1:target 2:vdev 3:type 4:attrib (r/w) */; > > + > > + /* compile the regexp if we haven't got one already */ > > + if (!dpc->ctx.disk_re) { > > + const char *re_errstr; > > + int re_errcode, re_erroffset; > > + > > + dpc->ctx.disk_re = pcre_compile2(re_text, PCRE_DOTALL, > > + &re_errcode, &re_errstr, &re_erroffset, > > + 0); > > + if (!dpc->ctx.disk_re) { > > + if (re_errcode == 21 /* wtf, no manifest constant */) { > > + dpc->ctx.err = ENOMEM; > > + return; > > + } > > + fprintf(dpc->ctx.cfg->report, "config parsing:" > > + " unable to compile regexp: %s [%d] (at `%s')", > > + re_errstr, re_errcode, re_text+re_erroffset); > > + dpc->ctx.err = EIO; > > + return; > > + } > > + } > > + > > + /* actually run the regexp match */ > > + > > + text = xlu__disk_yyget_text(dpc->ctx.scanner); > > + len = xlu__disk_yyget_leng(dpc->ctx.scanner); > > + > > + rc = pcre_dfa_exec(dpc->ctx.disk_re, 0, > > + text, len, 0, > > + PCRE_ANCHORED, > > + ovector, OVECTOR_SIZE, > > + workspace, WORKSPACE_SIZE); > > + switch (rc) { > > + case PCRE_ERROR_NOMATCH: > > + xlu__disk_err(dpc, "bad syntax for target, or missing vdev"); > > + return; > > case 1: ?? > > > + case 2: > > + case 3: > > + case 4: > > + break; > > Or does it belong here? In which case aborting on a parse error is bad > juju. > case 1: > > > + default: > > + abort(); > > + } > > + > > + /* macros for processing info from capturing parens; see pcreapi(3) */ > > + > > +#define CAPTURED(cap) (START((cap)) >= 0) > > +#define START(cap) (ovector[(cap)*2]) > > +#define END(cap) (ovector[(cap)*2+1]) > > +#define LEN(cap) (END((cap)) - START((cap))) > > +#define TEXT(cap) (text + START((cap))) > > + > > +#define STORE(cap, member) do{ \ > > + assert(CAPTURED((cap))); \ > > + free(disk->member); \ > > + disk->member = malloc(LEN((cap)) + 1); \ > > + if (!disk->member) { dpc->ctx.err = ENOMEM; return; } \ > > + memcpy(disk->member, TEXT((cap)), LEN((cap))); \ > > + disk->member[LEN((cap))] = 0; \ > > +}while(0) > > + > > +#define EXACTLY(cap, string) \ > > + (CAPTURED((cap)) && \ > > + LEN((cap))==strlen((string)) && \ > > + !memcmp(TEXT((cap)), (string), LEN((cap)))) > > + > > +#define ERR(cap, msg) \ > > + err_core(dpc, (msg), TEXT((cap)), LEN((cap))) > > + > > + /* actually process the four capturing parens in our regexp */ > > + > > + STORE(1, pdev_path); > > + > > + STORE(2, vdev); > > + > > + if (!CAPTURED(3) || EXACTLY(3, ":")) { > > + disk->is_cdrom = 0; > > + } else if (EXACTLY(3, ":cdrom")) { > > + disk->is_cdrom = 1; > > + } else { > > + ERR(3, "unknown type (only `:' and `:cdrom' supported)"); > > + } > > + > > + if (!CAPTURED(4) || EXACTLY(4, ",w") || EXACTLY(4, ",")) { > > + disk->readwrite = 1; > > + } else if (EXACTLY(4, ",r")) { > > + disk->readwrite = 0; > > + } else { > > + ERR(4, "unknown read/write attribute"); > > + } > > +} > > Hmm, I'm not sure this is nicer than the code it's replacing, it's > certainly a lot longer. I don't like the idea of it being full of things > comparing for ",w" or ":cdrom" etc, rather than tokenising it fully and > evaluating what the tokens are separately. I definitely agree. Besides when doing refactoring the code produced should be either shorter or at least easier to read but this code is neither of them.