From mboxrd@z Thu Jan 1 00:00:00 1970 Subject: Re: [PATCH] libselinux: add support for pcre2 To: Janis Danisevskis , selinux@tycho.nsa.gov, seandroid-list@tycho.nsa.gov, jwcart2@tycho.nsa.gov References: <1473349947-11952-1-git-send-email-jdanis@android.com> From: Stephen Smalley Message-ID: Date: Thu, 8 Sep 2016 13:51:45 -0400 MIME-Version: 1.0 In-Reply-To: <1473349947-11952-1-git-send-email-jdanis@android.com> Content-Type: text/plain; charset=utf-8 List-Id: "Security-Enhanced Linux \(SELinux\) mailing list" List-Post: List-Help: On 09/08/2016 11:52 AM, Janis Danisevskis wrote: > From: Janis Danisevskis > > This patch moves all pcre1/2 dependencies into the new files regex.h > and regex.c implementing the common denominator of features needed > by libselinux. The compiler flag -DUSE_PCRE2 toggles between the > used implementations. > > As of this patch libselinux supports either pcre or pcre2 but not > both at the same time. The persistently stored file contexts > information differs. This means libselinux can only load file > context files generated by sefcontext_compile build with the > same pcre variant. > > Also, for pcre2 the persistent format is architecture dependent. > Stored precompiled regular expressions can only be used on the > same architecture they were generated on. If pcre2 is used and > sefcontext_compile shall generate portable output, it and libselinux > must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the > cost of having to recompile the regular expressions at load time. > > Signed-off-by: Janis Danisevskis > > This patch includes includes: > > libselinux: fix memory leak on pcre2 > > Introduced a malloc on pcre_version(). Libselinux > expected this to be static, just use a static > internal buffer. > > Signed-off-by: William Roberts > --- > libselinux/Makefile | 13 + > libselinux/src/Makefile | 4 +- > libselinux/src/label_file.c | 93 ++----- > libselinux/src/label_file.h | 59 ++--- > libselinux/src/regex.c | 461 ++++++++++++++++++++++++++++++++++ > libselinux/src/regex.h | 169 +++++++++++++ > libselinux/utils/Makefile | 4 +- > libselinux/utils/sefcontext_compile.c | 55 +--- > 8 files changed, 697 insertions(+), 161 deletions(-) > create mode 100644 libselinux/src/regex.c > create mode 100644 libselinux/src/regex.h > > diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile > index 37d01af..66687e6 100644 > --- a/libselinux/src/Makefile > +++ b/libselinux/src/Makefile > @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi > -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \ > -Werror -Wno-aggregate-return -Wno-redundant-decls > > -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) > +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS) > > SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \ > -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations > @@ -113,7 +113,7 @@ $(LIBA): $(OBJS) > $(RANLIB) $@ > > $(LIBSO): $(LOBJS) > - $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro > + $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro > ln -sf $@ $(TARGET) > > $(LIBPC): $(LIBPC).in ../VERSION We want make to still work in this subdirectory, so we need defaults assigned to PCRE_CFLAGS and PCRE_LDFLAGS if not set by the caller. > diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h > index 6d1e890..24cb9e0 100644 > --- a/libselinux/src/label_file.h > +++ b/libselinux/src/label_file.h > @@ -394,7 +368,8 @@ static inline int process_line(struct selabel_handle *rec, > struct saved_data *data = (struct saved_data *)rec->data; > struct spec *spec_arr; > unsigned int nspec = data->nspec; > - const char *errbuf = NULL; > + char const *errbuf; Unnecessary change? > + struct regex_error_data error_data; > > items = read_spec_entries(line_buf, &errbuf, 3, ®ex, &type, &context); > if (items < 0) { > @@ -454,7 +429,7 @@ static inline int process_line(struct selabel_handle *rec, > data->nspec++; > > if (rec->validating && > - compile_regex(data, &spec_arr[nspec], &errbuf)) { > + compile_regex(data, &spec_arr[nspec], &error_data)) { Same bug as before - errbuf was being used in the following log call, so you need to change it to use regex_format_error(). > COMPAT_LOG(SELINUX_ERROR, > "%s: line %u has invalid regex %s: %s\n", > path, lineno, regex, > diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c > new file mode 100644 > index 0000000..558a72a > --- /dev/null > +++ b/libselinux/src/regex.c > @@ -0,0 +1,461 @@ > +#include > +#include > +#include > +#include > + > +#include "regex.h" > +#include "label_file.h" > + > +#ifdef USE_PCRE2 > +int regex_prepare_data(struct regex_data ** regex, > + char const * pattern_string, > + struct regex_error_data * errordata) { All of the non-static functions in this file need to be marked with "hidden" so that they are not part of the shared library ABI. Preferred coding style is Linux kernel, so opening bracket goes on a line by itself for functions. > + > +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) { > + int rc; > + size_t entry_len; As before, this needs to be uint32_t or it will be the wrong size on 64-bit systems and the subsequent next_entry() call won't fully initialize it. > + > + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > + if (rc < 0) > + return -1; > + > + if (entry_len) { > + /* > + * this should yield exactly one because we store one pattern at a time > + */ > + rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr); > + if (rc != 1) > + return -1; > + > + *regex = regex_data_create(); > + if (!*regex) > + return -1; > + > + rc = pcre2_serialize_decode(&(*regex)->regex, 1, > + (PCRE2_SPTR)mmap_area->next_addr, NULL); > + if (rc != 1) > + goto err; > + > + (*regex)->match_data = > + pcre2_match_data_create_from_pattern((*regex)->regex, NULL); > + if (!(*regex)->match_data) > + goto err; > + } > + > + /* and skip the decoded bit */ > + rc = next_entry(NULL, mmap_area, entry_len); > + if (rc < 0) > + goto err; > + > + return 0; > +err: > + regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +int regex_writef(struct regex_data * regex, FILE * fp) { > + int rc = 0; > + size_t len; > + PCRE2_SIZE to_write; > + PCRE2_UCHAR * bytes = NULL; > +#ifndef NO_PERSISTENTLY_STORED_PATTERNS > + int do_write_patterns = 1; > +#else > + int do_write_patterns = 0; > +#endif > + > + if (do_write_patterns) { > + /* encode the patter for serialization */ > + rc = pcre2_serialize_encode((const pcre2_code **)®ex->regex, 1, > + &bytes, &to_write, NULL); > + if (rc != 1) { > + rc = -1; > + goto err; > + } > + } else { > + to_write = 0; > + } > + > + /* write serialized pattern's size */ > + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); We need a uint32_t variable here, not PCRE2_SIZE which may be larger. > + if (len != 1) { > + rc = -1; > + goto err; > + } > + > + if (do_write_patterns) { > + /* write serialized pattern */ > + len = fwrite(bytes, 1, to_write, fp); > + if (len != to_write) { > + rc = -1; > + } In general you don't need { } for a single statement except where it might otherwise be hard to read (e.g. helpful for the else statement above, but not really here or in several other cases below). > + } > + > +err: > + if (bytes) > + pcre2_serialize_free(bytes); > + > + return rc; > +} > + > +void regex_data_free(struct regex_data * regex) { > + if (regex) { > + if (regex->regex) { > + pcre2_code_free(regex->regex); > + } > + if (regex->match_data) { > + pcre2_match_data_free(regex->match_data); > + } > + free(regex); > + } > +} > + > +int regex_match(struct regex_data * regex, char const * subject, int partial) { > + int rc; > + rc = pcre2_match(regex->regex, > + (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0, > + partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, > + NULL); > + if (rc > 0) > + return REGEX_MATCH; Indentation problem. > + switch (rc) { > + case PCRE2_ERROR_PARTIAL: > + return REGEX_MATCH_PARTIAL; > + case PCRE2_ERROR_NOMATCH: > + return REGEX_NO_MATCH; > + default: > + return REGEX_ERROR; > + } > +} > + > +/* > + * TODO Replace this compare function with something that actually compares the > + * regular expressions. > + * This compare function basically just compares the binary representations of > + * the automatons, and because this representation contains pointers and > + * metadata, it can only return a match if regex1 == regex2. > + * Preferably, this function would be replaced with an algorithm that computes > + * the equivalence of the automatons systematically. > + */ > +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) { > + int rc; > + size_t len1, len2; > + rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1); > + assert(rc == 0); > + rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2); > + assert(rc == 0); > + if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) > + return SELABEL_INCOMPARABLE; > + > + return SELABEL_EQUAL; > +} > + > +#else // !USE_PCRE2 > + > +int regex_prepare_data(struct regex_data ** regex, > + char const * pattern_string, > + struct regex_error_data * errordata) { > + memset(errordata, 0, sizeof(struct regex_error_data)); > + > + *regex = regex_data_create(); > + if (!(*regex)) > + return -1; > + > + (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL, > + &errordata->error_buffer, > + &errordata->error_offset, NULL); > + if (!(*regex)->regex) { > + goto err; > + } > + (*regex)->owned = 1; > + > + (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer); > + if (!(*regex)->sd && errordata->error_buffer) { > + goto err; > + } > + > + return 0; > + > +err: regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +char const * regex_version(void) { > + return pcre_version(); > +} > + > +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) { > + int rc; > + size_t entry_len, info_len; Same as before: entry_len needs to be uint32_t. > + > + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > + if (rc < 0 || !entry_len) { > + return -1; > + } > + *regex = regex_data_create(); > + if (!(*regex)) > + return -1; > + > + (*regex)->owned = 0; > + (*regex)->regex = (pcre *) mmap_area->next_addr; > + rc = next_entry(NULL, mmap_area, entry_len); > + if (rc < 0) > + goto err; > + > + /* > + * Check that regex lengths match. pcre_fullinfo() > + * also validates its magic number. > + */ > + rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len); > + if (rc < 0 || info_len != entry_len) { > + goto err; > + } > + > + rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); > + if (rc < 0 || !entry_len) { > + goto err; > + } > + > + if (entry_len) { > + (*regex)->lsd.study_data = (void *) mmap_area->next_addr; > + (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA; > + rc = next_entry(NULL, mmap_area, entry_len); > + if (rc < 0) > + goto err; > + > + /* Check that study data lengths match. */ > + rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd, > + PCRE_INFO_STUDYSIZE, &info_len); > + if (rc < 0 || info_len != entry_len) > + goto err; > + } > + return 0; > + > +err: > + regex_data_free(*regex); > + *regex = NULL; > + return -1; > +} > + > +int regex_writef(struct regex_data * regex, FILE * fp) { > + int rc; > + size_t len; > + uint32_t to_write; > + size_t size; > + pcre_extra * sd = regex->owned ? regex->sd : > + (regex->lsd.study_data ? ®ex->lsd : NULL); Recommend defining and using an inline function for the above and the similar, inconsistent case in regex_match() below. Prior to this patch, the equivalent was get_pcre_extra(). > + > + /* determine the size of the pcre data in bytes */ > + rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size); > + if (rc < 0) > + return -1; > + > + /* write the number of bytes in the pcre data */ > + to_write = size; > + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); > + if (len != 1) > + return -1; > + > + /* write the actual pcre data as a char array */ > + len = fwrite(regex->regex, 1, to_write, fp); > + if (len != to_write) > + return -1; > + > + if (sd) { > + /* determine the size of the pcre study info */ > + rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, > + &size); > + if (rc < 0) > + return -1; > + } else > + size = 0; > + > + /* write the number of bytes in the pcre study data */ > + to_write = size; > + len = fwrite(&to_write, sizeof(uint32_t), 1, fp); > + if (len != 1) > + return -1; > + > + if (sd) { > + /* write the actual pcre study data as a char array */ > + len = fwrite(sd->study_data, 1, to_write, fp); > + if (len != to_write) > + return -1; > + } > + > + return 0; > +} > + > +void regex_data_free(struct regex_data * regex) { > + if (regex) { > + if (regex->owned) { > + if (regex->regex) > + pcre_free(regex->regex); > + if (regex->sd) > + pcre_free_study(regex->sd); > + } > + free(regex); > + } > +} > + > +int regex_match(struct regex_data * regex, char const * subject, int partial) { > + int rc; > + > + rc = pcre_exec(regex->regex, regex->owned ? regex->sd : ®ex->lsd, get_pcre_extra() > + subject, strlen(subject), 0, > + partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0); > + switch (rc) { > + case 0: > + return REGEX_MATCH; > + case PCRE_ERROR_PARTIAL: > + return REGEX_MATCH_PARTIAL; > + case PCRE_ERROR_NOMATCH: > + return REGEX_NO_MATCH; > + default: > + return REGEX_ERROR; > + } > +} > + > +/* > + * TODO Replace this compare function with something that actually compares the > + * regular expressions. > + * This compare function basically just compares the binary representations of > + * the automatons, and because this representation contains pointers and > + * metadata, it can only return a match if regex1 == regex2. > + * Preferably, this function would be replaced with an algorithm that computes > + * the equivalence of the automatons systematically. > + */ > +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) { > + int rc; > + size_t len1, len2; > + rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1); > + assert(rc == 0); > + rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2); > + assert(rc == 0); > + if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1)) > + return SELABEL_INCOMPARABLE; > + > + return SELABEL_EQUAL; > +} > + > +#endif > + > + > +struct regex_data * regex_data_create(void) { > + struct regex_data * dummy = (struct regex_data*) malloc( > + sizeof(struct regex_data)); > + if (dummy) { > + memset(dummy, 0, sizeof(struct regex_data)); > + } Just use calloc()? > + return dummy; > +} > +