All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] libselinux: add support for pcre2
@ 2016-09-15 16:14 Janis Danisevskis
  2016-09-15 17:53 ` Stephen Smalley
  0 siblings, 1 reply; 24+ messages in thread
From: Janis Danisevskis @ 2016-09-15 16:14 UTC (permalink / raw)
  To: selinux, seandroid-list, sds, jwcart2

From: Janis Danisevskis <jdanis@google.com>

This patch moves all pcre1/2 dependencies into the new files regex.h
and regex.c implementing the common denominator of features needed
by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
used implementations.

As of this patch libselinux supports either pcre or pcre2 but not
both at the same time. The persistently stored file contexts
information differs. This means libselinux can only load file
context files generated by sefcontext_compile build with the
same pcre variant.

Also, for pcre2 the persistent format is architecture dependent.
Stored precompiled regular expressions can only be used on the
same architecture they were generated on. If pcre2 is used,
sefcontext_compile now respects the "-r". This flag makes
sefcontext_compile include the precompiled regular expressions
in the output file. The default is to omit them, so that the
output remains portable at the cost of having to recompile
the regular expressions at load time, or rather on first use.

Signed-off-by: Janis Danisevskis <jdanis@google.com>
---
 libselinux/Makefile                   |   9 +
 libselinux/src/Makefile               |   6 +-
 libselinux/src/label_file.c           |  92 ++-----
 libselinux/src/label_file.h           |  72 ++---
 libselinux/src/regex.c                | 496 ++++++++++++++++++++++++++++++++++
 libselinux/src/regex.h                | 149 ++++++++++
 libselinux/utils/Makefile             |   7 +-
 libselinux/utils/sefcontext_compile.c |  72 ++---
 policycoreutils/restorecond/Makefile  |   9 +-
 9 files changed, 745 insertions(+), 167 deletions(-)
 create mode 100644 libselinux/src/regex.c
 create mode 100644 libselinux/src/regex.h

diff --git a/libselinux/Makefile b/libselinux/Makefile
index 6142b60..5a8d42c 100644
--- a/libselinux/Makefile
+++ b/libselinux/Makefile
@@ -24,6 +24,15 @@ ifeq ($(DISABLE_SETRANS),y)
 endif
 export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
 
+USE_PCRE2 ?= n
+ifeq ($(USE_PCRE2),y)
+	PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
+	PCRE_LDFLAGS := -lpcre2-8
+else
+	PCRE_LDFLAGS := -lpcre
+endif
+export PCRE_CFLAGS PCRE_LDFLAGS
+
 all install relabel clean distclean indent:
 	@for subdir in $(SUBDIRS); do \
 		(cd $$subdir && $(MAKE) $@) || exit 1; \
diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
index 37d01af..36e42b8 100644
--- a/libselinux/src/Makefile
+++ b/libselinux/src/Makefile
@@ -74,7 +74,9 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
           -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
           -Werror -Wno-aggregate-return -Wno-redundant-decls
 
-override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
+PCRE_LDFLAGS ?= -lpcre
+
+override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
 
 SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
 		-Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
@@ -113,7 +115,7 @@ $(LIBA): $(OBJS)
 	$(RANLIB) $@
 
 $(LIBSO): $(LOBJS)
-	$(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
+	$(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
 	ln -sf $@ $(TARGET) 
 
 $(LIBPC): $(LIBPC).in ../VERSION
diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
index 9faecdb..9a67aa2 100644
--- a/libselinux/src/label_file.c
+++ b/libselinux/src/label_file.c
@@ -15,7 +15,6 @@
 #include <errno.h>
 #include <limits.h>
 #include <stdint.h>
-#include <pcre.h>
 #include <unistd.h>
 #include <sys/mman.h>
 #include <sys/types.h>
@@ -126,6 +125,7 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 	struct mmap_area *mmap_area;
 	uint32_t i, magic, version;
 	uint32_t entry_len, stem_map_len, regex_array_len;
+	const char *reg_version;
 
 	mmap_area = malloc(sizeof(*mmap_area));
 	if (!mmap_area) {
@@ -155,8 +155,13 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 	if (rc < 0 || version > SELINUX_COMPILED_FCONTEXT_MAX_VERS)
 		return -1;
 
+	reg_version = regex_version();
+	if (!reg_version)
+		return -1;
+
 	if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
-		len = strlen(pcre_version());
+
+		len = strlen(reg_version);
 
 		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
 		if (rc < 0)
@@ -166,7 +171,7 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 		if (len != entry_len)
 			return -1;
 
-		/* Check if pcre version mismatch */
+		/* Check if regex version mismatch */
 		str_buf = malloc(entry_len + 1);
 		if (!str_buf)
 			return -1;
@@ -178,7 +183,7 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 		}
 
 		str_buf[entry_len] = '\0';
-		if ((strcmp(str_buf, pcre_version()) != 0)) {
+		if ((strcmp(str_buf, reg_version) != 0)) {
 			free(str_buf);
 			return -1;
 		}
@@ -258,7 +263,6 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 
 		spec = &data->spec_arr[data->nspec];
 		spec->from_mmap = 1;
-		spec->regcomp = 1;
 
 		/* Process context */
 		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
@@ -345,47 +349,10 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 			spec->prefix_len = prefix_len;
 		}
 
-		/* Process regex and study_data entries */
-		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
-		if (rc < 0 || !entry_len) {
-			rc = -1;
-			goto out;
-		}
-		spec->regex = (pcre *)mmap_area->next_addr;
-		rc = next_entry(NULL, mmap_area, entry_len);
+		rc = regex_load_mmap(mmap_area, &spec->regex);
 		if (rc < 0)
 			goto out;
 
-		/* Check that regex lengths match. pcre_fullinfo()
-		 * also validates its magic number. */
-		rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
-		if (rc < 0 || len != entry_len) {
-			rc = -1;
-			goto out;
-		}
-
-		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
-		if (rc < 0 || !entry_len) {
-			rc = -1;
-			goto out;
-		}
-
-		if (entry_len) {
-			spec->lsd.study_data = (void *)mmap_area->next_addr;
-			spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
-			rc = next_entry(NULL, mmap_area, entry_len);
-			if (rc < 0)
-				goto out;
-
-			/* Check that study data lengths match. */
-			rc = pcre_fullinfo(spec->regex, &spec->lsd,
-					   PCRE_INFO_STUDYSIZE, &len);
-			if (rc < 0 || len != entry_len) {
-				rc = -1;
-				goto out;
-			}
-		}
-
 		data->nspec++;
 	}
 
@@ -630,14 +597,11 @@ static void closef(struct selabel_handle *rec)
 		spec = &data->spec_arr[i];
 		free(spec->lr.ctx_trans);
 		free(spec->lr.ctx_raw);
+		regex_data_free(spec->regex);
 		if (spec->from_mmap)
 			continue;
 		free(spec->regex_str);
 		free(spec->type_str);
-		if (spec->regcomp) {
-			pcre_free(spec->regex);
-			pcre_free_study(spec->sd);
-		}
 	}
 
 	for (i = 0; i < (unsigned int)data->num_stems; i++) {
@@ -669,7 +633,7 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 {
 	struct saved_data *data = (struct saved_data *)rec->data;
 	struct spec *spec_arr = data->spec_arr;
-	int i, rc, file_stem, pcre_options = 0;
+	int i, rc, file_stem;
 	mode_t mode = (mode_t)type;
 	const char *buf;
 	struct spec *ret = NULL;
@@ -702,9 +666,6 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 	file_stem = find_stem_from_file(data, &buf);
 	mode &= S_IFMT;
 
-	if (partial)
-		pcre_options |= PCRE_PARTIAL_SOFT;
-
 	/*
 	 * Check for matching specifications in reverse order, so that
 	 * the last matching specification is used.
@@ -720,22 +681,16 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 			if (compile_regex(data, spec, NULL) < 0)
 				goto finish;
 			if (spec->stem_id == -1)
-				rc = pcre_exec(spec->regex,
-						    get_pcre_extra(spec),
-						    key, strlen(key), 0,
-						    pcre_options, NULL, 0);
+				rc = regex_match(spec->regex, key, partial);
 			else
-				rc = pcre_exec(spec->regex,
-						    get_pcre_extra(spec),
-						    buf, strlen(buf), 0,
-						    pcre_options, NULL, 0);
-			if (rc == 0) {
+				rc = regex_match(spec->regex, buf, partial);
+			if (rc == REGEX_MATCH) {
 				spec->matches++;
 				break;
-			} else if (partial && rc == PCRE_ERROR_PARTIAL)
+			} else if (partial && rc == REGEX_MATCH_PARTIAL)
 				break;
 
-			if (rc == PCRE_ERROR_NOMATCH)
+			if (rc == REGEX_NO_MATCH)
 				continue;
 
 			errno = ENOENT;
@@ -874,17 +829,10 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1,
 			continue;
 		}
 
-		if (spec1->regcomp && spec2->regcomp) {
-			size_t len1, len2;
-			int rc;
-
-			rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1);
-			assert(rc == 0);
-			rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2);
-			assert(rc == 0);
-			if (len1 != len2 ||
-			    memcmp(spec1->regex, spec2->regex, len1))
+		if (spec1->regex && spec2->regex) {
+			if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){
 				return incomp(spec1, spec2, "regex", i, j);
+			}
 		} else {
 			if (strcmp(spec1->regex_str, spec2->regex_str))
 				return incomp(spec1, spec2, "regex_str", i, j);
diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
index 6d1e890..88f4294 100644
--- a/libselinux/src/label_file.h
+++ b/libselinux/src/label_file.h
@@ -6,6 +6,14 @@
 
 #include <sys/stat.h>
 
+/*
+ * regex.h/c were introduced to hold all dependencies on the regular
+ * expression back-end when we started supporting PCRE2. regex.h defines a
+ * minimal interface required by libselinux, so that the remaining code
+ * can be agnostic about the underlying implementation.
+ */
+#include "regex.h"
+
 #include "callbacks.h"
 #include "label_internal.h"
 
@@ -19,26 +27,16 @@
 
 #define SELINUX_COMPILED_FCONTEXT_MAX_VERS	SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
 
-/* Prior to version 8.20, libpcre did not have pcre_free_study() */
-#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
-#define pcre_free_study  pcre_free
-#endif
-
 /* A file security context specification. */
 struct spec {
 	struct selabel_lookup_rec lr;	/* holds contexts for lookup result */
 	char *regex_str;	/* regular expession string for diagnostics */
 	char *type_str;		/* type string for diagnostic messages */
-	pcre *regex;		/* compiled regular expression */
-	union {
-		pcre_extra *sd;	/* pointer to extra compiled stuff */
-		pcre_extra lsd;	/* used to hold the mmap'd version */
-	};
+	struct regex_data * regex; /* backend dependent regular expression data */
 	mode_t mode;		/* mode format value */
 	int matches;		/* number of matching pathnames */
 	int stem_id;		/* indicates which stem-compression item */
 	char hasMetaChars;	/* regular expression has meta-chars */
-	char regcomp;		/* regex_str has been compiled to regex */
 	char from_mmap;		/* this spec is from an mmap of the data */
 	size_t prefix_len;      /* length of fixed path prefix */
 };
@@ -78,17 +76,6 @@ struct saved_data {
 	struct mmap_area *mmap_areas;
 };
 
-static inline pcre_extra *get_pcre_extra(struct spec *spec)
-{
-	if (spec->from_mmap) {
-		if (spec->lsd.study_data)
-			return &spec->lsd;
-		else
-			return NULL;
-	} else
-		return spec->sd;
-}
-
 static inline mode_t string_to_mode(char *mode)
 {
 	size_t len;
@@ -333,13 +320,14 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes)
 static inline int compile_regex(struct saved_data *data, struct spec *spec,
 					    const char **errbuf)
 {
-	const char *tmperrbuf;
 	char *reg_buf, *anchored_regex, *cp;
+	struct regex_error_data error_data;
+	static char regex_error_format_buffer[256];
 	struct stem *stem_arr = data->stem_arr;
 	size_t len;
-	int erroff;
+	int rc;
 
-	if (spec->regcomp)
+	if (spec->regex)
 		return 0; /* already done */
 
 	/* Skip the fixed stem. */
@@ -350,8 +338,11 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
 	/* Anchor the regular expression. */
 	len = strlen(reg_buf);
 	cp = anchored_regex = malloc(len + 3);
-	if (!anchored_regex)
+	if (!anchored_regex) {
+		if (errbuf)
+			*errbuf = "out of memory";
 		return -1;
+	}
 
 	/* Create ^...$ regexp.  */
 	*cp++ = '^';
@@ -361,25 +352,19 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
 	*cp = '\0';
 
 	/* Compile the regular expression. */
-	spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
-						    &erroff, NULL);
+	rc = regex_prepare_data(&spec->regex, anchored_regex, &error_data);
 	free(anchored_regex);
-	if (!spec->regex) {
-		if (errbuf)
-			*errbuf = tmperrbuf;
-		return -1;
-	}
-
-	spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
-	if (!spec->sd && tmperrbuf) {
-		if (errbuf)
-			*errbuf = tmperrbuf;
+	if (rc < 0) {
+		if (errbuf) {
+			regex_format_error(&error_data,
+					regex_error_format_buffer,
+					sizeof(regex_error_format_buffer));
+			*errbuf = &regex_error_format_buffer[0];
+		}
 		return -1;
 	}
 
 	/* Done. */
-	spec->regcomp = 1;
-
 	return 0;
 }
 
@@ -453,12 +438,11 @@ static inline int process_line(struct selabel_handle *rec,
 	 */
 	data->nspec++;
 
-	if (rec->validating &&
-			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
+	if (rec->validating
+			&& compile_regex(data, &spec_arr[nspec], &errbuf)) {
 		COMPAT_LOG(SELINUX_ERROR,
 			   "%s:  line %u has invalid regex %s:  %s\n",
-			   path, lineno, regex,
-			   (errbuf ? errbuf : "out of memory"));
+			   path, lineno, regex, errbuf);
 		errno = EINVAL;
 		return -1;
 	}
diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
new file mode 100644
index 0000000..646351b
--- /dev/null
+++ b/libselinux/src/regex.c
@@ -0,0 +1,496 @@
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "regex.h"
+#include "label_file.h"
+
+#ifdef USE_PCRE2
+struct regex_data {
+	pcre2_code *regex; /* compiled regular expression */
+	/*
+	 * match data block required for the compiled
+	 * pattern in pcre2
+	 */
+	pcre2_match_data *match_data;
+};
+
+int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
+		       struct regex_error_data *errordata)
+{
+	memset(errordata, 0, sizeof(struct regex_error_data));
+
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->regex = pcre2_compile(
+	    (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
+	    &errordata->error_code, &errordata->error_offset, NULL);
+	if (!(*regex)->regex) {
+		goto err;
+	}
+
+	(*regex)->match_data =
+	    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
+	if (!(*regex)->match_data) {
+		goto err;
+	}
+	return 0;
+
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+char const *regex_version(void)
+{
+	static char version_buf[256];
+	size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
+	if (len <= 0 || len > sizeof(version_buf))
+		return NULL;
+
+	pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
+	return version_buf;
+}
+
+int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex)
+{
+	int rc;
+	uint32_t entry_len;
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0)
+		return -1;
+
+	if (entry_len) {
+		/*
+		 * this should yield exactly one because we store one pattern at
+		 * a time
+		 */
+		rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
+		if (rc != 1)
+			return -1;
+
+		*regex = regex_data_create();
+		if (!*regex)
+			return -1;
+
+		rc = pcre2_serialize_decode(&(*regex)->regex, 1,
+					    (PCRE2_SPTR)mmap_area->next_addr,
+					    NULL);
+		if (rc != 1)
+			goto err;
+
+		(*regex)->match_data =
+		    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
+		if (!(*regex)->match_data)
+			goto err;
+	}
+
+	/* and skip the decoded bit */
+	rc = next_entry(NULL, mmap_area, entry_len);
+	if (rc < 0)
+		goto err;
+
+	return 0;
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex)
+{
+	int rc = 0;
+	size_t len;
+	PCRE2_SIZE serialized_size;
+	uint32_t to_write = 0;
+	PCRE2_UCHAR *bytes = NULL;
+
+	if (do_write_precompregex) {
+		/* encode the patter for serialization */
+		rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex,
+					    1, &bytes, &serialized_size, NULL);
+		if (rc != 1) {
+			rc = -1;
+			goto out;
+		}
+		to_write = serialized_size;
+	}
+
+	/* write serialized pattern's size */
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1) {
+		rc = -1;
+		goto out;
+	}
+
+	if (do_write_precompregex) {
+		/* write serialized pattern */
+		len = fwrite(bytes, 1, to_write, fp);
+		if (len != to_write)
+			rc = -1;
+	}
+
+out:
+	if (bytes)
+		pcre2_serialize_free(bytes);
+
+	return rc;
+}
+
+void regex_data_free(struct regex_data *regex)
+{
+	if (regex) {
+		if (regex->regex)
+			pcre2_code_free(regex->regex);
+		if (regex->match_data)
+			pcre2_match_data_free(regex->match_data);
+		free(regex);
+	}
+}
+
+int regex_match(struct regex_data *regex, char const *subject, int partial)
+{
+	int rc;
+	rc = pcre2_match(
+	    regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
+	    partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
+	if (rc > 0)
+		return REGEX_MATCH;
+	switch (rc) {
+	case PCRE2_ERROR_PARTIAL:
+		return REGEX_MATCH_PARTIAL;
+	case PCRE2_ERROR_NOMATCH:
+		return REGEX_NO_MATCH;
+	default:
+		return REGEX_ERROR;
+	}
+}
+
+/*
+ * TODO Replace this compare function with something that actually compares the
+ * regular expressions.
+ * This compare function basically just compares the binary representations of
+ * the automatons, and because this representation contains pointers and
+ * metadata, it can only return a match if regex1 == regex2.
+ * Preferably, this function would be replaced with an algorithm that computes
+ * the equivalence of the automatons systematically.
+ */
+int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
+{
+	int rc;
+	size_t len1, len2;
+	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
+	assert(rc == 0);
+	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
+	assert(rc == 0);
+	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
+		return SELABEL_INCOMPARABLE;
+
+	return SELABEL_EQUAL;
+}
+
+#else // !USE_PCRE2
+
+/* Prior to version 8.20, libpcre did not have pcre_free_study() */
+#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
+#define pcre_free_study pcre_free
+#endif
+
+struct regex_data {
+	int owned;   /*
+		      * non zero if regex and pcre_extra is owned by this
+		      * structure and thus must be freed on destruction.
+		      */
+	pcre *regex; /* compiled regular expression */
+	union {
+		pcre_extra *sd; /* pointer to extra compiled stuff */
+		pcre_extra lsd; /* used to hold the mmap'd version */
+	};
+};
+
+int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
+		       struct regex_error_data *errordata)
+{
+	memset(errordata, 0, sizeof(struct regex_error_data));
+
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->regex =
+	    pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer,
+			 &errordata->error_offset, NULL);
+	if (!(*regex)->regex)
+		goto err;
+
+	(*regex)->owned = 1;
+
+	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
+	if (!(*regex)->sd && errordata->error_buffer)
+		goto err;
+
+	return 0;
+
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+char const *regex_version(void)
+{
+	return pcre_version();
+}
+
+int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex)
+{
+	int rc;
+	uint32_t entry_len;
+	size_t info_len;
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0 || !entry_len)
+		return -1;
+
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->owned = 0;
+	(*regex)->regex = (pcre *)mmap_area->next_addr;
+	rc = next_entry(NULL, mmap_area, entry_len);
+	if (rc < 0)
+		goto err;
+
+	/*
+	 * Check that regex lengths match. pcre_fullinfo()
+	 * also validates its magic number.
+	 */
+	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
+	if (rc < 0 || info_len != entry_len)
+		goto err;
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0 || !entry_len)
+		goto err;
+
+	if (entry_len) {
+		(*regex)->lsd.study_data = (void *)mmap_area->next_addr;
+		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
+		rc = next_entry(NULL, mmap_area, entry_len);
+		if (rc < 0)
+			goto err;
+
+		/* Check that study data lengths match. */
+		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
+				   PCRE_INFO_STUDYSIZE, &info_len);
+		if (rc < 0 || info_len != entry_len)
+			goto err;
+	}
+	return 0;
+
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+static inline pcre_extra *get_pcre_extra(struct regex_data *regex)
+{
+	if (!regex) return NULL;
+	if (regex->owned) {
+		return regex->sd;
+	} else if (regex->lsd.study_data) {
+		return &regex->lsd;
+	} else {
+		return NULL;
+	}
+}
+
+int regex_writef(struct regex_data *regex, FILE *fp, int unused)
+{
+	int rc;
+	size_t len;
+	uint32_t to_write;
+	size_t size;
+	pcre_extra *sd = get_pcre_extra(regex);
+
+	/* determine the size of the pcre data in bytes */
+	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
+	if (rc < 0)
+		return -1;
+
+	/* write the number of bytes in the pcre data */
+	to_write = size;
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1)
+		return -1;
+
+	/* write the actual pcre data as a char array */
+	len = fwrite(regex->regex, 1, to_write, fp);
+	if (len != to_write)
+		return -1;
+
+	if (sd) {
+		/* determine the size of the pcre study info */
+		rc =
+		    pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size);
+		if (rc < 0)
+			return -1;
+	} else
+		size = 0;
+
+	/* write the number of bytes in the pcre study data */
+	to_write = size;
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1)
+		return -1;
+
+	if (sd) {
+		/* write the actual pcre study data as a char array */
+		len = fwrite(sd->study_data, 1, to_write, fp);
+		if (len != to_write)
+			return -1;
+	}
+
+	return 0;
+}
+
+void regex_data_free(struct regex_data *regex)
+{
+	if (regex) {
+		if (regex->owned) {
+			if (regex->regex)
+				pcre_free(regex->regex);
+			if (regex->sd)
+				pcre_free_study(regex->sd);
+		}
+		free(regex);
+	}
+}
+
+int regex_match(struct regex_data *regex, char const *subject, int partial)
+{
+	int rc;
+
+	rc = pcre_exec(regex->regex, get_pcre_extra(regex),
+		       subject, strlen(subject), 0,
+		       partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
+	switch (rc) {
+	case 0:
+		return REGEX_MATCH;
+	case PCRE_ERROR_PARTIAL:
+		return REGEX_MATCH_PARTIAL;
+	case PCRE_ERROR_NOMATCH:
+		return REGEX_NO_MATCH;
+	default:
+		return REGEX_ERROR;
+	}
+}
+
+/*
+ * TODO Replace this compare function with something that actually compares the
+ * regular expressions.
+ * This compare function basically just compares the binary representations of
+ * the automatons, and because this representation contains pointers and
+ * metadata, it can only return a match if regex1 == regex2.
+ * Preferably, this function would be replaced with an algorithm that computes
+ * the equivalence of the automatons systematically.
+ */
+int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
+{
+	int rc;
+	size_t len1, len2;
+	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
+	assert(rc == 0);
+	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
+	assert(rc == 0);
+	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
+		return SELABEL_INCOMPARABLE;
+
+	return SELABEL_EQUAL;
+}
+
+#endif
+
+struct regex_data *regex_data_create(void)
+{
+	return (struct regex_data *)calloc(1, sizeof(struct regex_data));
+}
+
+void regex_format_error(struct regex_error_data const *error_data, char *buffer,
+			size_t buf_size)
+{
+	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
+	char *ptr = &buffer[buf_size - the_end_length];
+	int rc = 0;
+	size_t pos = 0;
+	if (!buffer || !buf_size)
+		return;
+	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
+	if (rc < 0)
+		/*
+		 * If snprintf fails it constitutes a logical error that needs
+		 * fixing.
+		 */
+		abort();
+
+	pos += rc;
+	if (pos >= buf_size)
+		goto truncated;
+
+	if (error_data->error_offset > 0) {
+#ifdef USE_PCRE2
+		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
+			      error_data->error_offset);
+#else
+		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
+			      error_data->error_offset);
+#endif
+		if (rc < 0)
+			abort();
+	}
+	pos += rc;
+	if (pos >= buf_size)
+		goto truncated;
+
+#ifdef USE_PCRE2
+	rc = pcre2_get_error_message(error_data->error_code,
+				     (PCRE2_UCHAR *)(buffer + pos),
+				     buf_size - pos);
+	if (rc == PCRE2_ERROR_NOMEMORY)
+		goto truncated;
+#else
+	rc = snprintf(buffer + pos, buf_size - pos, "%s",
+		      error_data->error_buffer);
+	if (rc < 0)
+		abort();
+
+	if ((size_t)rc < strlen(error_data->error_buffer))
+		goto truncated;
+#endif
+
+	return;
+
+truncated:
+	/* replace end of string with "..." to indicate that it was truncated */
+	switch (the_end_length) {
+	/* no break statements, fall-through is intended */
+	case 4:
+		*ptr++ = '.';
+	case 3:
+		*ptr++ = '.';
+	case 2:
+		*ptr++ = '.';
+	case 1:
+		*ptr++ = '\0';
+	default:
+		break;
+	}
+	return;
+}
diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
new file mode 100644
index 0000000..810b3c5
--- /dev/null
+++ b/libselinux/src/regex.h
@@ -0,0 +1,149 @@
+#ifndef SRC_REGEX_H_
+#define SRC_REGEX_H_
+
+#include <stdio.h>
+
+#ifdef USE_PCRE2
+#include <pcre2.h>
+#else
+#include <pcre.h>
+#endif
+
+#include "dso.h"
+
+enum { REGEX_MATCH,
+       REGEX_MATCH_PARTIAL,
+       REGEX_NO_MATCH,
+       REGEX_ERROR = -1,
+};
+
+struct regex_data;
+
+#ifdef USE_PCRE2
+struct regex_error_data {
+	int error_code;
+	PCRE2_SIZE error_offset;
+};
+#else
+struct regex_error_data {
+	char const *error_buffer;
+	int error_offset;
+};
+#endif
+
+struct mmap_area;
+
+/**
+ * regex_verison returns the version string of the underlying regular
+ * regular expressions library. In the case of PCRE it just returns the
+ * result of pcre_version(). In the case of PCRE2, the very first time this
+ * function is called it allocates a buffer large enough to hold the version
+ * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
+ * The allocated buffer will linger in memory until the calling process is being
+ * reaped.
+ *
+ * It may return NULL on error.
+ */
+char const *regex_version(void) hidden;
+/**
+ * This constructor function allocates a buffer for a regex_data structure.
+ * The buffer is being initialized with zeroes.
+ */
+struct regex_data *regex_data_create(void) hidden;
+/**
+ * This complementary destructor function frees the a given regex_data buffer.
+ * It also frees any non NULL member pointers with the appropriate pcreX_X_free
+ * function. For PCRE this function respects the extra_owned field and frees
+ * the pcre_extra data conditionally. Calling this function on a NULL pointer is
+ * save.
+ */
+void regex_data_free(struct regex_data *regex) hidden;
+/**
+ * This function compiles the regular expression. Additionally, it prepares
+ * data structures required by the different underlying engines. For PCRE
+ * it calls pcre_study to generate optional data required for optimized
+ * execution of the compiled pattern. In the case of PCRE2, it allocates
+ * a pcre2_match_data structure of appropriate size to hold all possible
+ * matches created by the pattern.
+ *
+ * @arg regex If successful, the structure returned through *regex was allocated
+ *            with regex_data_create and must be freed with regex_data_free.
+ * @arg pattern_string The pattern string that is to be compiled.
+ * @arg errordata A pointer to a regex_error_data structure must be passed
+ *                to this function. This structure depends on the underlying
+ *                implementation. It can be passed to regex_format_error
+ *                to generate a human readable error message.
+ * @retval 0 on success
+ * @retval -1 on error
+ */
+int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
+		       struct regex_error_data *errordata) hidden;
+/**
+ * This function loads a serialized precompiled pattern from a contiguous
+ * data region given by map_area.
+ *
+ * @arg map_area Description of the memory region holding a serialized
+ *               representation of the precompiled pattern.
+ * @arg regex If successful, the structure returned through *regex was allocated
+ *            with regex_data_create and must be freed with regex_data_free.
+ *
+ * @retval 0 on success
+ * @retval -1 on error
+ */
+int regex_load_mmap(struct mmap_area *map_area,
+		    struct regex_data **regex) hidden;
+/**
+ * This function stores a precompiled regular expression to a file.
+ * In the case of PCRE, it just dumps the binary representation of the
+ * precomplied pattern into a file. In the case of PCRE2, it uses the
+ * serialization function provided by the library.
+ *
+ * @arg regex The precomplied regular expression data.
+ * @arg fp A file stream specifying the output file.
+ * @arg do_write_precompregex If non-zero precompiled patterns are written to
+ *			      the output file (ignored by PCRE1 back-end).
+ */
+int regex_writef(struct regex_data *regex, FILE *fp,
+		 int do_write_precompregex) hidden;
+/**
+ * This function applies a precompiled pattern to a subject string and
+ * returns whether or not a match was found.
+ *
+ * @arg regex The precompiled pattern.
+ * @arg subject The subject string.
+ * @arg partial Boolean indicating if partial matches are wanted. A nonzero
+ *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
+ *              option to pcre_exec of pcre2_match.
+ * @retval REGEX_MATCH if a match was found
+ * @retval REGEX_MATCH_PARTIAL if a partial match was found
+ * @retval REGEX_NO_MATCH if no match was found
+ * @retval REGEX_ERROR if an error was encountered during the execution of the
+ *                     regular expression
+ */
+int regex_match(struct regex_data *regex, char const *subject,
+		int partial) hidden;
+/**
+ * This function compares two compiled regular expressions (regex1 and regex2).
+ * It compares the binary representations of the compiled patterns. It is a very
+ * crude approximation because the binary representation holds data like
+ * reference counters, that has nothing to do with the actual state machine.
+ *
+ * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
+ *                       the same
+ * @retval SELABEL_INCOMPARABLE otherwise
+ */
+int regex_cmp(struct regex_data *regex1, struct regex_data *regex2) hidden;
+/**
+ * This function takes the error data returned by regex_prepare_data and turns
+ * it in to a human readable error message.
+ * If the buffer given to hold the error message is to small it truncates the
+ * message and indicates the truncation with an ellipsis ("...") at the end of
+ * the buffer.
+ *
+ * @arg error_data Error data as returned by regex_prepare_data.
+ * @arg buffer String buffer to hold the formated error string.
+ * @arg buf_size Total size of the given bufer in bytes.
+ */
+void regex_format_error(struct regex_error_data const *error_data, char *buffer,
+			size_t buf_size) hidden;
+#endif /* SRC_REGEX_H_ */
diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
index 8497cb4..1943bef 100644
--- a/libselinux/utils/Makefile
+++ b/libselinux/utils/Makefile
@@ -24,12 +24,15 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
           -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
           -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
           -Werror -Wno-aggregate-return -Wno-redundant-decls
-override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
+override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
 LDLIBS += -L../src -lselinux -L$(LIBDIR)
+PCRE_LDFLAGS ?= -lpcre
 
 TARGETS=$(patsubst %.c,%,$(wildcard *.c))
 
-sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
+sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol
+
+sefcontext_compile: sefcontext_compile.o ../src/regex.o
 
 selinux_restorecon: LDLIBS += -lsepol
 
diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
index fd6fb78..770ec4c 100644
--- a/libselinux/utils/sefcontext_compile.c
+++ b/libselinux/utils/sefcontext_compile.c
@@ -1,6 +1,5 @@
 #include <ctype.h>
 #include <errno.h>
-#include <pcre.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@@ -13,6 +12,7 @@
 #include <sepol/sepol.h>
 
 #include "../src/label_file.h"
+#include "../src/regex.h"
 
 const char *policy_file;
 static int ctx_err;
@@ -92,7 +92,8 @@ out:
  *	u32  - data length of the pcre regex study daya
  *	char - a buffer holding the raw pcre regex study data
  */
-static int write_binary_file(struct saved_data *data, int fd)
+static int write_binary_file(struct saved_data *data, int fd,
+			     int do_write_precompregex)
 {
 	struct spec *specs = data->spec_arr;
 	FILE *bin_file;
@@ -101,6 +102,7 @@ static int write_binary_file(struct saved_data *data, int fd)
 	uint32_t section_len;
 	uint32_t i;
 	int rc;
+	const char *reg_version;
 
 	bin_file = fdopen(fd, "w");
 	if (!bin_file) {
@@ -119,12 +121,15 @@ static int write_binary_file(struct saved_data *data, int fd)
 	if (len != 1)
 		goto err;
 
-	/* write the pcre version */
-	section_len = strlen(pcre_version());
+	/* write version of the regex back-end */
+	reg_version = regex_version();
+	if (!reg_version)
+		goto err;
+	section_len = strlen(reg_version);
 	len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
 	if (len != 1)
 		goto err;
-	len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
+	len = fwrite(reg_version, sizeof(char), section_len, bin_file);
 	if (len != section_len)
 		goto err;
 
@@ -162,10 +167,8 @@ static int write_binary_file(struct saved_data *data, int fd)
 		mode_t mode = specs[i].mode;
 		size_t prefix_len = specs[i].prefix_len;
 		int32_t stem_id = specs[i].stem_id;
-		pcre *re = specs[i].regex;
-		pcre_extra *sd = get_pcre_extra(&specs[i]);
+		struct regex_data *re = specs[i].regex;
 		uint32_t to_write;
-		size_t size;
 
 		/* length of the context string (including nul) */
 		to_write = strlen(context) + 1;
@@ -212,42 +215,10 @@ static int write_binary_file(struct saved_data *data, int fd)
 		if (len != 1)
 			goto err;
 
-		/* determine the size of the pcre data in bytes */
-		rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
+		/* Write regex related data */
+		rc = regex_writef(re, bin_file, do_write_precompregex);
 		if (rc < 0)
 			goto err;
-
-		/* write the number of bytes in the pcre data */
-		to_write = size;
-		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
-		if (len != 1)
-			goto err;
-
-		/* write the actual pcre data as a char array */
-		len = fwrite(re, 1, to_write, bin_file);
-		if (len != to_write)
-			goto err;
-
-		if (sd) {
-			/* determine the size of the pcre study info */
-			rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
-			if (rc < 0)
-				goto err;
-		} else
-			size = 0;
-
-		/* write the number of bytes in the pcre study data */
-		to_write = size;
-		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
-		if (len != 1)
-			goto err;
-
-		if (sd) {
-			/* write the actual pcre study data as a char array */
-			len = fwrite(sd->study_data, 1, to_write, bin_file);
-			if (len != to_write)
-				goto err;
-		}
 	}
 
 	rc = 0;
@@ -270,8 +241,7 @@ static void free_specs(struct saved_data *data)
 		free(specs[i].lr.ctx_trans);
 		free(specs[i].regex_str);
 		free(specs[i].type_str);
-		pcre_free(specs[i].regex);
-		pcre_free_study(specs[i].sd);
+		regex_data_free(specs[i].regex);
 	}
 	free(specs);
 
@@ -293,6 +263,12 @@ static void usage(const char *progname)
 	    "         will be fc_file with the .bin suffix appended.\n\t"
 	    "-p       Optional binary policy file that will be used to\n\t"
 	    "         validate contexts defined in the fc_file.\n\t"
+	    "-r       Include precompiled regular expressions in the output.\n\t"
+	    "         (PCRE2 only. Compiled PCRE2 regular expressions are\n\t"
+	    "         not portable across architectures. When linked against\n\t"
+	    "         PCRE this flag is ignored)\n\t"
+	    "         Omit precompiled regular expressions (only meaningful\n\t"
+	    "         when using PCRE2 regular expression back-end).\n\t"
 	    "fc_file  The text based file contexts file to be processed.\n",
 	    progname);
 		exit(EXIT_FAILURE);
@@ -302,6 +278,7 @@ int main(int argc, char *argv[])
 {
 	const char *path = NULL;
 	const char *out_file = NULL;
+	int do_write_precompregex = 0;
 	char stack_path[PATH_MAX + 1];
 	char *tmp = NULL;
 	int fd, rc, opt;
@@ -313,7 +290,7 @@ int main(int argc, char *argv[])
 	if (argc < 2)
 		usage(argv[0]);
 
-	while ((opt = getopt(argc, argv, "o:p:")) > 0) {
+	while ((opt = getopt(argc, argv, "o:p:r")) > 0) {
 		switch (opt) {
 		case 'o':
 			out_file = optarg;
@@ -321,6 +298,9 @@ int main(int argc, char *argv[])
 		case 'p':
 			policy_file = optarg;
 			break;
+		case 'r':
+			do_write_precompregex = 1;
+			break;
 		default:
 			usage(argv[0]);
 		}
@@ -418,7 +398,7 @@ int main(int argc, char *argv[])
 		goto err_unlink;
 	}
 
-	rc = write_binary_file(data, fd);
+	rc = write_binary_file(data, fd, do_write_precompregex);
 	if (rc < 0)
 		goto err_unlink;
 
diff --git a/policycoreutils/restorecond/Makefile b/policycoreutils/restorecond/Makefile
index f99e1e7..253246d 100644
--- a/policycoreutils/restorecond/Makefile
+++ b/policycoreutils/restorecond/Makefile
@@ -17,7 +17,14 @@ DBUSLIB = -ldbus-glib-1 -ldbus-1
 CFLAGS ?= -g -Werror -Wall -W
 override CFLAGS += -I$(PREFIX)/include $(DBUSFLAGS) -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/lib/glib-2.0/include
 
-LDLIBS += -lselinux $(DBUSLIB) -lglib-2.0 -L$(LIBDIR)
+USE_PCRE2 ?= n
+ifeq ($(USE_PCRE2),y)
+	PCRE_LDFLAGS := -lpcre2-8
+else
+	PCRE_LDFLAGS := -lpcre
+endif
+
+LDLIBS += -lselinux $(PCRE_LDFLAGS) $(DBUSLIB) -lglib-2.0 -L$(LIBDIR)
 
 all: restorecond
 
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-15 16:14 [PATCH] libselinux: add support for pcre2 Janis Danisevskis
@ 2016-09-15 17:53 ` Stephen Smalley
  0 siblings, 0 replies; 24+ messages in thread
From: Stephen Smalley @ 2016-09-15 17:53 UTC (permalink / raw)
  To: Janis Danisevskis, selinux, seandroid-list, jwcart2

[-- Attachment #1: Type: text/plain, Size: 1245 bytes --]

On 09/15/2016 12:14 PM, Janis Danisevskis wrote:
> From: Janis Danisevskis <jdanis@google.com>
> 
> This patch moves all pcre1/2 dependencies into the new files regex.h
> and regex.c implementing the common denominator of features needed
> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> used implementations.
> 
> As of this patch libselinux supports either pcre or pcre2 but not
> both at the same time. The persistently stored file contexts
> information differs. This means libselinux can only load file
> context files generated by sefcontext_compile build with the
> same pcre variant.
> 
> Also, for pcre2 the persistent format is architecture dependent.
> Stored precompiled regular expressions can only be used on the
> same architecture they were generated on. If pcre2 is used,
> sefcontext_compile now respects the "-r". This flag makes
> sefcontext_compile include the precompiled regular expressions
> in the output file. The default is to omit them, so that the
> output remains portable at the cost of having to recompile
> the regular expressions at load time, or rather on first use.
> 
> Signed-off-by: Janis Danisevskis <jdanis@google.com>

Thanks, applied, with the attached fix on top to allow building.

[-- Attachment #2: 0001-libselinux-regex_writef-Mark-unused-argument-with-__.patch --]
[-- Type: text/x-patch, Size: 827 bytes --]

>From a9162c813adaadbdb632d1a71d7c6ffc3e43b1b0 Mon Sep 17 00:00:00 2001
From: Stephen Smalley <sds@tycho.nsa.gov>
Date: Thu, 15 Sep 2016 13:43:24 -0400
Subject: [PATCH] libselinux: regex_writef: Mark unused argument with
 __attribute__((unused)).

Signed-off-by: Stephen Smalley <sds@tycho.nsa.gov>
---
 libselinux/src/regex.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
index 646351b..750088e 100644
--- a/libselinux/src/regex.c
+++ b/libselinux/src/regex.c
@@ -312,7 +312,8 @@ static inline pcre_extra *get_pcre_extra(struct regex_data *regex)
 	}
 }
 
-int regex_writef(struct regex_data *regex, FILE *fp, int unused)
+int regex_writef(struct regex_data *regex, FILE *fp,
+		 int unused __attribute__((unused)))
 {
 	int rc;
 	size_t len;
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 19:06                       ` Stephen Smalley
@ 2016-09-16 19:45                         ` Janis Danisevskis
  0 siblings, 0 replies; 24+ messages in thread
From: Janis Danisevskis @ 2016-09-16 19:45 UTC (permalink / raw)
  To: Stephen Smalley; +Cc: seandroid-list, selinux

[-- Attachment #1: Type: text/plain, Size: 1098 bytes --]

On Fri, Sep 16, 2016 at 8:06 PM, Stephen Smalley <sds@tycho.nsa.gov> wrote:

> On 09/16/2016 02:57 PM, Janis Danisevskis wrote:
> > I have started implementing an arch string patch. Unfortunately, i did
> > not manage to finish it before I had to leave the office today.
> > In essence I did this:
> > The regex_arch_string has three components: the pointer width determined
> > by sizeof(void*), PCRE2_SIZE width determined by sizeof(), and
> > endianess determined by
> > __BYTE_ORDER__==__ORDER_BIG/LITTEL_ENDIAN__
> >
> > For example, the resulting string for x86_64 and aarch64el should look
> like
> > this: "8-8-el".
> >
> > I bumped the compiled context version number and added the string
> > right after the version in the output.
> > Comments?
>
> What's the error handling when the versions do not match?  Just ignore
> the compiled regexes in file_contexts.bin?
>
>
> Yes, If the version and arch match attempt to load the compiled regexes
from the file. Otherwise, skip the corresponding regions of the file and
leave the regex field NULL, so that they will be compiled on first use.

[-- Attachment #2: Type: text/html, Size: 1565 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 18:57                     ` Janis Danisevskis
@ 2016-09-16 19:06                       ` Stephen Smalley
  2016-09-16 19:45                         ` Janis Danisevskis
  0 siblings, 1 reply; 24+ messages in thread
From: Stephen Smalley @ 2016-09-16 19:06 UTC (permalink / raw)
  To: Janis Danisevskis; +Cc: seandroid-list, selinux

On 09/16/2016 02:57 PM, Janis Danisevskis wrote:
> I have started implementing an arch string patch. Unfortunately, i did
> not manage to finish it before I had to leave the office today.
> In essence I did this:
> The regex_arch_string has three components: the pointer width determined
> by sizeof(void*), PCRE2_SIZE width determined by sizeof(), and
> endianess determined by
> __BYTE_ORDER__==__ORDER_BIG/LITTEL_ENDIAN__
> 
> For example, the resulting string for x86_64 and aarch64el should look like
> this: "8-8-el".
> 
> I bumped the compiled context version number and added the string
> right after the version in the output.
> Comments?

What's the error handling when the versions do not match?  Just ignore
the compiled regexes in file_contexts.bin?

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 14:52                   ` Stephen Smalley
@ 2016-09-16 18:57                     ` Janis Danisevskis
  2016-09-16 19:06                       ` Stephen Smalley
  0 siblings, 1 reply; 24+ messages in thread
From: Janis Danisevskis @ 2016-09-16 18:57 UTC (permalink / raw)
  To: Stephen Smalley
  Cc: Jason Zaman, William Roberts, Janis Danisevskis, seandroid-list, selinux

[-- Attachment #1: Type: text/plain, Size: 2987 bytes --]

I have started implementing an arch string patch. Unfortunately, i did
not manage to finish it before I had to leave the office today.
In essence I did this:
The regex_arch_string has three components: the pointer width determined
by sizeof(void*), PCRE2_SIZE width determined by sizeof(), and
endianess determined by
__BYTE_ORDER__==__ORDER_BIG/LITTEL_ENDIAN__

For example, the resulting string for x86_64 and aarch64el should look like
this: "8-8-el".

I bumped the compiled context version number and added the string
right after the version in the output.
Comments?


On Fri, Sep 16, 2016 at 3:52 PM, Stephen Smalley <sds@tycho.nsa.gov> wrote:

> On 09/16/2016 09:31 AM, Jason Zaman wrote:
> > On Fri, Sep 16, 2016 at 06:15:01AM -0700, William Roberts wrote:
> >> On Fri, Sep 16, 2016 at 6:09 AM, Janis Danisevskis <jdanis@google.com>
> wrote:
> >>> I don't mind. Then before sefcontext_compile -r gets widely adapted we
> >>> should change the semantic quickly. I'll prepare a patch.
> >>
> >> Did I miss something and this was merged? Iv'e been out recovering
> >> from a surgery so I haven't been
> >> following this as well as I normally would have,
> >>
> >> If its merged, just leave it.
> >
> > Its the very latest thing in master yeah, but I do also agree with
> changing it.
> >
> > I just wanted to add that from a distro perspective, compiling things by
> > default makes more sense. In gentoo, the package post_install runs
> > sefcontext_compile. Using the fcontext files happens a lot more than any
> > updates to libselinux (and thus potential format changes) so I'm pretty
> > sure most people would prefer to have the speedup.
> >
> > Gentoo does it on the machine itself, I am not sure about redhat or
> > debian but I wouldnt be surprised if they do it per-arch at the very
> > least so cross-arch probably isnt an issue.
>
> In Red Hat, SELinux policy is noarch, and they switched to precompiling
> both policy and file_contexts.bin at build time to minimize the cost at
> package install time.  Otherwise, in small VMs, they had issues with
> running out of memory during semodule -B.  So file_contexts.bin
> presently has to be arch-independent, or we need the arch properties
> detection logic and fallback.  That said, none of this matters unless
> you build with USE_PCRE2=y, and no one outside of Android is doing that
> today.
>
> > Also, I think we should add the arch to the version string stored. I
> > would rather have false negatives than positives especially since we are
> > not 100% sure exactly what part of the arch is important. We can always
> > loosen it up later if that gets locked down.
>
> We don't want the arch string itself, because that would invalidate use
> of file_contexts.bin entirely on typical Android use cases (build on
> x86_64, install to ARM), but only the relevant properties.  And for
> Android, that is fatal - there is no file_contexts text file on which to
> fallback anymore.  They only ship file_contexts.bin.
>
>

[-- Attachment #2: Type: text/html, Size: 3800 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 13:31                 ` Jason Zaman
  2016-09-16 13:43                   ` William Roberts
  2016-09-16 13:58                   ` Janis Danisevskis
@ 2016-09-16 14:52                   ` Stephen Smalley
  2016-09-16 18:57                     ` Janis Danisevskis
  2 siblings, 1 reply; 24+ messages in thread
From: Stephen Smalley @ 2016-09-16 14:52 UTC (permalink / raw)
  To: Jason Zaman, William Roberts
  Cc: Janis Danisevskis, Janis Danisevskis, seandroid-list, selinux

On 09/16/2016 09:31 AM, Jason Zaman wrote:
> On Fri, Sep 16, 2016 at 06:15:01AM -0700, William Roberts wrote:
>> On Fri, Sep 16, 2016 at 6:09 AM, Janis Danisevskis <jdanis@google.com> wrote:
>>> I don't mind. Then before sefcontext_compile -r gets widely adapted we
>>> should change the semantic quickly. I'll prepare a patch.
>>
>> Did I miss something and this was merged? Iv'e been out recovering
>> from a surgery so I haven't been
>> following this as well as I normally would have,
>>
>> If its merged, just leave it.
> 
> Its the very latest thing in master yeah, but I do also agree with changing it.
> 
> I just wanted to add that from a distro perspective, compiling things by
> default makes more sense. In gentoo, the package post_install runs
> sefcontext_compile. Using the fcontext files happens a lot more than any
> updates to libselinux (and thus potential format changes) so I'm pretty
> sure most people would prefer to have the speedup.
> 
> Gentoo does it on the machine itself, I am not sure about redhat or
> debian but I wouldnt be surprised if they do it per-arch at the very
> least so cross-arch probably isnt an issue.

In Red Hat, SELinux policy is noarch, and they switched to precompiling
both policy and file_contexts.bin at build time to minimize the cost at
package install time.  Otherwise, in small VMs, they had issues with
running out of memory during semodule -B.  So file_contexts.bin
presently has to be arch-independent, or we need the arch properties
detection logic and fallback.  That said, none of this matters unless
you build with USE_PCRE2=y, and no one outside of Android is doing that
today.

> Also, I think we should add the arch to the version string stored. I
> would rather have false negatives than positives especially since we are
> not 100% sure exactly what part of the arch is important. We can always
> loosen it up later if that gets locked down.

We don't want the arch string itself, because that would invalidate use
of file_contexts.bin entirely on typical Android use cases (build on
x86_64, install to ARM), but only the relevant properties.  And for
Android, that is fatal - there is no file_contexts text file on which to
fallback anymore.  They only ship file_contexts.bin.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 14:06                       ` Jason Zaman
@ 2016-09-16 14:14                         ` William Roberts
  0 siblings, 0 replies; 24+ messages in thread
From: William Roberts @ 2016-09-16 14:14 UTC (permalink / raw)
  To: Jason Zaman
  Cc: seandroid-list, selinux, Janis Danisevskis, Janis Danisevskis,
	Stephen Smalley

[-- Attachment #1: Type: text/plain, Size: 2888 bytes --]

On Sep 16, 2016 07:06, "Jason Zaman" <jason@perfinion.com> wrote:
>
> On Fri, Sep 16, 2016 at 06:51:25AM -0700, William Roberts wrote:
> > On Fri, Sep 16, 2016 at 6:43 AM, William Roberts
> > <bill.c.roberts@gmail.com> wrote:
> > > On Fri, Sep 16, 2016 at 6:31 AM, Jason Zaman <jason@perfinion.com>
wrote:
> > >> On Fri, Sep 16, 2016 at 06:15:01AM -0700, William Roberts wrote:
> > >>> On Fri, Sep 16, 2016 at 6:09 AM, Janis Danisevskis <
jdanis@google.com> wrote:
> > >>> > I don't mind. Then before sefcontext_compile -r gets widely
adapted we
> > >>> > should change the semantic quickly. I'll prepare a patch.
> > >>>
> > >>> Did I miss something and this was merged? Iv'e been out recovering
> > >>> from a surgery so I haven't been
> > >>> following this as well as I normally would have,
> > >>>
> > >>> If its merged, just leave it.
> > >>
> > >> Its the very latest thing in master yeah, but I do also agree with
changing it.
> > >
> > > I'd prefer it changed myself. Another argument pro-change is that one
doesn't
> > > know if its a PCRE2 or PCRE1 compatable version of the libraries, we
should
> > > probably have an ability to intergoate that via API so we can print it
> > > out in help
> > > dialogue, that this tool so we at least know what it can support.
> > >
> > > The biggest thing that needs to get fixed with this, is that no matter
> > > if it contains the
> > > pre-compiled regexs or not, it should always load and work on Android.
> > > In distros,
> > > it will fall back to file_contexts, but we don't have this in Android.
> > > This ties into the arch
> > > version information below. But if the arch differs, always recompile.
> > > The alternative to
> > > this, is just go back to textual fc file son Android, since we won't
> > > be using any of the
> > > features of binary fc's. Better yet, in the Android build, I would
> > > check to see if host arch
> > > is the same as target arch, or let an OEM set a flag, to do the
> > > compilation. But I would
> > > consider those stretch goals, and just revert android back to textual
files.
> >
> > My ramblings might not be clear here. If we just version it with arch
> > and require
> > an exact match, we don't need -r, so that option goes away.
>
> This is what I was aiming for too, currently the pcre_version() is just
> strcmp'd so changing it to cat(pcre2_version(), arch_string()) should
> avoid any problems. It may turn out to be overzealous but it will always
> be safe. I dont think splitting pcre_version and arch into separate
> fields makes a huge difference if you prefer that. Currently the version

It can make a difference if we ever need to parse any information for some
reason in the future. We'd then have to split them back apart. This is just
a bit more future proof IMO.

> numbers are not parsed tho, so checking arch only for pcre2 seems like a
> lot more work.
>
> -- Jason

[-- Attachment #2: Type: text/html, Size: 3876 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 13:51                     ` William Roberts
@ 2016-09-16 14:06                       ` Jason Zaman
  2016-09-16 14:14                         ` William Roberts
  0 siblings, 1 reply; 24+ messages in thread
From: Jason Zaman @ 2016-09-16 14:06 UTC (permalink / raw)
  To: William Roberts
  Cc: Janis Danisevskis, Janis Danisevskis, seandroid-list,
	Stephen Smalley, selinux

On Fri, Sep 16, 2016 at 06:51:25AM -0700, William Roberts wrote:
> On Fri, Sep 16, 2016 at 6:43 AM, William Roberts
> <bill.c.roberts@gmail.com> wrote:
> > On Fri, Sep 16, 2016 at 6:31 AM, Jason Zaman <jason@perfinion.com> wrote:
> >> On Fri, Sep 16, 2016 at 06:15:01AM -0700, William Roberts wrote:
> >>> On Fri, Sep 16, 2016 at 6:09 AM, Janis Danisevskis <jdanis@google.com> wrote:
> >>> > I don't mind. Then before sefcontext_compile -r gets widely adapted we
> >>> > should change the semantic quickly. I'll prepare a patch.
> >>>
> >>> Did I miss something and this was merged? Iv'e been out recovering
> >>> from a surgery so I haven't been
> >>> following this as well as I normally would have,
> >>>
> >>> If its merged, just leave it.
> >>
> >> Its the very latest thing in master yeah, but I do also agree with changing it.
> >
> > I'd prefer it changed myself. Another argument pro-change is that one doesn't
> > know if its a PCRE2 or PCRE1 compatable version of the libraries, we should
> > probably have an ability to intergoate that via API so we can print it
> > out in help
> > dialogue, that this tool so we at least know what it can support.
> >
> > The biggest thing that needs to get fixed with this, is that no matter
> > if it contains the
> > pre-compiled regexs or not, it should always load and work on Android.
> > In distros,
> > it will fall back to file_contexts, but we don't have this in Android.
> > This ties into the arch
> > version information below. But if the arch differs, always recompile.
> > The alternative to
> > this, is just go back to textual fc file son Android, since we won't
> > be using any of the
> > features of binary fc's. Better yet, in the Android build, I would
> > check to see if host arch
> > is the same as target arch, or let an OEM set a flag, to do the
> > compilation. But I would
> > consider those stretch goals, and just revert android back to textual files.
> 
> My ramblings might not be clear here. If we just version it with arch
> and require
> an exact match, we don't need -r, so that option goes away.

This is what I was aiming for too, currently the pcre_version() is just
strcmp'd so changing it to cat(pcre2_version(), arch_string()) should
avoid any problems. It may turn out to be overzealous but it will always
be safe. I dont think splitting pcre_version and arch into separate
fields makes a huge difference if you prefer that. Currently the version
numbers are not parsed tho, so checking arch only for pcre2 seems like a
lot more work.

-- Jason

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 13:31                 ` Jason Zaman
  2016-09-16 13:43                   ` William Roberts
@ 2016-09-16 13:58                   ` Janis Danisevskis
  2016-09-16 14:52                   ` Stephen Smalley
  2 siblings, 0 replies; 24+ messages in thread
From: Janis Danisevskis @ 2016-09-16 13:58 UTC (permalink / raw)
  To: Jason Zaman, William Roberts
  Cc: Janis Danisevskis, seandroid-list, Stephen Smalley, selinux

[-- Attachment #1: Type: text/plain, Size: 6426 bytes --]

On Fri, Sep 16, 2016 at 2:31 PM Jason Zaman <jason@perfinion.com> wrote:

> On Fri, Sep 16, 2016 at 06:15:01AM -0700, William Roberts wrote:
> > On Fri, Sep 16, 2016 at 6:09 AM, Janis Danisevskis <jdanis@google.com>
> wrote:
> > > I don't mind. Then before sefcontext_compile -r gets widely adapted we
> > > should change the semantic quickly. I'll prepare a patch.
> >
> > Did I miss something and this was merged? Iv'e been out recovering
> > from a surgery so I haven't been
> > following this as well as I normally would have,
> >
> > If its merged, just leave it.
>
> Its the very latest thing in master yeah, but I do also agree with
> changing it.
>
> I just wanted to add that from a distro perspective, compiling things by
> default makes more sense. In gentoo, the package post_install runs
> sefcontext_compile. Using the fcontext files happens a lot more than any
> updates to libselinux (and thus potential format changes) so I'm pretty
> sure most people would prefer to have the speedup.
>
> Gentoo does it on the machine itself, I am not sure about redhat or
> debian but I wouldnt be surprised if they do it per-arch at the very
> least so cross-arch probably isnt an issue.
>
> Also, I think we should add the arch to the version string stored. I
> would rather have false negatives than positives especially since we are
> not 100% sure exactly what part of the arch is important. We can always
> loosen it up later if that gets locked down.
>

Good, I will add an architecture string.
>From the PCRE2 documentation: [1] "The host on which the patterns are
reloaded must be running the same version of PCRE2, with the same code unit
width, and must also have the same endianness, pointer width and PCRE2_SIZE
type."
I will encode these three properties in the arch string.

[1] http://www.pcre.org/current/doc/html/pcre2serialize.html


> Isnt the selinux policy in android stored in the rootfs or /system?
> isnt that all read-only? how would re-compiling on the device work?
> Does android put the fcontext.bin files on /data? that seems insecure.
>

On android file_context.bin is in / and read only. If the patterns are
compiled on the device, they are not stored persistently. AFAIK this
happens only once upon every boot.


>
> -- Jason
>
> > > On Fri, Sep 16, 2016 at 1:35 PM William Roberts <
> bill.c.roberts@gmail.com>
> > > wrote:
> > >>
> > >> <snip>
> > >> >
> > >> >
> > >> > That's just the thing. Without -r the phone _will_ boot because the
> > >> > regexes
> > >> > are compiled on first use. With -r and an arch mismatch we have an
> > >> > undefined
> > >> > behavior, which is bad.
> > >>
> > >> That's just a limitation of the current design.
> > >>
> > >> >
> > >> > See, I don't currently know what part of the architecture is
> important.
> > >> > Will
> > >> > word size and endianess be enough, e.g., will regexes generated on
> > >> > x86_64el
> > >> > work on armv8el (apparently this is what I observed but it could
> change
> > >> > at
> > >> > the whim of the PCRE2 maintainers)? So what will be the encoding?
> If we
> > >> > encode the complete architecture then the typical case for android
> > >> > (build on
> > >> > x86_64 for armv7/armv8) will yield the same result as without the -r
> > >> > option,
> > >> > namely, we rebuild the regexes on the device. If we just encode word
> > >> > size
> > >> > and endianess it may work for a while... I just don't feel
> comfortable
> > >> > enough to make a decision at this point.
> > >>
> > >> Then shelve it and use textual file contexts. The only purpose of the
> > >> binary format is to
> > >> include the pre-compiled regex;s so if you cant use that feature,
> > >> there's no point in
> > >> using binary format.
> > >>
> > >> My thought would be that it has to be an exact match for architecture
> > >> upfront, then
> > >> possibly investigate relaxing the requirement later. But, IIRC, if we
> get
> > >> a 30%
> > >> increase in text file load time, theirs really no point, for the
> > >> binary format on Android.
> > >> Android fic files are smaller, and have much simpler regex's compared
> > >> to our desktop
> > >> brethren.
> > >>
> > >> >
> > >> > My limited understanding is that the automata built by PCRE2 use
> > >> > pointers to
> > >> > link states or other structures. These pointers are symbolized when
> > >> > serialized, but they keep their width, which is why the regexes are
> at
> > >> > least
> > >> > sensitive to the word size.
> > >> >
> > >> > Just a wild Idea:
> > >> > PCRE2 also supports JIT compiling. I did not use this here because
> I did
> > >> > not
> > >> > feel comfortable for libselinux to depend on the execmem
> permission. But
> > >> > this could be developed into pre cross compiling the regexes into
> native
> > >> > code, which can than be linked into a device specific library. But I
> > >> > guess
> > >> > this would require quite some cooperation with the PCRE2 developers.
> > >>
> > >> I was going to ask if the arch dependency was on JIT'd code or just
> > >> something else
> > >> which you elaborated above with word size/endiness, etc.
> > >>
> > >> >
> > >> >>
> > >> >> The other thing is, this is only an issue on binary formated
> > >> >> file_context
> > >> >> files,
> > >> >> this is the ideal reason to move back to text files, since their
> is no
> > >> >> speedup
> > >> >> gained if we have to compile them.
> > >> >
> > >> >
> > >> > Right, or you could see it as an intermediate solution that does not
> > >> > require
> > >> > changes to the build system until we can properly cross compile the
> > >> > regexes.
> > >>
> > >> If you add an option to sefcontext_compile it should be to remove
> > >> them. not add it in.
> > >> This keeps it consistent with teh behavior for PCRE, it would be add
> > >> to say, "make me
> > >> a binary fc, but don't actually embed the regex information", since
> > >> that is currently not
> > >> the default behavior. Changing the Android make recipe is trivial, so
> > >> adding -r shouldn't
> > >> for Android shouldn't be a show stopper.
> > >>
> > >> <snip>
> >
> >
> >
> > --
> > Respectfully,
> >
> > William C Roberts
> > _______________________________________________
> > Seandroid-list mailing list
> > Seandroid-list@tycho.nsa.gov
> > To unsubscribe, send email to Seandroid-list-leave@tycho.nsa.gov.
> > To get help, send an email containing "help" to
> Seandroid-list-request@tycho.nsa.gov.
>

[-- Attachment #2: Type: text/html, Size: 11624 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 13:43                   ` William Roberts
@ 2016-09-16 13:51                     ` William Roberts
  2016-09-16 14:06                       ` Jason Zaman
  0 siblings, 1 reply; 24+ messages in thread
From: William Roberts @ 2016-09-16 13:51 UTC (permalink / raw)
  To: Jason Zaman
  Cc: Janis Danisevskis, Janis Danisevskis, seandroid-list,
	Stephen Smalley, selinux

On Fri, Sep 16, 2016 at 6:43 AM, William Roberts
<bill.c.roberts@gmail.com> wrote:
> On Fri, Sep 16, 2016 at 6:31 AM, Jason Zaman <jason@perfinion.com> wrote:
>> On Fri, Sep 16, 2016 at 06:15:01AM -0700, William Roberts wrote:
>>> On Fri, Sep 16, 2016 at 6:09 AM, Janis Danisevskis <jdanis@google.com> wrote:
>>> > I don't mind. Then before sefcontext_compile -r gets widely adapted we
>>> > should change the semantic quickly. I'll prepare a patch.
>>>
>>> Did I miss something and this was merged? Iv'e been out recovering
>>> from a surgery so I haven't been
>>> following this as well as I normally would have,
>>>
>>> If its merged, just leave it.
>>
>> Its the very latest thing in master yeah, but I do also agree with changing it.
>
> I'd prefer it changed myself. Another argument pro-change is that one doesn't
> know if its a PCRE2 or PCRE1 compatable version of the libraries, we should
> probably have an ability to intergoate that via API so we can print it
> out in help
> dialogue, that this tool so we at least know what it can support.
>
> The biggest thing that needs to get fixed with this, is that no matter
> if it contains the
> pre-compiled regexs or not, it should always load and work on Android.
> In distros,
> it will fall back to file_contexts, but we don't have this in Android.
> This ties into the arch
> version information below. But if the arch differs, always recompile.
> The alternative to
> this, is just go back to textual fc file son Android, since we won't
> be using any of the
> features of binary fc's. Better yet, in the Android build, I would
> check to see if host arch
> is the same as target arch, or let an OEM set a flag, to do the
> compilation. But I would
> consider those stretch goals, and just revert android back to textual files.

My ramblings might not be clear here. If we just version it with arch
and require
an exact match, we don't need -r, so that option goes away.



<snip>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 13:31                 ` Jason Zaman
@ 2016-09-16 13:43                   ` William Roberts
  2016-09-16 13:51                     ` William Roberts
  2016-09-16 13:58                   ` Janis Danisevskis
  2016-09-16 14:52                   ` Stephen Smalley
  2 siblings, 1 reply; 24+ messages in thread
From: William Roberts @ 2016-09-16 13:43 UTC (permalink / raw)
  To: Jason Zaman
  Cc: Janis Danisevskis, Janis Danisevskis, seandroid-list,
	Stephen Smalley, selinux

On Fri, Sep 16, 2016 at 6:31 AM, Jason Zaman <jason@perfinion.com> wrote:
> On Fri, Sep 16, 2016 at 06:15:01AM -0700, William Roberts wrote:
>> On Fri, Sep 16, 2016 at 6:09 AM, Janis Danisevskis <jdanis@google.com> wrote:
>> > I don't mind. Then before sefcontext_compile -r gets widely adapted we
>> > should change the semantic quickly. I'll prepare a patch.
>>
>> Did I miss something and this was merged? Iv'e been out recovering
>> from a surgery so I haven't been
>> following this as well as I normally would have,
>>
>> If its merged, just leave it.
>
> Its the very latest thing in master yeah, but I do also agree with changing it.

I'd prefer it changed myself. Another argument pro-change is that one doesn't
know if its a PCRE2 or PCRE1 compatable version of the libraries, we should
probably have an ability to intergoate that via API so we can print it
out in help
dialogue, that this tool so we at least know what it can support.

The biggest thing that needs to get fixed with this, is that no matter
if it contains the
pre-compiled regexs or not, it should always load and work on Android.
In distros,
it will fall back to file_contexts, but we don't have this in Android.
This ties into the arch
version information below. But if the arch differs, always recompile.
The alternative to
this, is just go back to textual fc file son Android, since we won't
be using any of the
features of binary fc's. Better yet, in the Android build, I would
check to see if host arch
is the same as target arch, or let an OEM set a flag, to do the
compilation. But I would
consider those stretch goals, and just revert android back to textual files.

>
> I just wanted to add that from a distro perspective, compiling things by
> default makes more sense. In gentoo, the package post_install runs
> sefcontext_compile. Using the fcontext files happens a lot more than any
> updates to libselinux (and thus potential format changes) so I'm pretty
> sure most people would prefer to have the speedup.

For the distros it will make sense most of the time from what I can tell.

>
> Gentoo does it on the machine itself, I am not sure about redhat or
> debian but I wouldnt be surprised if they do it per-arch at the very
> least so cross-arch probably isnt an issue.
>
> Also, I think we should add the arch to the version string stored. I
> would rather have false negatives than positives especially since we are
> not 100% sure exactly what part of the arch is important. We can always
> loosen it up later if that gets locked down.

No don't append it onto the PCRE version string, just check the PCRE
version string
and if its greater than X read out the arch field next. This should
work like policy versioning
in libsepol.


>
> Isnt the selinux policy in android stored in the rootfs or /system?
> isnt that all read-only? how would re-compiling on the device work?
> Does android put the fcontext.bin files on /data? that seems insecure.

This is the cross compile aspect, we build on the host and shove it
into the device
rootfs at build time. We typically build on x86_64 and deploy on ARM.
But this is NOT
always the case, just the most common. For instance, with Intel
tablets it probably can
use the pre-compiled regexes.

>
> -- Jason
>
>> > On Fri, Sep 16, 2016 at 1:35 PM William Roberts <bill.c.roberts@gmail.com>
>> > wrote:
>> >>
>> >> <snip>
>> >> >
>> >> >
>> >> > That's just the thing. Without -r the phone _will_ boot because the
>> >> > regexes
>> >> > are compiled on first use. With -r and an arch mismatch we have an
>> >> > undefined
>> >> > behavior, which is bad.
>> >>
>> >> That's just a limitation of the current design.
>> >>
>> >> >
>> >> > See, I don't currently know what part of the architecture is important.
>> >> > Will
>> >> > word size and endianess be enough, e.g., will regexes generated on
>> >> > x86_64el
>> >> > work on armv8el (apparently this is what I observed but it could change
>> >> > at
>> >> > the whim of the PCRE2 maintainers)? So what will be the encoding? If we
>> >> > encode the complete architecture then the typical case for android
>> >> > (build on
>> >> > x86_64 for armv7/armv8) will yield the same result as without the -r
>> >> > option,
>> >> > namely, we rebuild the regexes on the device. If we just encode word
>> >> > size
>> >> > and endianess it may work for a while... I just don't feel comfortable
>> >> > enough to make a decision at this point.
>> >>
>> >> Then shelve it and use textual file contexts. The only purpose of the
>> >> binary format is to
>> >> include the pre-compiled regex;s so if you cant use that feature,
>> >> there's no point in
>> >> using binary format.
>> >>
>> >> My thought would be that it has to be an exact match for architecture
>> >> upfront, then
>> >> possibly investigate relaxing the requirement later. But, IIRC, if we get
>> >> a 30%
>> >> increase in text file load time, theirs really no point, for the
>> >> binary format on Android.
>> >> Android fic files are smaller, and have much simpler regex's compared
>> >> to our desktop
>> >> brethren.
>> >>
>> >> >
>> >> > My limited understanding is that the automata built by PCRE2 use
>> >> > pointers to
>> >> > link states or other structures. These pointers are symbolized when
>> >> > serialized, but they keep their width, which is why the regexes are at
>> >> > least
>> >> > sensitive to the word size.
>> >> >
>> >> > Just a wild Idea:
>> >> > PCRE2 also supports JIT compiling. I did not use this here because I did
>> >> > not
>> >> > feel comfortable for libselinux to depend on the execmem permission. But
>> >> > this could be developed into pre cross compiling the regexes into native
>> >> > code, which can than be linked into a device specific library. But I
>> >> > guess
>> >> > this would require quite some cooperation with the PCRE2 developers.
>> >>
>> >> I was going to ask if the arch dependency was on JIT'd code or just
>> >> something else
>> >> which you elaborated above with word size/endiness, etc.
>> >>
>> >> >
>> >> >>
>> >> >> The other thing is, this is only an issue on binary formated
>> >> >> file_context
>> >> >> files,
>> >> >> this is the ideal reason to move back to text files, since their is no
>> >> >> speedup
>> >> >> gained if we have to compile them.
>> >> >
>> >> >
>> >> > Right, or you could see it as an intermediate solution that does not
>> >> > require
>> >> > changes to the build system until we can properly cross compile the
>> >> > regexes.
>> >>
>> >> If you add an option to sefcontext_compile it should be to remove
>> >> them. not add it in.
>> >> This keeps it consistent with teh behavior for PCRE, it would be add
>> >> to say, "make me
>> >> a binary fc, but don't actually embed the regex information", since
>> >> that is currently not
>> >> the default behavior. Changing the Android make recipe is trivial, so
>> >> adding -r shouldn't
>> >> for Android shouldn't be a show stopper.
>> >>
>> >> <snip>
>>
>>
>>
>> --
>> Respectfully,
>>
>> William C Roberts
>> _______________________________________________
>> Seandroid-list mailing list
>> Seandroid-list@tycho.nsa.gov
>> To unsubscribe, send email to Seandroid-list-leave@tycho.nsa.gov.
>> To get help, send an email containing "help" to Seandroid-list-request@tycho.nsa.gov.



-- 
Respectfully,

William C Roberts

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 13:15               ` William Roberts
@ 2016-09-16 13:31                 ` Jason Zaman
  2016-09-16 13:43                   ` William Roberts
                                     ` (2 more replies)
  0 siblings, 3 replies; 24+ messages in thread
From: Jason Zaman @ 2016-09-16 13:31 UTC (permalink / raw)
  To: William Roberts
  Cc: Janis Danisevskis, Janis Danisevskis, seandroid-list,
	Stephen Smalley, selinux

On Fri, Sep 16, 2016 at 06:15:01AM -0700, William Roberts wrote:
> On Fri, Sep 16, 2016 at 6:09 AM, Janis Danisevskis <jdanis@google.com> wrote:
> > I don't mind. Then before sefcontext_compile -r gets widely adapted we
> > should change the semantic quickly. I'll prepare a patch.
> 
> Did I miss something and this was merged? Iv'e been out recovering
> from a surgery so I haven't been
> following this as well as I normally would have,
> 
> If its merged, just leave it.

Its the very latest thing in master yeah, but I do also agree with changing it.

I just wanted to add that from a distro perspective, compiling things by
default makes more sense. In gentoo, the package post_install runs
sefcontext_compile. Using the fcontext files happens a lot more than any
updates to libselinux (and thus potential format changes) so I'm pretty
sure most people would prefer to have the speedup.

Gentoo does it on the machine itself, I am not sure about redhat or
debian but I wouldnt be surprised if they do it per-arch at the very
least so cross-arch probably isnt an issue.

Also, I think we should add the arch to the version string stored. I
would rather have false negatives than positives especially since we are
not 100% sure exactly what part of the arch is important. We can always
loosen it up later if that gets locked down.

Isnt the selinux policy in android stored in the rootfs or /system?
isnt that all read-only? how would re-compiling on the device work?
Does android put the fcontext.bin files on /data? that seems insecure.

-- Jason

> > On Fri, Sep 16, 2016 at 1:35 PM William Roberts <bill.c.roberts@gmail.com>
> > wrote:
> >>
> >> <snip>
> >> >
> >> >
> >> > That's just the thing. Without -r the phone _will_ boot because the
> >> > regexes
> >> > are compiled on first use. With -r and an arch mismatch we have an
> >> > undefined
> >> > behavior, which is bad.
> >>
> >> That's just a limitation of the current design.
> >>
> >> >
> >> > See, I don't currently know what part of the architecture is important.
> >> > Will
> >> > word size and endianess be enough, e.g., will regexes generated on
> >> > x86_64el
> >> > work on armv8el (apparently this is what I observed but it could change
> >> > at
> >> > the whim of the PCRE2 maintainers)? So what will be the encoding? If we
> >> > encode the complete architecture then the typical case for android
> >> > (build on
> >> > x86_64 for armv7/armv8) will yield the same result as without the -r
> >> > option,
> >> > namely, we rebuild the regexes on the device. If we just encode word
> >> > size
> >> > and endianess it may work for a while... I just don't feel comfortable
> >> > enough to make a decision at this point.
> >>
> >> Then shelve it and use textual file contexts. The only purpose of the
> >> binary format is to
> >> include the pre-compiled regex;s so if you cant use that feature,
> >> there's no point in
> >> using binary format.
> >>
> >> My thought would be that it has to be an exact match for architecture
> >> upfront, then
> >> possibly investigate relaxing the requirement later. But, IIRC, if we get
> >> a 30%
> >> increase in text file load time, theirs really no point, for the
> >> binary format on Android.
> >> Android fic files are smaller, and have much simpler regex's compared
> >> to our desktop
> >> brethren.
> >>
> >> >
> >> > My limited understanding is that the automata built by PCRE2 use
> >> > pointers to
> >> > link states or other structures. These pointers are symbolized when
> >> > serialized, but they keep their width, which is why the regexes are at
> >> > least
> >> > sensitive to the word size.
> >> >
> >> > Just a wild Idea:
> >> > PCRE2 also supports JIT compiling. I did not use this here because I did
> >> > not
> >> > feel comfortable for libselinux to depend on the execmem permission. But
> >> > this could be developed into pre cross compiling the regexes into native
> >> > code, which can than be linked into a device specific library. But I
> >> > guess
> >> > this would require quite some cooperation with the PCRE2 developers.
> >>
> >> I was going to ask if the arch dependency was on JIT'd code or just
> >> something else
> >> which you elaborated above with word size/endiness, etc.
> >>
> >> >
> >> >>
> >> >> The other thing is, this is only an issue on binary formated
> >> >> file_context
> >> >> files,
> >> >> this is the ideal reason to move back to text files, since their is no
> >> >> speedup
> >> >> gained if we have to compile them.
> >> >
> >> >
> >> > Right, or you could see it as an intermediate solution that does not
> >> > require
> >> > changes to the build system until we can properly cross compile the
> >> > regexes.
> >>
> >> If you add an option to sefcontext_compile it should be to remove
> >> them. not add it in.
> >> This keeps it consistent with teh behavior for PCRE, it would be add
> >> to say, "make me
> >> a binary fc, but don't actually embed the regex information", since
> >> that is currently not
> >> the default behavior. Changing the Android make recipe is trivial, so
> >> adding -r shouldn't
> >> for Android shouldn't be a show stopper.
> >>
> >> <snip>
> 
> 
> 
> -- 
> Respectfully,
> 
> William C Roberts
> _______________________________________________
> Seandroid-list mailing list
> Seandroid-list@tycho.nsa.gov
> To unsubscribe, send email to Seandroid-list-leave@tycho.nsa.gov.
> To get help, send an email containing "help" to Seandroid-list-request@tycho.nsa.gov.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 13:09             ` Janis Danisevskis
@ 2016-09-16 13:15               ` William Roberts
  2016-09-16 13:31                 ` Jason Zaman
  0 siblings, 1 reply; 24+ messages in thread
From: William Roberts @ 2016-09-16 13:15 UTC (permalink / raw)
  To: Janis Danisevskis
  Cc: Stephen Smalley, Janis Danisevskis, seandroid-list, selinux

On Fri, Sep 16, 2016 at 6:09 AM, Janis Danisevskis <jdanis@google.com> wrote:
> I don't mind. Then before sefcontext_compile -r gets widely adapted we
> should change the semantic quickly. I'll prepare a patch.

Did I miss something and this was merged? Iv'e been out recovering
from a surgery so I haven't been
following this as well as I normally would have,

If its merged, just leave it.

>
> On Fri, Sep 16, 2016 at 1:35 PM William Roberts <bill.c.roberts@gmail.com>
> wrote:
>>
>> <snip>
>> >
>> >
>> > That's just the thing. Without -r the phone _will_ boot because the
>> > regexes
>> > are compiled on first use. With -r and an arch mismatch we have an
>> > undefined
>> > behavior, which is bad.
>>
>> That's just a limitation of the current design.
>>
>> >
>> > See, I don't currently know what part of the architecture is important.
>> > Will
>> > word size and endianess be enough, e.g., will regexes generated on
>> > x86_64el
>> > work on armv8el (apparently this is what I observed but it could change
>> > at
>> > the whim of the PCRE2 maintainers)? So what will be the encoding? If we
>> > encode the complete architecture then the typical case for android
>> > (build on
>> > x86_64 for armv7/armv8) will yield the same result as without the -r
>> > option,
>> > namely, we rebuild the regexes on the device. If we just encode word
>> > size
>> > and endianess it may work for a while... I just don't feel comfortable
>> > enough to make a decision at this point.
>>
>> Then shelve it and use textual file contexts. The only purpose of the
>> binary format is to
>> include the pre-compiled regex;s so if you cant use that feature,
>> there's no point in
>> using binary format.
>>
>> My thought would be that it has to be an exact match for architecture
>> upfront, then
>> possibly investigate relaxing the requirement later. But, IIRC, if we get
>> a 30%
>> increase in text file load time, theirs really no point, for the
>> binary format on Android.
>> Android fic files are smaller, and have much simpler regex's compared
>> to our desktop
>> brethren.
>>
>> >
>> > My limited understanding is that the automata built by PCRE2 use
>> > pointers to
>> > link states or other structures. These pointers are symbolized when
>> > serialized, but they keep their width, which is why the regexes are at
>> > least
>> > sensitive to the word size.
>> >
>> > Just a wild Idea:
>> > PCRE2 also supports JIT compiling. I did not use this here because I did
>> > not
>> > feel comfortable for libselinux to depend on the execmem permission. But
>> > this could be developed into pre cross compiling the regexes into native
>> > code, which can than be linked into a device specific library. But I
>> > guess
>> > this would require quite some cooperation with the PCRE2 developers.
>>
>> I was going to ask if the arch dependency was on JIT'd code or just
>> something else
>> which you elaborated above with word size/endiness, etc.
>>
>> >
>> >>
>> >> The other thing is, this is only an issue on binary formated
>> >> file_context
>> >> files,
>> >> this is the ideal reason to move back to text files, since their is no
>> >> speedup
>> >> gained if we have to compile them.
>> >
>> >
>> > Right, or you could see it as an intermediate solution that does not
>> > require
>> > changes to the build system until we can properly cross compile the
>> > regexes.
>>
>> If you add an option to sefcontext_compile it should be to remove
>> them. not add it in.
>> This keeps it consistent with teh behavior for PCRE, it would be add
>> to say, "make me
>> a binary fc, but don't actually embed the regex information", since
>> that is currently not
>> the default behavior. Changing the Android make recipe is trivial, so
>> adding -r shouldn't
>> for Android shouldn't be a show stopper.
>>
>> <snip>



-- 
Respectfully,

William C Roberts

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 12:35           ` William Roberts
@ 2016-09-16 13:09             ` Janis Danisevskis
  2016-09-16 13:15               ` William Roberts
  0 siblings, 1 reply; 24+ messages in thread
From: Janis Danisevskis @ 2016-09-16 13:09 UTC (permalink / raw)
  To: William Roberts
  Cc: Stephen Smalley, Janis Danisevskis, seandroid-list, selinux

[-- Attachment #1: Type: text/plain, Size: 3421 bytes --]

I don't mind. Then before sefcontext_compile -r gets widely adapted we
should change the semantic quickly. I'll prepare a patch.

On Fri, Sep 16, 2016 at 1:35 PM William Roberts <bill.c.roberts@gmail.com>
wrote:

> <snip>
> >
> >
> > That's just the thing. Without -r the phone _will_ boot because the
> regexes
> > are compiled on first use. With -r and an arch mismatch we have an
> undefined
> > behavior, which is bad.
>
> That's just a limitation of the current design.
>
> >
> > See, I don't currently know what part of the architecture is important.
> Will
> > word size and endianess be enough, e.g., will regexes generated on
> x86_64el
> > work on armv8el (apparently this is what I observed but it could change
> at
> > the whim of the PCRE2 maintainers)? So what will be the encoding? If we
> > encode the complete architecture then the typical case for android
> (build on
> > x86_64 for armv7/armv8) will yield the same result as without the -r
> option,
> > namely, we rebuild the regexes on the device. If we just encode word size
> > and endianess it may work for a while... I just don't feel comfortable
> > enough to make a decision at this point.
>
> Then shelve it and use textual file contexts. The only purpose of the
> binary format is to
> include the pre-compiled regex;s so if you cant use that feature,
> there's no point in
> using binary format.
>
> My thought would be that it has to be an exact match for architecture
> upfront, then
> possibly investigate relaxing the requirement later. But, IIRC, if we get
> a 30%
> increase in text file load time, theirs really no point, for the
> binary format on Android.
> Android fic files are smaller, and have much simpler regex's compared
> to our desktop
> brethren.
>
> >
> > My limited understanding is that the automata built by PCRE2 use
> pointers to
> > link states or other structures. These pointers are symbolized when
> > serialized, but they keep their width, which is why the regexes are at
> least
> > sensitive to the word size.
> >
> > Just a wild Idea:
> > PCRE2 also supports JIT compiling. I did not use this here because I did
> not
> > feel comfortable for libselinux to depend on the execmem permission. But
> > this could be developed into pre cross compiling the regexes into native
> > code, which can than be linked into a device specific library. But I
> guess
> > this would require quite some cooperation with the PCRE2 developers.
>
> I was going to ask if the arch dependency was on JIT'd code or just
> something else
> which you elaborated above with word size/endiness, etc.
>
> >
> >>
> >> The other thing is, this is only an issue on binary formated
> file_context
> >> files,
> >> this is the ideal reason to move back to text files, since their is no
> >> speedup
> >> gained if we have to compile them.
> >
> >
> > Right, or you could see it as an intermediate solution that does not
> require
> > changes to the build system until we can properly cross compile the
> regexes.
>
> If you add an option to sefcontext_compile it should be to remove
> them. not add it in.
> This keeps it consistent with teh behavior for PCRE, it would be add
> to say, "make me
> a binary fc, but don't actually embed the regex information", since
> that is currently not
> the default behavior. Changing the Android make recipe is trivial, so
> adding -r shouldn't
> for Android shouldn't be a show stopper.
>
> <snip>
>

[-- Attachment #2: Type: text/html, Size: 5421 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 12:23         ` Janis Danisevskis
@ 2016-09-16 12:35           ` William Roberts
  2016-09-16 13:09             ` Janis Danisevskis
  0 siblings, 1 reply; 24+ messages in thread
From: William Roberts @ 2016-09-16 12:35 UTC (permalink / raw)
  To: Janis Danisevskis
  Cc: Stephen Smalley, Janis Danisevskis, seandroid-list, selinux

<snip>
>
>
> That's just the thing. Without -r the phone _will_ boot because the regexes
> are compiled on first use. With -r and an arch mismatch we have an undefined
> behavior, which is bad.

That's just a limitation of the current design.

>
> See, I don't currently know what part of the architecture is important. Will
> word size and endianess be enough, e.g., will regexes generated on x86_64el
> work on armv8el (apparently this is what I observed but it could change at
> the whim of the PCRE2 maintainers)? So what will be the encoding? If we
> encode the complete architecture then the typical case for android (build on
> x86_64 for armv7/armv8) will yield the same result as without the -r option,
> namely, we rebuild the regexes on the device. If we just encode word size
> and endianess it may work for a while... I just don't feel comfortable
> enough to make a decision at this point.

Then shelve it and use textual file contexts. The only purpose of the
binary format is to
include the pre-compiled regex;s so if you cant use that feature,
there's no point in
using binary format.

My thought would be that it has to be an exact match for architecture
upfront, then
possibly investigate relaxing the requirement later. But, IIRC, if we get a 30%
increase in text file load time, theirs really no point, for the
binary format on Android.
Android fic files are smaller, and have much simpler regex's compared
to our desktop
brethren.

>
> My limited understanding is that the automata built by PCRE2 use pointers to
> link states or other structures. These pointers are symbolized when
> serialized, but they keep their width, which is why the regexes are at least
> sensitive to the word size.
>
> Just a wild Idea:
> PCRE2 also supports JIT compiling. I did not use this here because I did not
> feel comfortable for libselinux to depend on the execmem permission. But
> this could be developed into pre cross compiling the regexes into native
> code, which can than be linked into a device specific library. But I guess
> this would require quite some cooperation with the PCRE2 developers.

I was going to ask if the arch dependency was on JIT'd code or just
something else
which you elaborated above with word size/endiness, etc.

>
>>
>> The other thing is, this is only an issue on binary formated file_context
>> files,
>> this is the ideal reason to move back to text files, since their is no
>> speedup
>> gained if we have to compile them.
>
>
> Right, or you could see it as an intermediate solution that does not require
> changes to the build system until we can properly cross compile the regexes.

If you add an option to sefcontext_compile it should be to remove
them. not add it in.
This keeps it consistent with teh behavior for PCRE, it would be add
to say, "make me
a binary fc, but don't actually embed the regex information", since
that is currently not
the default behavior. Changing the Android make recipe is trivial, so
adding -r shouldn't
for Android shouldn't be a show stopper.

<snip>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 10:23       ` William Roberts
@ 2016-09-16 12:23         ` Janis Danisevskis
  2016-09-16 12:35           ` William Roberts
  0 siblings, 1 reply; 24+ messages in thread
From: Janis Danisevskis @ 2016-09-16 12:23 UTC (permalink / raw)
  To: William Roberts
  Cc: Stephen Smalley, Janis Danisevskis, seandroid-list, selinux

[-- Attachment #1: Type: text/plain, Size: 8546 bytes --]

On Fri, Sep 16, 2016 at 11:23 AM William Roberts <bill.c.roberts@gmail.com>
wrote:

> On Fri, Sep 16, 2016 at 3:13 AM, Janis Danisevskis <jdanis@google.com>
> wrote:
> > First of all, I would like to thank you, Stephen and William, for your
> > patience and support.
> >
> > On Thu, Sep 15, 2016 at 8:34 PM William Roberts <
> bill.c.roberts@gmail.com>
> > wrote:
> >>
> >> On Thu, Sep 15, 2016 at 7:57 AM, Stephen Smalley <sds@tycho.nsa.gov>
> >> wrote:
> >> > On 09/15/2016 10:04 AM, Janis Danisevskis wrote:
> >> >> From: Janis Danisevskis <jdanis@google.com>
> >> >>
> >> >> This patch moves all pcre1/2 dependencies into the new files regex.h
> >> >> and regex.c implementing the common denominator of features needed
> >> >> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> >> >> used implementations.
> >> >>
> >> >> As of this patch libselinux supports either pcre or pcre2 but not
> >> >> both at the same time. The persistently stored file contexts
> >> >> information differs. This means libselinux can only load file
> >> >> context files generated by sefcontext_compile build with the
> >> >> same pcre variant.
> >> >>
> >> >> Also, for pcre2 the persistent format is architecture dependent.
> >> >> Stored precompiled regular expressions can only be used on the
> >> >> same architecture they were generated on. If pcre2 is used,
> >> >> sefcontext_compile now respects the "-r". This flag makes
> >> >> sefcontext_compile include the precompiled regular expressions
> >> >> in the output file. The default is to omit them, so that the
> >> >> output remains portable at the cost of having to recompile
> >> >> the regular expressions at load time, or rather on first use.
> >> >
> >> > Is that really the default behavior you want?
> >>
> >> I would make the default include the pre-compiled regex's, if there
> >> is an arch mismatch then ditch the pre-compiled ones and reload
> >> them. I would make it an option to NOT generate pre-compiled
> >> regexes. For the upstream consumers, who compile and deploy
> >> on the same system, this means they don't need to update to add
> >> -r. For android, we just need to update a makefile to include -r to
> >> omit the precompiled regex's.
> >
> >
> > The arch is currently not encoded in the binary, so a mismatch cannot be
> > detected. I would sleep better if we left the -r option as is for now,
> > because it is kind of "use only if you know what you are doing". Also,
> this
> > way it does not break the android build process.
>
> We probably would want to encode the architecture since this matters.This
> way
> we  can detect a mismatch and compile them. This,prevents the
> why isn't my device booting issue for cross compilations that don't have
> -r.
>

That's just the thing. Without -r the phone _will_ boot because the regexes
are compiled on first use. With -r and an arch mismatch we have an
undefined behavior, which is bad.

See, I don't currently know what part of the architecture is important.
Will word size and endianess be enough, e.g., will regexes generated on
x86_64el work on armv8el (apparently this is what I observed but it could
change at the whim of the PCRE2 maintainers)? So what will be the encoding?
If we encode the complete architecture then the typical case for android
(build on x86_64 for armv7/armv8) will yield the same result as without the
-r option, namely, we rebuild the regexes on the device. If we just encode
word size and endianess it may work for a while... I just don't feel
comfortable enough to make a decision at this point.

My limited understanding is that the automata built by PCRE2 use pointers
to link states or other structures. These pointers are symbolized when
serialized, but they keep their width, which is why the regexes are at
least sensitive to the word size.

Just a wild Idea:
PCRE2 also supports JIT compiling. I did not use this here because I did
not feel comfortable for libselinux to depend on the execmem permission.
But this could be developed into pre cross compiling the regexes into
native code, which can than be linked into a device specific library. But I
guess this would require quite some cooperation with the PCRE2 developers.


> The other thing is, this is only an issue on binary formated file_context
> files,
> this is the ideal reason to move back to text files, since their is no
> speedup
> gained if we have to compile them.
>

Right, or you could see it as an intermediate solution that does not
require changes to the build system until we can properly cross compile the
regexes.


>
> >
> >>
> >>
> >> In the case of Android, where we cross compile the file_contexts.bin,
> >> does using binary fc's even make sense now? The major reason is that
> >> the binary format loads faster by skipping the regex compilation, we
> >> can no longer skip that with PCRE2 and cross compilation. We should
> >> likely move Android back to text based if this is the case. I have
> >> speedups in the neighborhood of 30% for text file processing in
> >> the pipeline.
> >
> >
> > I realize that the binary PCRE2 variant is currently much slower than the
> > PCRE stored regexes. But it still a little faster than the text file
> > processing. If you can make text file processing 30% faster, than this
> may
> > well be an option because serializing pcre2 regexes will probably never
> beat
> > pcre, even if we did stunts like compiling for the correct architecture
> > using qemu.
> > I'd be happy to help perform benchmarks and choose the best option.
>
> Once you dump the pre-compiled regex's from the binary format,
> your back to the text files performance. Also, with PCRE, the binary
> format was
> 3x the size on disk.
>
> >
> > Also I am exploring another option, which may sound a bit exotic. So I
> would
> > like to have a running POC first before I come forward. Maybe nothing
> comes
> > of it.
> >
> >>
> >>
> >> >
> >> >> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> >> >> ---
> >> >
> >> >> diff --git a/libselinux/src/label_file.h
> b/libselinux/src/label_file.h
> >> >> index 6d1e890..3df7972 100644
> >> >> --- a/libselinux/src/label_file.h
> >> >> +++ b/libselinux/src/label_file.h
> >> >> @@ -453,12 +429,14 @@ static inline int process_line(struct
> >> >> selabel_handle *rec,
> >> >>        */
> >> >>       data->nspec++;
> >> >>
> >> >> -     if (rec->validating &&
> >> >> -                         compile_regex(data, &spec_arr[nspec],
> >> >> &errbuf)) {
> >> >> +     if (rec->validating
> >> >> +                     && compile_regex(data, &spec_arr[nspec],
> >> >> &error_data)) {
> >> >> +             regex_format_error(&error_data,
> >> >> regex_error_format_buffer,
> >> >> +                             sizeof(regex_error_format_buffer));
> >> >>               COMPAT_LOG(SELINUX_ERROR,
> >> >>                          "%s:  line %u has invalid regex %s:  %s\n",
> >> >>                          path, lineno, regex,
> >> >> -                        (errbuf ? errbuf : "out of memory"));
> >> >> +                        regex_error_format_buffer);
> >> >
> >> > compile_regex() may fail on an out of memory condition before
> >> > regex_error_format_buffer is initialized, which is why we previously
> >> > passed errbuf ?: "out of memory" above.  I suppose you could
> initialize
> >> > regex_error_format_buffer with "out of memory" prior to calling
> >> > compile_regex().
> >> >
> >> >> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> >> >> new file mode 100644
> >> >> index 0000000..1c4a84d
> >> >> --- /dev/null
> >> >> +++ b/libselinux/src/regex.c
> >> > <snip>
> >> >> +int regex_writef(struct regex_data *regex, FILE *fp)
> >> >
> >> > This needs to be updated with the new do_write_precompregex argument,
> >> > and either use the argument or mark it unused to permit compilation
> for
> >> > the USE_PCRE2=n.
> >> >
> >> > _______________________________________________
> >> > Seandroid-list mailing list
> >> > Seandroid-list@tycho.nsa.gov
> >> > To unsubscribe, send email to Seandroid-list-leave@tycho.nsa.gov.
> >> > To get help, send an email containing "help" to
> >> > Seandroid-list-request@tycho.nsa.gov.
> >>
> >>
> >>
> >> --
> >> Respectfully,
> >>
> >> William C Roberts
> >> _______________________________________________
> >> Selinux mailing list
> >> Selinux@tycho.nsa.gov
> >> To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
> >> To get help, send an email containing "help" to
> >> Selinux-request@tycho.nsa.gov.
>
>
>
> --
> Respectfully,
>
> William C Roberts
>

[-- Attachment #2: Type: text/html, Size: 14807 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-16 10:13     ` Janis Danisevskis
@ 2016-09-16 10:23       ` William Roberts
  2016-09-16 12:23         ` Janis Danisevskis
  0 siblings, 1 reply; 24+ messages in thread
From: William Roberts @ 2016-09-16 10:23 UTC (permalink / raw)
  To: Janis Danisevskis
  Cc: Stephen Smalley, Janis Danisevskis, seandroid-list, selinux

On Fri, Sep 16, 2016 at 3:13 AM, Janis Danisevskis <jdanis@google.com> wrote:
> First of all, I would like to thank you, Stephen and William, for your
> patience and support.
>
> On Thu, Sep 15, 2016 at 8:34 PM William Roberts <bill.c.roberts@gmail.com>
> wrote:
>>
>> On Thu, Sep 15, 2016 at 7:57 AM, Stephen Smalley <sds@tycho.nsa.gov>
>> wrote:
>> > On 09/15/2016 10:04 AM, Janis Danisevskis wrote:
>> >> From: Janis Danisevskis <jdanis@google.com>
>> >>
>> >> This patch moves all pcre1/2 dependencies into the new files regex.h
>> >> and regex.c implementing the common denominator of features needed
>> >> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
>> >> used implementations.
>> >>
>> >> As of this patch libselinux supports either pcre or pcre2 but not
>> >> both at the same time. The persistently stored file contexts
>> >> information differs. This means libselinux can only load file
>> >> context files generated by sefcontext_compile build with the
>> >> same pcre variant.
>> >>
>> >> Also, for pcre2 the persistent format is architecture dependent.
>> >> Stored precompiled regular expressions can only be used on the
>> >> same architecture they were generated on. If pcre2 is used,
>> >> sefcontext_compile now respects the "-r". This flag makes
>> >> sefcontext_compile include the precompiled regular expressions
>> >> in the output file. The default is to omit them, so that the
>> >> output remains portable at the cost of having to recompile
>> >> the regular expressions at load time, or rather on first use.
>> >
>> > Is that really the default behavior you want?
>>
>> I would make the default include the pre-compiled regex's, if there
>> is an arch mismatch then ditch the pre-compiled ones and reload
>> them. I would make it an option to NOT generate pre-compiled
>> regexes. For the upstream consumers, who compile and deploy
>> on the same system, this means they don't need to update to add
>> -r. For android, we just need to update a makefile to include -r to
>> omit the precompiled regex's.
>
>
> The arch is currently not encoded in the binary, so a mismatch cannot be
> detected. I would sleep better if we left the -r option as is for now,
> because it is kind of "use only if you know what you are doing". Also, this
> way it does not break the android build process.

We probably would want to encode the architecture since this matters.This way
we  can detect a mismatch and compile them. This,prevents the
why isn't my device booting issue for cross compilations that don't have -r.
The other thing is, this is only an issue on binary formated file_context files,
this is the ideal reason to move back to text files, since their is no speedup
gained if we have to compile them.

>
>>
>>
>> In the case of Android, where we cross compile the file_contexts.bin,
>> does using binary fc's even make sense now? The major reason is that
>> the binary format loads faster by skipping the regex compilation, we
>> can no longer skip that with PCRE2 and cross compilation. We should
>> likely move Android back to text based if this is the case. I have
>> speedups in the neighborhood of 30% for text file processing in
>> the pipeline.
>
>
> I realize that the binary PCRE2 variant is currently much slower than the
> PCRE stored regexes. But it still a little faster than the text file
> processing. If you can make text file processing 30% faster, than this may
> well be an option because serializing pcre2 regexes will probably never beat
> pcre, even if we did stunts like compiling for the correct architecture
> using qemu.
> I'd be happy to help perform benchmarks and choose the best option.

Once you dump the pre-compiled regex's from the binary format,
your back to the text files performance. Also, with PCRE, the binary format was
3x the size on disk.

>
> Also I am exploring another option, which may sound a bit exotic. So I would
> like to have a running POC first before I come forward. Maybe nothing comes
> of it.
>
>>
>>
>> >
>> >> Signed-off-by: Janis Danisevskis <jdanis@google.com>
>> >> ---
>> >
>> >> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
>> >> index 6d1e890..3df7972 100644
>> >> --- a/libselinux/src/label_file.h
>> >> +++ b/libselinux/src/label_file.h
>> >> @@ -453,12 +429,14 @@ static inline int process_line(struct
>> >> selabel_handle *rec,
>> >>        */
>> >>       data->nspec++;
>> >>
>> >> -     if (rec->validating &&
>> >> -                         compile_regex(data, &spec_arr[nspec],
>> >> &errbuf)) {
>> >> +     if (rec->validating
>> >> +                     && compile_regex(data, &spec_arr[nspec],
>> >> &error_data)) {
>> >> +             regex_format_error(&error_data,
>> >> regex_error_format_buffer,
>> >> +                             sizeof(regex_error_format_buffer));
>> >>               COMPAT_LOG(SELINUX_ERROR,
>> >>                          "%s:  line %u has invalid regex %s:  %s\n",
>> >>                          path, lineno, regex,
>> >> -                        (errbuf ? errbuf : "out of memory"));
>> >> +                        regex_error_format_buffer);
>> >
>> > compile_regex() may fail on an out of memory condition before
>> > regex_error_format_buffer is initialized, which is why we previously
>> > passed errbuf ?: "out of memory" above.  I suppose you could initialize
>> > regex_error_format_buffer with "out of memory" prior to calling
>> > compile_regex().
>> >
>> >> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
>> >> new file mode 100644
>> >> index 0000000..1c4a84d
>> >> --- /dev/null
>> >> +++ b/libselinux/src/regex.c
>> > <snip>
>> >> +int regex_writef(struct regex_data *regex, FILE *fp)
>> >
>> > This needs to be updated with the new do_write_precompregex argument,
>> > and either use the argument or mark it unused to permit compilation for
>> > the USE_PCRE2=n.
>> >
>> > _______________________________________________
>> > Seandroid-list mailing list
>> > Seandroid-list@tycho.nsa.gov
>> > To unsubscribe, send email to Seandroid-list-leave@tycho.nsa.gov.
>> > To get help, send an email containing "help" to
>> > Seandroid-list-request@tycho.nsa.gov.
>>
>>
>>
>> --
>> Respectfully,
>>
>> William C Roberts
>> _______________________________________________
>> Selinux mailing list
>> Selinux@tycho.nsa.gov
>> To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
>> To get help, send an email containing "help" to
>> Selinux-request@tycho.nsa.gov.



-- 
Respectfully,

William C Roberts

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-15 19:31   ` William Roberts
@ 2016-09-16 10:13     ` Janis Danisevskis
  2016-09-16 10:23       ` William Roberts
  0 siblings, 1 reply; 24+ messages in thread
From: Janis Danisevskis @ 2016-09-16 10:13 UTC (permalink / raw)
  To: William Roberts, Stephen Smalley
  Cc: Janis Danisevskis, seandroid-list, selinux

[-- Attachment #1: Type: text/plain, Size: 5424 bytes --]

First of all, I would like to thank you, Stephen and William, for your
patience and support.

On Thu, Sep 15, 2016 at 8:34 PM William Roberts <bill.c.roberts@gmail.com>
wrote:

On Thu, Sep 15, 2016 at 7:57 AM, Stephen Smalley <sds@tycho.nsa.gov> wrote:
> On 09/15/2016 10:04 AM, Janis Danisevskis wrote:
>> From: Janis Danisevskis <jdanis@google.com>
>>
>> This patch moves all pcre1/2 dependencies into the new files regex.h
>> and regex.c implementing the common denominator of features needed
>> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
>> used implementations.
>>
>> As of this patch libselinux supports either pcre or pcre2 but not
>> both at the same time. The persistently stored file contexts
>> information differs. This means libselinux can only load file
>> context files generated by sefcontext_compile build with the
>> same pcre variant.
>>
>> Also, for pcre2 the persistent format is architecture dependent.
>> Stored precompiled regular expressions can only be used on the
>> same architecture they were generated on. If pcre2 is used,
>> sefcontext_compile now respects the "-r". This flag makes
>> sefcontext_compile include the precompiled regular expressions
>> in the output file. The default is to omit them, so that the
>> output remains portable at the cost of having to recompile
>> the regular expressions at load time, or rather on first use.
>
> Is that really the default behavior you want?

I would make the default include the pre-compiled regex's, if there
is an arch mismatch then ditch the pre-compiled ones and reload
them. I would make it an option to NOT generate pre-compiled
regexes. For the upstream consumers, who compile and deploy
on the same system, this means they don't need to update to add
-r. For android, we just need to update a makefile to include -r to
omit the precompiled regex's.


The arch is currently not encoded in the binary, so a mismatch cannot be
detected. I would sleep better if we left the -r option as is for now,
because it is kind of "use only if you know what you are doing". Also, this
way it does not break the android build process.



In the case of Android, where we cross compile the file_contexts.bin,
does using binary fc's even make sense now? The major reason is that
the binary format loads faster by skipping the regex compilation, we
can no longer skip that with PCRE2 and cross compilation. We should
likely move Android back to text based if this is the case. I have
speedups in the neighborhood of 30% for text file processing in
the pipeline.


I realize that the binary PCRE2 variant is currently much slower than the
PCRE stored regexes. But it still a little faster than the text file
processing. If you can make text file processing 30% faster, than this may
well be an option because serializing pcre2 regexes will probably never
beat pcre, even if we did stunts like compiling for the correct
architecture using qemu.
I'd be happy to help perform benchmarks and choose the best option.

Also I am exploring another option, which may sound a bit exotic. So I
would like to have a running POC first before I come forward. Maybe nothing
comes of it.



>
>> Signed-off-by: Janis Danisevskis <jdanis@google.com>
>> ---
>
>> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
>> index 6d1e890..3df7972 100644
>> --- a/libselinux/src/label_file.h
>> +++ b/libselinux/src/label_file.h
>> @@ -453,12 +429,14 @@ static inline int process_line(struct
selabel_handle *rec,
>>        */
>>       data->nspec++;
>>
>> -     if (rec->validating &&
>> -                         compile_regex(data, &spec_arr[nspec],
&errbuf)) {
>> +     if (rec->validating
>> +                     && compile_regex(data, &spec_arr[nspec],
&error_data)) {
>> +             regex_format_error(&error_data, regex_error_format_buffer,
>> +                             sizeof(regex_error_format_buffer));
>>               COMPAT_LOG(SELINUX_ERROR,
>>                          "%s:  line %u has invalid regex %s:  %s\n",
>>                          path, lineno, regex,
>> -                        (errbuf ? errbuf : "out of memory"));
>> +                        regex_error_format_buffer);
>
> compile_regex() may fail on an out of memory condition before
> regex_error_format_buffer is initialized, which is why we previously
> passed errbuf ?: "out of memory" above.  I suppose you could initialize
> regex_error_format_buffer with "out of memory" prior to calling
> compile_regex().
>
>> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
>> new file mode 100644
>> index 0000000..1c4a84d
>> --- /dev/null
>> +++ b/libselinux/src/regex.c
> <snip>
>> +int regex_writef(struct regex_data *regex, FILE *fp)
>
> This needs to be updated with the new do_write_precompregex argument,
> and either use the argument or mark it unused to permit compilation for
> the USE_PCRE2=n.
>
> _______________________________________________
> Seandroid-list mailing list
> Seandroid-list@tycho.nsa.gov
> To unsubscribe, send email to Seandroid-list-leave@tycho.nsa.gov.
> To get help, send an email containing "help" to
Seandroid-list-request@tycho.nsa.gov.



--
Respectfully,

William C Roberts
_______________________________________________
Selinux mailing list
Selinux@tycho.nsa.gov
To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
To get help, send an email containing "help" to
Selinux-request@tycho.nsa.gov.

[-- Attachment #2: Type: text/html, Size: 10496 bytes --]

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-15 14:57 ` Stephen Smalley
@ 2016-09-15 19:31   ` William Roberts
  2016-09-16 10:13     ` Janis Danisevskis
  0 siblings, 1 reply; 24+ messages in thread
From: William Roberts @ 2016-09-15 19:31 UTC (permalink / raw)
  To: Stephen Smalley; +Cc: Janis Danisevskis, selinux, seandroid-list, James Carter

On Thu, Sep 15, 2016 at 7:57 AM, Stephen Smalley <sds@tycho.nsa.gov> wrote:
> On 09/15/2016 10:04 AM, Janis Danisevskis wrote:
>> From: Janis Danisevskis <jdanis@google.com>
>>
>> This patch moves all pcre1/2 dependencies into the new files regex.h
>> and regex.c implementing the common denominator of features needed
>> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
>> used implementations.
>>
>> As of this patch libselinux supports either pcre or pcre2 but not
>> both at the same time. The persistently stored file contexts
>> information differs. This means libselinux can only load file
>> context files generated by sefcontext_compile build with the
>> same pcre variant.
>>
>> Also, for pcre2 the persistent format is architecture dependent.
>> Stored precompiled regular expressions can only be used on the
>> same architecture they were generated on. If pcre2 is used,
>> sefcontext_compile now respects the "-r". This flag makes
>> sefcontext_compile include the precompiled regular expressions
>> in the output file. The default is to omit them, so that the
>> output remains portable at the cost of having to recompile
>> the regular expressions at load time, or rather on first use.
>
> Is that really the default behavior you want?

I would make the default include the pre-compiled regex's, if there
is an arch mismatch then ditch the pre-compiled ones and reload
them. I would make it an option to NOT generate pre-compiled
regexes. For the upstream consumers, who compile and deploy
on the same system, this means they don't need to update to add
-r. For android, we just need to update a makefile to include -r to
omit the precompiled regex's.

In the case of Android, where we cross compile the file_contexts.bin,
does using binary fc's even make sense now? The major reason is that
the binary format loads faster by skipping the regex compilation, we
can no longer skip that with PCRE2 and cross compilation. We should
likely move Android back to text based if this is the case. I have
speedups in the neighborhood of 30% for text file processing in
the pipeline.

>
>> Signed-off-by: Janis Danisevskis <jdanis@google.com>
>> ---
>
>> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
>> index 6d1e890..3df7972 100644
>> --- a/libselinux/src/label_file.h
>> +++ b/libselinux/src/label_file.h
>> @@ -453,12 +429,14 @@ static inline int process_line(struct selabel_handle *rec,
>>        */
>>       data->nspec++;
>>
>> -     if (rec->validating &&
>> -                         compile_regex(data, &spec_arr[nspec], &errbuf)) {
>> +     if (rec->validating
>> +                     && compile_regex(data, &spec_arr[nspec], &error_data)) {
>> +             regex_format_error(&error_data, regex_error_format_buffer,
>> +                             sizeof(regex_error_format_buffer));
>>               COMPAT_LOG(SELINUX_ERROR,
>>                          "%s:  line %u has invalid regex %s:  %s\n",
>>                          path, lineno, regex,
>> -                        (errbuf ? errbuf : "out of memory"));
>> +                        regex_error_format_buffer);
>
> compile_regex() may fail on an out of memory condition before
> regex_error_format_buffer is initialized, which is why we previously
> passed errbuf ?: "out of memory" above.  I suppose you could initialize
> regex_error_format_buffer with "out of memory" prior to calling
> compile_regex().
>
>> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
>> new file mode 100644
>> index 0000000..1c4a84d
>> --- /dev/null
>> +++ b/libselinux/src/regex.c
> <snip>
>> +int regex_writef(struct regex_data *regex, FILE *fp)
>
> This needs to be updated with the new do_write_precompregex argument,
> and either use the argument or mark it unused to permit compilation for
> the USE_PCRE2=n.
>
> _______________________________________________
> Seandroid-list mailing list
> Seandroid-list@tycho.nsa.gov
> To unsubscribe, send email to Seandroid-list-leave@tycho.nsa.gov.
> To get help, send an email containing "help" to Seandroid-list-request@tycho.nsa.gov.



-- 
Respectfully,

William C Roberts

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-15 14:04 Janis Danisevskis
@ 2016-09-15 14:57 ` Stephen Smalley
  2016-09-15 19:31   ` William Roberts
  0 siblings, 1 reply; 24+ messages in thread
From: Stephen Smalley @ 2016-09-15 14:57 UTC (permalink / raw)
  To: Janis Danisevskis, selinux, seandroid-list, jwcart2

On 09/15/2016 10:04 AM, Janis Danisevskis wrote:
> From: Janis Danisevskis <jdanis@google.com>
> 
> This patch moves all pcre1/2 dependencies into the new files regex.h
> and regex.c implementing the common denominator of features needed
> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> used implementations.
> 
> As of this patch libselinux supports either pcre or pcre2 but not
> both at the same time. The persistently stored file contexts
> information differs. This means libselinux can only load file
> context files generated by sefcontext_compile build with the
> same pcre variant.
> 
> Also, for pcre2 the persistent format is architecture dependent.
> Stored precompiled regular expressions can only be used on the
> same architecture they were generated on. If pcre2 is used,
> sefcontext_compile now respects the "-r". This flag makes
> sefcontext_compile include the precompiled regular expressions
> in the output file. The default is to omit them, so that the
> output remains portable at the cost of having to recompile
> the regular expressions at load time, or rather on first use.

Is that really the default behavior you want?

> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> ---

> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
> index 6d1e890..3df7972 100644
> --- a/libselinux/src/label_file.h
> +++ b/libselinux/src/label_file.h
> @@ -453,12 +429,14 @@ static inline int process_line(struct selabel_handle *rec,
>  	 */
>  	data->nspec++;
>  
> -	if (rec->validating &&
> -			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
> +	if (rec->validating
> +			&& compile_regex(data, &spec_arr[nspec], &error_data)) {
> +		regex_format_error(&error_data, regex_error_format_buffer,
> +				sizeof(regex_error_format_buffer));
>  		COMPAT_LOG(SELINUX_ERROR,
>  			   "%s:  line %u has invalid regex %s:  %s\n",
>  			   path, lineno, regex,
> -			   (errbuf ? errbuf : "out of memory"));
> +			   regex_error_format_buffer);

compile_regex() may fail on an out of memory condition before
regex_error_format_buffer is initialized, which is why we previously
passed errbuf ?: "out of memory" above.  I suppose you could initialize
regex_error_format_buffer with "out of memory" prior to calling
compile_regex().

> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> new file mode 100644
> index 0000000..1c4a84d
> --- /dev/null
> +++ b/libselinux/src/regex.c
<snip>
> +int regex_writef(struct regex_data *regex, FILE *fp)

This needs to be updated with the new do_write_precompregex argument,
and either use the argument or mark it unused to permit compilation for
the USE_PCRE2=n.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] libselinux: add support for pcre2
@ 2016-09-15 14:04 Janis Danisevskis
  2016-09-15 14:57 ` Stephen Smalley
  0 siblings, 1 reply; 24+ messages in thread
From: Janis Danisevskis @ 2016-09-15 14:04 UTC (permalink / raw)
  To: selinux, seandroid-list, sds, jwcart2

From: Janis Danisevskis <jdanis@google.com>

This patch moves all pcre1/2 dependencies into the new files regex.h
and regex.c implementing the common denominator of features needed
by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
used implementations.

As of this patch libselinux supports either pcre or pcre2 but not
both at the same time. The persistently stored file contexts
information differs. This means libselinux can only load file
context files generated by sefcontext_compile build with the
same pcre variant.

Also, for pcre2 the persistent format is architecture dependent.
Stored precompiled regular expressions can only be used on the
same architecture they were generated on. If pcre2 is used,
sefcontext_compile now respects the "-r". This flag makes
sefcontext_compile include the precompiled regular expressions
in the output file. The default is to omit them, so that the
output remains portable at the cost of having to recompile
the regular expressions at load time, or rather on first use.

Signed-off-by: Janis Danisevskis <jdanis@google.com>
---
 libselinux/Makefile                   |   9 +
 libselinux/src/Makefile               |   6 +-
 libselinux/src/label_file.c           |  95 ++-----
 libselinux/src/label_file.h           |  64 ++---
 libselinux/src/regex.c                | 496 ++++++++++++++++++++++++++++++++++
 libselinux/src/regex.h                | 149 ++++++++++
 libselinux/utils/Makefile             |   7 +-
 libselinux/utils/sefcontext_compile.c |  72 ++---
 policycoreutils/restorecond/Makefile  |   9 +-
 9 files changed, 740 insertions(+), 167 deletions(-)
 create mode 100644 libselinux/src/regex.c
 create mode 100644 libselinux/src/regex.h

diff --git a/libselinux/Makefile b/libselinux/Makefile
index 6142b60..5a8d42c 100644
--- a/libselinux/Makefile
+++ b/libselinux/Makefile
@@ -24,6 +24,15 @@ ifeq ($(DISABLE_SETRANS),y)
 endif
 export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
 
+USE_PCRE2 ?= n
+ifeq ($(USE_PCRE2),y)
+	PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
+	PCRE_LDFLAGS := -lpcre2-8
+else
+	PCRE_LDFLAGS := -lpcre
+endif
+export PCRE_CFLAGS PCRE_LDFLAGS
+
 all install relabel clean distclean indent:
 	@for subdir in $(SUBDIRS); do \
 		(cd $$subdir && $(MAKE) $@) || exit 1; \
diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
index 37d01af..36e42b8 100644
--- a/libselinux/src/Makefile
+++ b/libselinux/src/Makefile
@@ -74,7 +74,9 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
           -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
           -Werror -Wno-aggregate-return -Wno-redundant-decls
 
-override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
+PCRE_LDFLAGS ?= -lpcre
+
+override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
 
 SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
 		-Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
@@ -113,7 +115,7 @@ $(LIBA): $(OBJS)
 	$(RANLIB) $@
 
 $(LIBSO): $(LOBJS)
-	$(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
+	$(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
 	ln -sf $@ $(TARGET) 
 
 $(LIBPC): $(LIBPC).in ../VERSION
diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
index 9faecdb..e805189 100644
--- a/libselinux/src/label_file.c
+++ b/libselinux/src/label_file.c
@@ -15,7 +15,6 @@
 #include <errno.h>
 #include <limits.h>
 #include <stdint.h>
-#include <pcre.h>
 #include <unistd.h>
 #include <sys/mman.h>
 #include <sys/types.h>
@@ -126,6 +125,7 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 	struct mmap_area *mmap_area;
 	uint32_t i, magic, version;
 	uint32_t entry_len, stem_map_len, regex_array_len;
+	const char *reg_version;
 
 	mmap_area = malloc(sizeof(*mmap_area));
 	if (!mmap_area) {
@@ -155,8 +155,13 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 	if (rc < 0 || version > SELINUX_COMPILED_FCONTEXT_MAX_VERS)
 		return -1;
 
+	reg_version = regex_version();
+	if (!reg_version)
+		return -1;
+
 	if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
-		len = strlen(pcre_version());
+
+		len = strlen(reg_version);
 
 		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
 		if (rc < 0)
@@ -166,7 +171,7 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 		if (len != entry_len)
 			return -1;
 
-		/* Check if pcre version mismatch */
+		/* Check if regex version mismatch */
 		str_buf = malloc(entry_len + 1);
 		if (!str_buf)
 			return -1;
@@ -178,7 +183,7 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 		}
 
 		str_buf[entry_len] = '\0';
-		if ((strcmp(str_buf, pcre_version()) != 0)) {
+		if ((strcmp(str_buf, reg_version) != 0)) {
 			free(str_buf);
 			return -1;
 		}
@@ -258,7 +263,6 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 
 		spec = &data->spec_arr[data->nspec];
 		spec->from_mmap = 1;
-		spec->regcomp = 1;
 
 		/* Process context */
 		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
@@ -345,47 +349,10 @@ static int load_mmap(FILE *fp, size_t len, struct selabel_handle *rec,
 			spec->prefix_len = prefix_len;
 		}
 
-		/* Process regex and study_data entries */
-		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
-		if (rc < 0 || !entry_len) {
-			rc = -1;
-			goto out;
-		}
-		spec->regex = (pcre *)mmap_area->next_addr;
-		rc = next_entry(NULL, mmap_area, entry_len);
+		rc = regex_load_mmap(mmap_area, &spec->regex);
 		if (rc < 0)
 			goto out;
 
-		/* Check that regex lengths match. pcre_fullinfo()
-		 * also validates its magic number. */
-		rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
-		if (rc < 0 || len != entry_len) {
-			rc = -1;
-			goto out;
-		}
-
-		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
-		if (rc < 0 || !entry_len) {
-			rc = -1;
-			goto out;
-		}
-
-		if (entry_len) {
-			spec->lsd.study_data = (void *)mmap_area->next_addr;
-			spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
-			rc = next_entry(NULL, mmap_area, entry_len);
-			if (rc < 0)
-				goto out;
-
-			/* Check that study data lengths match. */
-			rc = pcre_fullinfo(spec->regex, &spec->lsd,
-					   PCRE_INFO_STUDYSIZE, &len);
-			if (rc < 0 || len != entry_len) {
-				rc = -1;
-				goto out;
-			}
-		}
-
 		data->nspec++;
 	}
 
@@ -630,14 +597,11 @@ static void closef(struct selabel_handle *rec)
 		spec = &data->spec_arr[i];
 		free(spec->lr.ctx_trans);
 		free(spec->lr.ctx_raw);
+		regex_data_free(spec->regex);
 		if (spec->from_mmap)
 			continue;
 		free(spec->regex_str);
 		free(spec->type_str);
-		if (spec->regcomp) {
-			pcre_free(spec->regex);
-			pcre_free_study(spec->sd);
-		}
 	}
 
 	for (i = 0; i < (unsigned int)data->num_stems; i++) {
@@ -669,13 +633,14 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 {
 	struct saved_data *data = (struct saved_data *)rec->data;
 	struct spec *spec_arr = data->spec_arr;
-	int i, rc, file_stem, pcre_options = 0;
+	int i, rc, file_stem;
 	mode_t mode = (mode_t)type;
 	const char *buf;
 	struct spec *ret = NULL;
 	char *clean_key = NULL;
 	const char *prev_slash, *next_slash;
 	unsigned int sofar = 0;
+	struct regex_error_data regex_error_data;
 
 	if (!data->nspec) {
 		errno = ENOENT;
@@ -702,9 +667,6 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 	file_stem = find_stem_from_file(data, &buf);
 	mode &= S_IFMT;
 
-	if (partial)
-		pcre_options |= PCRE_PARTIAL_SOFT;
-
 	/*
 	 * Check for matching specifications in reverse order, so that
 	 * the last matching specification is used.
@@ -717,25 +679,19 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 		 * a regex check        */
 		if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
 		    (!mode || !spec->mode || mode == spec->mode)) {
-			if (compile_regex(data, spec, NULL) < 0)
+			if (compile_regex(data, spec, &regex_error_data) < 0)
 				goto finish;
 			if (spec->stem_id == -1)
-				rc = pcre_exec(spec->regex,
-						    get_pcre_extra(spec),
-						    key, strlen(key), 0,
-						    pcre_options, NULL, 0);
+				rc = regex_match(spec->regex, key, partial);
 			else
-				rc = pcre_exec(spec->regex,
-						    get_pcre_extra(spec),
-						    buf, strlen(buf), 0,
-						    pcre_options, NULL, 0);
-			if (rc == 0) {
+				rc = regex_match(spec->regex, buf, partial);
+			if (rc == REGEX_MATCH) {
 				spec->matches++;
 				break;
-			} else if (partial && rc == PCRE_ERROR_PARTIAL)
+			} else if (partial && rc == REGEX_MATCH_PARTIAL)
 				break;
 
-			if (rc == PCRE_ERROR_NOMATCH)
+			if (rc == REGEX_NO_MATCH)
 				continue;
 
 			errno = ENOENT;
@@ -874,17 +830,10 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1,
 			continue;
 		}
 
-		if (spec1->regcomp && spec2->regcomp) {
-			size_t len1, len2;
-			int rc;
-
-			rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1);
-			assert(rc == 0);
-			rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2);
-			assert(rc == 0);
-			if (len1 != len2 ||
-			    memcmp(spec1->regex, spec2->regex, len1))
+		if (spec1->regex && spec2->regex) {
+			if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){
 				return incomp(spec1, spec2, "regex", i, j);
+			}
 		} else {
 			if (strcmp(spec1->regex_str, spec2->regex_str))
 				return incomp(spec1, spec2, "regex_str", i, j);
diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
index 6d1e890..3df7972 100644
--- a/libselinux/src/label_file.h
+++ b/libselinux/src/label_file.h
@@ -6,6 +6,14 @@
 
 #include <sys/stat.h>
 
+/*
+ * regex.h/c were introduced to hold all dependencies on the regular
+ * expression back-end when we started supporting PCRE2. regex.h defines a
+ * minimal interface required by libselinux, so that the remaining code
+ * can be agnostic about the underlying implementation.
+ */
+#include "regex.h"
+
 #include "callbacks.h"
 #include "label_internal.h"
 
@@ -19,26 +27,16 @@
 
 #define SELINUX_COMPILED_FCONTEXT_MAX_VERS	SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
 
-/* Prior to version 8.20, libpcre did not have pcre_free_study() */
-#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
-#define pcre_free_study  pcre_free
-#endif
-
 /* A file security context specification. */
 struct spec {
 	struct selabel_lookup_rec lr;	/* holds contexts for lookup result */
 	char *regex_str;	/* regular expession string for diagnostics */
 	char *type_str;		/* type string for diagnostic messages */
-	pcre *regex;		/* compiled regular expression */
-	union {
-		pcre_extra *sd;	/* pointer to extra compiled stuff */
-		pcre_extra lsd;	/* used to hold the mmap'd version */
-	};
+	struct regex_data * regex; /* backend dependent regular expression data */
 	mode_t mode;		/* mode format value */
 	int matches;		/* number of matching pathnames */
 	int stem_id;		/* indicates which stem-compression item */
 	char hasMetaChars;	/* regular expression has meta-chars */
-	char regcomp;		/* regex_str has been compiled to regex */
 	char from_mmap;		/* this spec is from an mmap of the data */
 	size_t prefix_len;      /* length of fixed path prefix */
 };
@@ -78,17 +76,6 @@ struct saved_data {
 	struct mmap_area *mmap_areas;
 };
 
-static inline pcre_extra *get_pcre_extra(struct spec *spec)
-{
-	if (spec->from_mmap) {
-		if (spec->lsd.study_data)
-			return &spec->lsd;
-		else
-			return NULL;
-	} else
-		return spec->sd;
-}
-
 static inline mode_t string_to_mode(char *mode)
 {
 	size_t len;
@@ -331,15 +318,14 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes)
 }
 
 static inline int compile_regex(struct saved_data *data, struct spec *spec,
-					    const char **errbuf)
+					    struct regex_error_data *error_data)
 {
-	const char *tmperrbuf;
 	char *reg_buf, *anchored_regex, *cp;
 	struct stem *stem_arr = data->stem_arr;
 	size_t len;
-	int erroff;
+	int rc;
 
-	if (spec->regcomp)
+	if (spec->regex)
 		return 0; /* already done */
 
 	/* Skip the fixed stem. */
@@ -361,25 +347,13 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
 	*cp = '\0';
 
 	/* Compile the regular expression. */
-	spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
-						    &erroff, NULL);
+	rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
 	free(anchored_regex);
-	if (!spec->regex) {
-		if (errbuf)
-			*errbuf = tmperrbuf;
-		return -1;
-	}
-
-	spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
-	if (!spec->sd && tmperrbuf) {
-		if (errbuf)
-			*errbuf = tmperrbuf;
+	if (rc < 0) {
 		return -1;
 	}
 
 	/* Done. */
-	spec->regcomp = 1;
-
 	return 0;
 }
 
@@ -395,6 +369,8 @@ static inline int process_line(struct selabel_handle *rec,
 	struct spec *spec_arr;
 	unsigned int nspec = data->nspec;
 	const char *errbuf = NULL;
+	char regex_error_format_buffer[256];
+	struct regex_error_data error_data;
 
 	items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
 	if (items < 0) {
@@ -453,12 +429,14 @@ static inline int process_line(struct selabel_handle *rec,
 	 */
 	data->nspec++;
 
-	if (rec->validating &&
-			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
+	if (rec->validating
+			&& compile_regex(data, &spec_arr[nspec], &error_data)) {
+		regex_format_error(&error_data, regex_error_format_buffer,
+				sizeof(regex_error_format_buffer));
 		COMPAT_LOG(SELINUX_ERROR,
 			   "%s:  line %u has invalid regex %s:  %s\n",
 			   path, lineno, regex,
-			   (errbuf ? errbuf : "out of memory"));
+			   regex_error_format_buffer);
 		errno = EINVAL;
 		return -1;
 	}
diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
new file mode 100644
index 0000000..1c4a84d
--- /dev/null
+++ b/libselinux/src/regex.c
@@ -0,0 +1,496 @@
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "regex.h"
+#include "label_file.h"
+
+#ifdef USE_PCRE2
+struct regex_data {
+	pcre2_code *regex; /* compiled regular expression */
+	/*
+	 * match data block required for the compiled
+	 * pattern in pcre2
+	 */
+	pcre2_match_data *match_data;
+};
+
+int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
+		       struct regex_error_data *errordata)
+{
+	memset(errordata, 0, sizeof(struct regex_error_data));
+
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->regex = pcre2_compile(
+	    (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
+	    &errordata->error_code, &errordata->error_offset, NULL);
+	if (!(*regex)->regex) {
+		goto err;
+	}
+
+	(*regex)->match_data =
+	    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
+	if (!(*regex)->match_data) {
+		goto err;
+	}
+	return 0;
+
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+char const *regex_version(void)
+{
+	static char version_buf[256];
+	size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
+	if (len <= 0 || len > sizeof(version_buf))
+		return NULL;
+
+	pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
+	return version_buf;
+}
+
+int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex)
+{
+	int rc;
+	uint32_t entry_len;
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0)
+		return -1;
+
+	if (entry_len) {
+		/*
+		 * this should yield exactly one because we store one pattern at
+		 * a time
+		 */
+		rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
+		if (rc != 1)
+			return -1;
+
+		*regex = regex_data_create();
+		if (!*regex)
+			return -1;
+
+		rc = pcre2_serialize_decode(&(*regex)->regex, 1,
+					    (PCRE2_SPTR)mmap_area->next_addr,
+					    NULL);
+		if (rc != 1)
+			goto err;
+
+		(*regex)->match_data =
+		    pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
+		if (!(*regex)->match_data)
+			goto err;
+	}
+
+	/* and skip the decoded bit */
+	rc = next_entry(NULL, mmap_area, entry_len);
+	if (rc < 0)
+		goto err;
+
+	return 0;
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+int regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex)
+{
+	int rc = 0;
+	size_t len;
+	PCRE2_SIZE serialized_size;
+	uint32_t to_write = 0;
+	PCRE2_UCHAR *bytes = NULL;
+
+	if (do_write_precompregex) {
+		/* encode the patter for serialization */
+		rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex,
+					    1, &bytes, &serialized_size, NULL);
+		if (rc != 1) {
+			rc = -1;
+			goto out;
+		}
+		to_write = serialized_size;
+	}
+
+	/* write serialized pattern's size */
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1) {
+		rc = -1;
+		goto out;
+	}
+
+	if (do_write_precompregex) {
+		/* write serialized pattern */
+		len = fwrite(bytes, 1, to_write, fp);
+		if (len != to_write)
+			rc = -1;
+	}
+
+out:
+	if (bytes)
+		pcre2_serialize_free(bytes);
+
+	return rc;
+}
+
+void regex_data_free(struct regex_data *regex)
+{
+	if (regex) {
+		if (regex->regex)
+			pcre2_code_free(regex->regex);
+		if (regex->match_data)
+			pcre2_match_data_free(regex->match_data);
+		free(regex);
+	}
+}
+
+int regex_match(struct regex_data *regex, char const *subject, int partial)
+{
+	int rc;
+	rc = pcre2_match(
+	    regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
+	    partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data, NULL);
+	if (rc > 0)
+		return REGEX_MATCH;
+	switch (rc) {
+	case PCRE2_ERROR_PARTIAL:
+		return REGEX_MATCH_PARTIAL;
+	case PCRE2_ERROR_NOMATCH:
+		return REGEX_NO_MATCH;
+	default:
+		return REGEX_ERROR;
+	}
+}
+
+/*
+ * TODO Replace this compare function with something that actually compares the
+ * regular expressions.
+ * This compare function basically just compares the binary representations of
+ * the automatons, and because this representation contains pointers and
+ * metadata, it can only return a match if regex1 == regex2.
+ * Preferably, this function would be replaced with an algorithm that computes
+ * the equivalence of the automatons systematically.
+ */
+int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
+{
+	int rc;
+	size_t len1, len2;
+	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
+	assert(rc == 0);
+	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
+	assert(rc == 0);
+	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
+		return SELABEL_INCOMPARABLE;
+
+	return SELABEL_EQUAL;
+}
+
+#else // !USE_PCRE2
+
+/* Prior to version 8.20, libpcre did not have pcre_free_study() */
+#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
+#define pcre_free_study pcre_free
+#endif
+
+struct regex_data {
+	int owned;   /*
+		      * non zero if regex and pcre_extra is owned by this
+		      * structure and thus must be freed on destruction.
+		      */
+	pcre *regex; /* compiled regular expression */
+	union {
+		pcre_extra *sd; /* pointer to extra compiled stuff */
+		pcre_extra lsd; /* used to hold the mmap'd version */
+	};
+};
+
+int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
+		       struct regex_error_data *errordata)
+{
+	memset(errordata, 0, sizeof(struct regex_error_data));
+
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->regex =
+	    pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer,
+			 &errordata->error_offset, NULL);
+	if (!(*regex)->regex)
+		goto err;
+
+	(*regex)->owned = 1;
+
+	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
+	if (!(*regex)->sd && errordata->error_buffer)
+		goto err;
+
+	return 0;
+
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+char const *regex_version(void)
+{
+	return pcre_version();
+}
+
+int regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex)
+{
+	int rc;
+	uint32_t entry_len;
+	size_t info_len;
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0 || !entry_len)
+		return -1;
+
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->owned = 0;
+	(*regex)->regex = (pcre *)mmap_area->next_addr;
+	rc = next_entry(NULL, mmap_area, entry_len);
+	if (rc < 0)
+		goto err;
+
+	/*
+	 * Check that regex lengths match. pcre_fullinfo()
+	 * also validates its magic number.
+	 */
+	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
+	if (rc < 0 || info_len != entry_len)
+		goto err;
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0 || !entry_len)
+		goto err;
+
+	if (entry_len) {
+		(*regex)->lsd.study_data = (void *)mmap_area->next_addr;
+		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
+		rc = next_entry(NULL, mmap_area, entry_len);
+		if (rc < 0)
+			goto err;
+
+		/* Check that study data lengths match. */
+		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
+				   PCRE_INFO_STUDYSIZE, &info_len);
+		if (rc < 0 || info_len != entry_len)
+			goto err;
+	}
+	return 0;
+
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+static inline pcre_extra *get_pcre_extra(struct regex_data *regex)
+{
+	if (!regex) return NULL;
+	if (regex->owned) {
+		return regex->sd;
+	} else if (regex->lsd.study_data) {
+		return &regex->lsd;
+	} else {
+		return NULL;
+	}
+}
+
+int regex_writef(struct regex_data *regex, FILE *fp)
+{
+	int rc;
+	size_t len;
+	uint32_t to_write;
+	size_t size;
+	pcre_extra *sd = get_pcre_extra(regex);
+
+	/* determine the size of the pcre data in bytes */
+	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
+	if (rc < 0)
+		return -1;
+
+	/* write the number of bytes in the pcre data */
+	to_write = size;
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1)
+		return -1;
+
+	/* write the actual pcre data as a char array */
+	len = fwrite(regex->regex, 1, to_write, fp);
+	if (len != to_write)
+		return -1;
+
+	if (sd) {
+		/* determine the size of the pcre study info */
+		rc =
+		    pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size);
+		if (rc < 0)
+			return -1;
+	} else
+		size = 0;
+
+	/* write the number of bytes in the pcre study data */
+	to_write = size;
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1)
+		return -1;
+
+	if (sd) {
+		/* write the actual pcre study data as a char array */
+		len = fwrite(sd->study_data, 1, to_write, fp);
+		if (len != to_write)
+			return -1;
+	}
+
+	return 0;
+}
+
+void regex_data_free(struct regex_data *regex)
+{
+	if (regex) {
+		if (regex->owned) {
+			if (regex->regex)
+				pcre_free(regex->regex);
+			if (regex->sd)
+				pcre_free_study(regex->sd);
+		}
+		free(regex);
+	}
+}
+
+int regex_match(struct regex_data *regex, char const *subject, int partial)
+{
+	int rc;
+
+	rc = pcre_exec(regex->regex, get_pcre_extra(regex),
+		       subject, strlen(subject), 0,
+		       partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
+	switch (rc) {
+	case 0:
+		return REGEX_MATCH;
+	case PCRE_ERROR_PARTIAL:
+		return REGEX_MATCH_PARTIAL;
+	case PCRE_ERROR_NOMATCH:
+		return REGEX_NO_MATCH;
+	default:
+		return REGEX_ERROR;
+	}
+}
+
+/*
+ * TODO Replace this compare function with something that actually compares the
+ * regular expressions.
+ * This compare function basically just compares the binary representations of
+ * the automatons, and because this representation contains pointers and
+ * metadata, it can only return a match if regex1 == regex2.
+ * Preferably, this function would be replaced with an algorithm that computes
+ * the equivalence of the automatons systematically.
+ */
+int regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
+{
+	int rc;
+	size_t len1, len2;
+	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
+	assert(rc == 0);
+	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
+	assert(rc == 0);
+	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
+		return SELABEL_INCOMPARABLE;
+
+	return SELABEL_EQUAL;
+}
+
+#endif
+
+struct regex_data *regex_data_create(void)
+{
+	return (struct regex_data *)calloc(1, sizeof(struct regex_data));
+}
+
+void regex_format_error(struct regex_error_data const *error_data, char *buffer,
+			size_t buf_size)
+{
+	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
+	char *ptr = &buffer[buf_size - the_end_length];
+	int rc = 0;
+	size_t pos = 0;
+	if (!buffer || !buf_size)
+		return;
+	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
+	if (rc < 0)
+		/*
+		 * If snprintf fails it constitutes a logical error that needs
+		 * fixing.
+		 */
+		abort();
+
+	pos += rc;
+	if (pos >= buf_size)
+		goto truncated;
+
+	if (error_data->error_offset > 0) {
+#ifdef USE_PCRE2
+		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
+			      error_data->error_offset);
+#else
+		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
+			      error_data->error_offset);
+#endif
+		if (rc < 0)
+			abort();
+	}
+	pos += rc;
+	if (pos >= buf_size)
+		goto truncated;
+
+#ifdef USE_PCRE2
+	rc = pcre2_get_error_message(error_data->error_code,
+				     (PCRE2_UCHAR *)(buffer + pos),
+				     buf_size - pos);
+	if (rc == PCRE2_ERROR_NOMEMORY)
+		goto truncated;
+#else
+	rc = snprintf(buffer + pos, buf_size - pos, "%s",
+		      error_data->error_buffer);
+	if (rc < 0)
+		abort();
+
+	if ((size_t)rc < strlen(error_data->error_buffer))
+		goto truncated;
+#endif
+
+	return;
+
+truncated:
+	/* replace end of string with "..." to indicate that it was truncated */
+	switch (the_end_length) {
+	/* no break statements, fall-through is intended */
+	case 4:
+		*ptr++ = '.';
+	case 3:
+		*ptr++ = '.';
+	case 2:
+		*ptr++ = '.';
+	case 1:
+		*ptr++ = '\0';
+	default:
+		break;
+	}
+	return;
+}
diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
new file mode 100644
index 0000000..810b3c5
--- /dev/null
+++ b/libselinux/src/regex.h
@@ -0,0 +1,149 @@
+#ifndef SRC_REGEX_H_
+#define SRC_REGEX_H_
+
+#include <stdio.h>
+
+#ifdef USE_PCRE2
+#include <pcre2.h>
+#else
+#include <pcre.h>
+#endif
+
+#include "dso.h"
+
+enum { REGEX_MATCH,
+       REGEX_MATCH_PARTIAL,
+       REGEX_NO_MATCH,
+       REGEX_ERROR = -1,
+};
+
+struct regex_data;
+
+#ifdef USE_PCRE2
+struct regex_error_data {
+	int error_code;
+	PCRE2_SIZE error_offset;
+};
+#else
+struct regex_error_data {
+	char const *error_buffer;
+	int error_offset;
+};
+#endif
+
+struct mmap_area;
+
+/**
+ * regex_verison returns the version string of the underlying regular
+ * regular expressions library. In the case of PCRE it just returns the
+ * result of pcre_version(). In the case of PCRE2, the very first time this
+ * function is called it allocates a buffer large enough to hold the version
+ * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
+ * The allocated buffer will linger in memory until the calling process is being
+ * reaped.
+ *
+ * It may return NULL on error.
+ */
+char const *regex_version(void) hidden;
+/**
+ * This constructor function allocates a buffer for a regex_data structure.
+ * The buffer is being initialized with zeroes.
+ */
+struct regex_data *regex_data_create(void) hidden;
+/**
+ * This complementary destructor function frees the a given regex_data buffer.
+ * It also frees any non NULL member pointers with the appropriate pcreX_X_free
+ * function. For PCRE this function respects the extra_owned field and frees
+ * the pcre_extra data conditionally. Calling this function on a NULL pointer is
+ * save.
+ */
+void regex_data_free(struct regex_data *regex) hidden;
+/**
+ * This function compiles the regular expression. Additionally, it prepares
+ * data structures required by the different underlying engines. For PCRE
+ * it calls pcre_study to generate optional data required for optimized
+ * execution of the compiled pattern. In the case of PCRE2, it allocates
+ * a pcre2_match_data structure of appropriate size to hold all possible
+ * matches created by the pattern.
+ *
+ * @arg regex If successful, the structure returned through *regex was allocated
+ *            with regex_data_create and must be freed with regex_data_free.
+ * @arg pattern_string The pattern string that is to be compiled.
+ * @arg errordata A pointer to a regex_error_data structure must be passed
+ *                to this function. This structure depends on the underlying
+ *                implementation. It can be passed to regex_format_error
+ *                to generate a human readable error message.
+ * @retval 0 on success
+ * @retval -1 on error
+ */
+int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
+		       struct regex_error_data *errordata) hidden;
+/**
+ * This function loads a serialized precompiled pattern from a contiguous
+ * data region given by map_area.
+ *
+ * @arg map_area Description of the memory region holding a serialized
+ *               representation of the precompiled pattern.
+ * @arg regex If successful, the structure returned through *regex was allocated
+ *            with regex_data_create and must be freed with regex_data_free.
+ *
+ * @retval 0 on success
+ * @retval -1 on error
+ */
+int regex_load_mmap(struct mmap_area *map_area,
+		    struct regex_data **regex) hidden;
+/**
+ * This function stores a precompiled regular expression to a file.
+ * In the case of PCRE, it just dumps the binary representation of the
+ * precomplied pattern into a file. In the case of PCRE2, it uses the
+ * serialization function provided by the library.
+ *
+ * @arg regex The precomplied regular expression data.
+ * @arg fp A file stream specifying the output file.
+ * @arg do_write_precompregex If non-zero precompiled patterns are written to
+ *			      the output file (ignored by PCRE1 back-end).
+ */
+int regex_writef(struct regex_data *regex, FILE *fp,
+		 int do_write_precompregex) hidden;
+/**
+ * This function applies a precompiled pattern to a subject string and
+ * returns whether or not a match was found.
+ *
+ * @arg regex The precompiled pattern.
+ * @arg subject The subject string.
+ * @arg partial Boolean indicating if partial matches are wanted. A nonzero
+ *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
+ *              option to pcre_exec of pcre2_match.
+ * @retval REGEX_MATCH if a match was found
+ * @retval REGEX_MATCH_PARTIAL if a partial match was found
+ * @retval REGEX_NO_MATCH if no match was found
+ * @retval REGEX_ERROR if an error was encountered during the execution of the
+ *                     regular expression
+ */
+int regex_match(struct regex_data *regex, char const *subject,
+		int partial) hidden;
+/**
+ * This function compares two compiled regular expressions (regex1 and regex2).
+ * It compares the binary representations of the compiled patterns. It is a very
+ * crude approximation because the binary representation holds data like
+ * reference counters, that has nothing to do with the actual state machine.
+ *
+ * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
+ *                       the same
+ * @retval SELABEL_INCOMPARABLE otherwise
+ */
+int regex_cmp(struct regex_data *regex1, struct regex_data *regex2) hidden;
+/**
+ * This function takes the error data returned by regex_prepare_data and turns
+ * it in to a human readable error message.
+ * If the buffer given to hold the error message is to small it truncates the
+ * message and indicates the truncation with an ellipsis ("...") at the end of
+ * the buffer.
+ *
+ * @arg error_data Error data as returned by regex_prepare_data.
+ * @arg buffer String buffer to hold the formated error string.
+ * @arg buf_size Total size of the given bufer in bytes.
+ */
+void regex_format_error(struct regex_error_data const *error_data, char *buffer,
+			size_t buf_size) hidden;
+#endif /* SRC_REGEX_H_ */
diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
index 8497cb4..1943bef 100644
--- a/libselinux/utils/Makefile
+++ b/libselinux/utils/Makefile
@@ -24,12 +24,15 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
           -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
           -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
           -Werror -Wno-aggregate-return -Wno-redundant-decls
-override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
+override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
 LDLIBS += -L../src -lselinux -L$(LIBDIR)
+PCRE_LDFLAGS ?= -lpcre
 
 TARGETS=$(patsubst %.c,%,$(wildcard *.c))
 
-sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
+sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol
+
+sefcontext_compile: sefcontext_compile.o ../src/regex.o
 
 selinux_restorecon: LDLIBS += -lsepol
 
diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
index fd6fb78..770ec4c 100644
--- a/libselinux/utils/sefcontext_compile.c
+++ b/libselinux/utils/sefcontext_compile.c
@@ -1,6 +1,5 @@
 #include <ctype.h>
 #include <errno.h>
-#include <pcre.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@@ -13,6 +12,7 @@
 #include <sepol/sepol.h>
 
 #include "../src/label_file.h"
+#include "../src/regex.h"
 
 const char *policy_file;
 static int ctx_err;
@@ -92,7 +92,8 @@ out:
  *	u32  - data length of the pcre regex study daya
  *	char - a buffer holding the raw pcre regex study data
  */
-static int write_binary_file(struct saved_data *data, int fd)
+static int write_binary_file(struct saved_data *data, int fd,
+			     int do_write_precompregex)
 {
 	struct spec *specs = data->spec_arr;
 	FILE *bin_file;
@@ -101,6 +102,7 @@ static int write_binary_file(struct saved_data *data, int fd)
 	uint32_t section_len;
 	uint32_t i;
 	int rc;
+	const char *reg_version;
 
 	bin_file = fdopen(fd, "w");
 	if (!bin_file) {
@@ -119,12 +121,15 @@ static int write_binary_file(struct saved_data *data, int fd)
 	if (len != 1)
 		goto err;
 
-	/* write the pcre version */
-	section_len = strlen(pcre_version());
+	/* write version of the regex back-end */
+	reg_version = regex_version();
+	if (!reg_version)
+		goto err;
+	section_len = strlen(reg_version);
 	len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
 	if (len != 1)
 		goto err;
-	len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
+	len = fwrite(reg_version, sizeof(char), section_len, bin_file);
 	if (len != section_len)
 		goto err;
 
@@ -162,10 +167,8 @@ static int write_binary_file(struct saved_data *data, int fd)
 		mode_t mode = specs[i].mode;
 		size_t prefix_len = specs[i].prefix_len;
 		int32_t stem_id = specs[i].stem_id;
-		pcre *re = specs[i].regex;
-		pcre_extra *sd = get_pcre_extra(&specs[i]);
+		struct regex_data *re = specs[i].regex;
 		uint32_t to_write;
-		size_t size;
 
 		/* length of the context string (including nul) */
 		to_write = strlen(context) + 1;
@@ -212,42 +215,10 @@ static int write_binary_file(struct saved_data *data, int fd)
 		if (len != 1)
 			goto err;
 
-		/* determine the size of the pcre data in bytes */
-		rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
+		/* Write regex related data */
+		rc = regex_writef(re, bin_file, do_write_precompregex);
 		if (rc < 0)
 			goto err;
-
-		/* write the number of bytes in the pcre data */
-		to_write = size;
-		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
-		if (len != 1)
-			goto err;
-
-		/* write the actual pcre data as a char array */
-		len = fwrite(re, 1, to_write, bin_file);
-		if (len != to_write)
-			goto err;
-
-		if (sd) {
-			/* determine the size of the pcre study info */
-			rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
-			if (rc < 0)
-				goto err;
-		} else
-			size = 0;
-
-		/* write the number of bytes in the pcre study data */
-		to_write = size;
-		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
-		if (len != 1)
-			goto err;
-
-		if (sd) {
-			/* write the actual pcre study data as a char array */
-			len = fwrite(sd->study_data, 1, to_write, bin_file);
-			if (len != to_write)
-				goto err;
-		}
 	}
 
 	rc = 0;
@@ -270,8 +241,7 @@ static void free_specs(struct saved_data *data)
 		free(specs[i].lr.ctx_trans);
 		free(specs[i].regex_str);
 		free(specs[i].type_str);
-		pcre_free(specs[i].regex);
-		pcre_free_study(specs[i].sd);
+		regex_data_free(specs[i].regex);
 	}
 	free(specs);
 
@@ -293,6 +263,12 @@ static void usage(const char *progname)
 	    "         will be fc_file with the .bin suffix appended.\n\t"
 	    "-p       Optional binary policy file that will be used to\n\t"
 	    "         validate contexts defined in the fc_file.\n\t"
+	    "-r       Include precompiled regular expressions in the output.\n\t"
+	    "         (PCRE2 only. Compiled PCRE2 regular expressions are\n\t"
+	    "         not portable across architectures. When linked against\n\t"
+	    "         PCRE this flag is ignored)\n\t"
+	    "         Omit precompiled regular expressions (only meaningful\n\t"
+	    "         when using PCRE2 regular expression back-end).\n\t"
 	    "fc_file  The text based file contexts file to be processed.\n",
 	    progname);
 		exit(EXIT_FAILURE);
@@ -302,6 +278,7 @@ int main(int argc, char *argv[])
 {
 	const char *path = NULL;
 	const char *out_file = NULL;
+	int do_write_precompregex = 0;
 	char stack_path[PATH_MAX + 1];
 	char *tmp = NULL;
 	int fd, rc, opt;
@@ -313,7 +290,7 @@ int main(int argc, char *argv[])
 	if (argc < 2)
 		usage(argv[0]);
 
-	while ((opt = getopt(argc, argv, "o:p:")) > 0) {
+	while ((opt = getopt(argc, argv, "o:p:r")) > 0) {
 		switch (opt) {
 		case 'o':
 			out_file = optarg;
@@ -321,6 +298,9 @@ int main(int argc, char *argv[])
 		case 'p':
 			policy_file = optarg;
 			break;
+		case 'r':
+			do_write_precompregex = 1;
+			break;
 		default:
 			usage(argv[0]);
 		}
@@ -418,7 +398,7 @@ int main(int argc, char *argv[])
 		goto err_unlink;
 	}
 
-	rc = write_binary_file(data, fd);
+	rc = write_binary_file(data, fd, do_write_precompregex);
 	if (rc < 0)
 		goto err_unlink;
 
diff --git a/policycoreutils/restorecond/Makefile b/policycoreutils/restorecond/Makefile
index f99e1e7..253246d 100644
--- a/policycoreutils/restorecond/Makefile
+++ b/policycoreutils/restorecond/Makefile
@@ -17,7 +17,14 @@ DBUSLIB = -ldbus-glib-1 -ldbus-1
 CFLAGS ?= -g -Werror -Wall -W
 override CFLAGS += -I$(PREFIX)/include $(DBUSFLAGS) -I/usr/include/glib-2.0 -I/usr/lib64/glib-2.0/include -I/usr/lib/glib-2.0/include
 
-LDLIBS += -lselinux $(DBUSLIB) -lglib-2.0 -L$(LIBDIR)
+USE_PCRE2 ?= n
+ifeq ($(USE_PCRE2),y)
+	PCRE_LDFLAGS := -lpcre2-8
+else
+	PCRE_LDFLAGS := -lpcre
+endif
+
+LDLIBS += -lselinux $(PCRE_LDFLAGS) $(DBUSLIB) -lglib-2.0 -L$(LIBDIR)
 
 all: restorecond
 
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* RE: [PATCH] libselinux: add support for pcre2
  2016-09-08 15:52 Janis Danisevskis
  2016-09-08 17:51 ` Stephen Smalley
@ 2016-09-08 18:56 ` Roberts, William C
  1 sibling, 0 replies; 24+ messages in thread
From: Roberts, William C @ 2016-09-08 18:56 UTC (permalink / raw)
  To: Janis Danisevskis, selinux, seandroid-list, sds, jwcart2



> -----Original Message-----
> From: Janis Danisevskis [mailto:jdanis@android.com]
> Sent: Thursday, September 8, 2016 8:52 AM
> To: selinux@tycho.nsa.gov; seandroid-list@tycho.nsa.gov; sds@tycho.nsa.gov;
> jwcart2@tycho.nsa.gov
> Cc: Janis Danisevskis <jdanis@google.com>; Roberts, William C
> <william.c.roberts@intel.com>
> Subject: [PATCH] libselinux: add support for pcre2
> 
> From: Janis Danisevskis <jdanis@google.com>
> 
> This patch moves all pcre1/2 dependencies into the new files regex.h and regex.c
> implementing the common denominator of features needed by libselinux. The
> compiler flag -DUSE_PCRE2 toggles between the used implementations.
> 
> As of this patch libselinux supports either pcre or pcre2 but not both at the same
> time. The persistently stored file contexts information differs. This means
> libselinux can only load file context files generated by sefcontext_compile build
> with the same pcre variant.
> 
> Also, for pcre2 the persistent format is architecture dependent.
> Stored precompiled regular expressions can only be used on the same
> architecture they were generated on. If pcre2 is used and sefcontext_compile
> shall generate portable output, it and libselinux must be compiled with -
> DNO_PERSISTENTLY_STORED_PATTERNS, at the cost of having to recompile the
> regular expressions at load time.
> 
> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> 
> This patch includes includes:

Double includes

> 
> libselinux: fix memory leak on pcre2
> 
> Introduced a malloc on pcre_version(). Libselinux expected this to be static, just
> use a static internal buffer.
> 
> Signed-off-by: William Roberts <william.c.roberts@intel.com>

You can remove any attribution since its squashed down, this really doesn't
Apply, so don't feel obligated to include any of this information.

> ---
>  libselinux/Makefile                   |  13 +
>  libselinux/src/Makefile               |   4 +-
>  libselinux/src/label_file.c           |  93 ++-----
>  libselinux/src/label_file.h           |  59 ++---
>  libselinux/src/regex.c                | 461 ++++++++++++++++++++++++++++++++++
>  libselinux/src/regex.h                | 169 +++++++++++++
>  libselinux/utils/Makefile             |   4 +-
>  libselinux/utils/sefcontext_compile.c |  55 +---
>  8 files changed, 697 insertions(+), 161 deletions(-)  create mode 100644
> libselinux/src/regex.c  create mode 100644 libselinux/src/regex.h
> 
> diff --git a/libselinux/Makefile b/libselinux/Makefile index 6142b60..15d051e
> 100644
> --- a/libselinux/Makefile
> +++ b/libselinux/Makefile
> @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)  endif  export DISABLE_AVC
> DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
> 
> +USE_PCRE2 ?= n
> +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n ifeq ($(USE_PCRE2),y)
> +	PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
> +	ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
> +		PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
> +	endif
> +	PCRE_LDFLAGS := -lpcre2-8
> +else
> +	PCRE_LDFLAGS := -lpcre
> +endif
> +export PCRE_CFLAGS PCRE_LDFLAGS
> +
>  all install relabel clean distclean indent:
>  	@for subdir in $(SUBDIRS); do \
>  		(cd $$subdir && $(MAKE) $@) || exit 1; \ diff --git
> a/libselinux/src/Makefile b/libselinux/src/Makefile index 37d01af..66687e6
> 100644
> --- a/libselinux/src/Makefile
> +++ b/libselinux/src/Makefile
> @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-
> security -Winit-self -Wmissi
>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-
> attribute=const \
>            -Werror -Wno-aggregate-return -Wno-redundant-decls
> 
> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
> +$(EMFLAGS) $(PCRE_CFLAGS)
> 
>  SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-
> variable -Wno-unused-parameter \
>  		-Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -
> Wno-missing-declarations @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
>  	$(RANLIB) $@
> 
>  $(LIBSO): $(LOBJS)
> -	$(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-
> soname,$(LIBSO),-z,defs,-z,relro
> +	$(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS)
> +-L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
>  	ln -sf $@ $(TARGET)
> 
>  $(LIBPC): $(LIBPC).in ../VERSION
> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c index
> c89bb35..e41c351 100644
> --- a/libselinux/src/label_file.c
> +++ b/libselinux/src/label_file.c
> @@ -15,7 +15,6 @@
>  #include <errno.h>
>  #include <limits.h>
>  #include <stdint.h>
> -#include <pcre.h>
>  #include <unistd.h>
>  #include <sys/mman.h>
>  #include <sys/types.h>
> @@ -112,6 +111,7 @@ static int load_mmap(struct selabel_handle *rec, const
> char *path,
>  	struct mmap_area *mmap_area;
>  	uint32_t i, magic, version;
>  	uint32_t entry_len, stem_map_len, regex_array_len;
> +	const char *reg_version;
> 
>  	if (isbinary) {
>  		len = strlen(path);
> @@ -175,8 +175,13 @@ static int load_mmap(struct selabel_handle *rec, const
> char *path,
>  	if (rc < 0 || version > SELINUX_COMPILED_FCONTEXT_MAX_VERS)
>  		return -1;
> 
> +	reg_version = regex_version();
> +	if (!reg_version)
> +		return -1;
> +
>  	if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
> -		len = strlen(pcre_version());
> +
> +		len = strlen(reg_version);
> 
>  		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>  		if (rc < 0)
> @@ -198,7 +203,7 @@ static int load_mmap(struct selabel_handle *rec, const
> char *path,
>  		}
> 
>  		str_buf[entry_len] = '\0';
> -		if ((strcmp(str_buf, pcre_version()) != 0)) {
> +		if ((strcmp(str_buf, reg_version) != 0)) {
>  			free(str_buf);
>  			return -1;
>  		}
> @@ -278,7 +283,6 @@ static int load_mmap(struct selabel_handle *rec, const
> char *path,
> 
>  		spec = &data->spec_arr[data->nspec];
>  		spec->from_mmap = 1;
> -		spec->regcomp = 1;
> 
>  		/* Process context */
>  		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t)); @@ -
> 364,47 +368,10 @@ static int load_mmap(struct selabel_handle *rec, const char
> *path,
>  			spec->prefix_len = prefix_len;
>  		}
> 
> -		/* Process regex and study_data entries */
> -		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> -		if (rc < 0 || !entry_len) {
> -			rc = -1;
> -			goto err;
> -		}
> -		spec->regex = (pcre *)mmap_area->next_addr;
> -		rc = next_entry(NULL, mmap_area, entry_len);
> +		rc = regex_load_mmap(mmap_area, &spec->regex);
>  		if (rc < 0)
>  			goto err;
> 
> -		/* Check that regex lengths match. pcre_fullinfo()
> -		 * also validates its magic number. */
> -		rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
> -		if (rc < 0 || len != entry_len) {
> -			rc = -1;
> -			goto err;
> -		}
> -
> -		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> -		if (rc < 0 || !entry_len) {
> -			rc = -1;
> -			goto err;
> -		}
> -
> -		if (entry_len) {
> -			spec->lsd.study_data = (void *)mmap_area->next_addr;
> -			spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> -			rc = next_entry(NULL, mmap_area, entry_len);
> -			if (rc < 0)
> -				goto err;
> -
> -			/* Check that study data lengths match. */
> -			rc = pcre_fullinfo(spec->regex, &spec->lsd,
> -					   PCRE_INFO_STUDYSIZE, &len);
> -			if (rc < 0 || len != entry_len) {
> -				rc = -1;
> -				goto err;
> -			}
> -		}
> -
>  		data->nspec++;
>  	}
> 
> @@ -605,14 +572,11 @@ static void closef(struct selabel_handle *rec)
>  		spec = &data->spec_arr[i];
>  		free(spec->lr.ctx_trans);
>  		free(spec->lr.ctx_raw);
> +		regex_data_free(spec->regex);
>  		if (spec->from_mmap)
>  			continue;
>  		free(spec->regex_str);
>  		free(spec->type_str);
> -		if (spec->regcomp) {
> -			pcre_free(spec->regex);
> -			pcre_free_study(spec->sd);
> -		}
>  	}
> 
>  	for (i = 0; i < (unsigned int)data->num_stems; i++) { @@ -644,13 +608,14
> @@ static struct spec *lookup_common(struct selabel_handle *rec,  {
>  	struct saved_data *data = (struct saved_data *)rec->data;
>  	struct spec *spec_arr = data->spec_arr;
> -	int i, rc, file_stem, pcre_options = 0;
> +	int i, rc, file_stem;
>  	mode_t mode = (mode_t)type;
>  	const char *buf;
>  	struct spec *ret = NULL;
>  	char *clean_key = NULL;
>  	const char *prev_slash, *next_slash;
>  	unsigned int sofar = 0;
> +	struct regex_error_data regex_error_data;
> 
>  	if (!data->nspec) {
>  		errno = ENOENT;
> @@ -677,9 +642,6 @@ static struct spec *lookup_common(struct selabel_handle
> *rec,
>  	file_stem = find_stem_from_file(data, &buf);
>  	mode &= S_IFMT;
> 
> -	if (partial)
> -		pcre_options |= PCRE_PARTIAL_SOFT;
> -
>  	/*
>  	 * Check for matching specifications in reverse order, so that
>  	 * the last matching specification is used.
> @@ -692,25 +654,19 @@ static struct spec *lookup_common(struct
> selabel_handle *rec,
>  		 * a regex check        */
>  		if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
>  		    (!mode || !spec->mode || mode == spec->mode)) {
> -			if (compile_regex(data, spec, NULL) < 0)
> +			if (compile_regex(data, spec, &regex_error_data) < 0)
>  				goto finish;
>  			if (spec->stem_id == -1)
> -				rc = pcre_exec(spec->regex,
> -						    get_pcre_extra(spec),
> -						    key, strlen(key), 0,
> -						    pcre_options, NULL, 0);
> +				rc = regex_match(spec->regex, key, partial);
>  			else
> -				rc = pcre_exec(spec->regex,
> -						    get_pcre_extra(spec),
> -						    buf, strlen(buf), 0,
> -						    pcre_options, NULL, 0);
> -			if (rc == 0) {
> +				rc = regex_match(spec->regex, buf, partial);
> +			if (rc == REGEX_MATCH) {
>  				spec->matches++;
>  				break;
> -			} else if (partial && rc == PCRE_ERROR_PARTIAL)
> +			} else if (partial && rc == REGEX_MATCH_PARTIAL)
>  				break;
> 
> -			if (rc == PCRE_ERROR_NOMATCH)
> +			if (rc == REGEX_NO_MATCH)
>  				continue;
> 
>  			errno = ENOENT;
> @@ -849,17 +805,10 @@ static enum selabel_cmp_result cmp(struct
> selabel_handle *h1,
>  			continue;
>  		}
> 
> -		if (spec1->regcomp && spec2->regcomp) {
> -			size_t len1, len2;
> -			int rc;
> -
> -			rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE,
> &len1);
> -			assert(rc == 0);
> -			rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE,
> &len2);
> -			assert(rc == 0);
> -			if (len1 != len2 ||
> -			    memcmp(spec1->regex, spec2->regex, len1))
> +		if (spec1->regex && spec2->regex) {
> +			if (regex_cmp(spec1->regex, spec2->regex) ==
> SELABEL_INCOMPARABLE){
>  				return incomp(spec1, spec2, "regex", i, j);
> +			}
>  		} else {
>  			if (strcmp(spec1->regex_str, spec2->regex_str))
>  				return incomp(spec1, spec2, "regex_str", i, j); diff
> --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h index
> 6d1e890..24cb9e0 100644
> --- a/libselinux/src/label_file.h
> +++ b/libselinux/src/label_file.h
> @@ -6,6 +6,14 @@
> 
>  #include <sys/stat.h>
> 
> +/*
> + * regex.h/c were introduced to hold all dependencies on the regular
> + * expression back-end when we started supporting PCRE2. regex.h
> +defines a
> + * minimal interface required by libselinux, so that the remaining code
> + * can be agnostic about the underlying implementation.
> + */
> +#include "regex.h"
> +
>  #include "callbacks.h"
>  #include "label_internal.h"
> 
> @@ -19,26 +27,16 @@
> 
>  #define SELINUX_COMPILED_FCONTEXT_MAX_VERS
> 	SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
> 
> -/* Prior to version 8.20, libpcre did not have pcre_free_study() */ -#if
> (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)) -#define
> pcre_free_study  pcre_free -#endif
> -
>  /* A file security context specification. */  struct spec {
>  	struct selabel_lookup_rec lr;	/* holds contexts for lookup result */
>  	char *regex_str;	/* regular expession string for diagnostics */
>  	char *type_str;		/* type string for diagnostic messages */
> -	pcre *regex;		/* compiled regular expression */
> -	union {
> -		pcre_extra *sd;	/* pointer to extra compiled stuff */
> -		pcre_extra lsd;	/* used to hold the mmap'd version */
> -	};
> +	struct regex_data * regex; /* backend dependent regular expression
> +data */
>  	mode_t mode;		/* mode format value */
>  	int matches;		/* number of matching pathnames */
>  	int stem_id;		/* indicates which stem-compression item */
>  	char hasMetaChars;	/* regular expression has meta-chars */
> -	char regcomp;		/* regex_str has been compiled to regex */
>  	char from_mmap;		/* this spec is from an mmap of the data
> */
>  	size_t prefix_len;      /* length of fixed path prefix */
>  };
> @@ -78,17 +76,6 @@ struct saved_data {
>  	struct mmap_area *mmap_areas;
>  };
> 
> -static inline pcre_extra *get_pcre_extra(struct spec *spec) -{
> -	if (spec->from_mmap) {
> -		if (spec->lsd.study_data)
> -			return &spec->lsd;
> -		else
> -			return NULL;
> -	} else
> -		return spec->sd;
> -}
> -
>  static inline mode_t string_to_mode(char *mode)  {
>  	size_t len;
> @@ -331,15 +318,14 @@ static inline int next_entry(void *buf, struct mmap_area
> *fp, size_t bytes)  }
> 
>  static inline int compile_regex(struct saved_data *data, struct spec *spec,
> -					    const char **errbuf)
> +					    struct regex_error_data * error_data)
>  {
> -	const char *tmperrbuf;
>  	char *reg_buf, *anchored_regex, *cp;
>  	struct stem *stem_arr = data->stem_arr;
>  	size_t len;
> -	int erroff;
> +	int rc;
> 
> -	if (spec->regcomp)
> +	if (spec->regex)
>  		return 0; /* already done */
> 
>  	/* Skip the fixed stem. */
> @@ -361,25 +347,13 @@ static inline int compile_regex(struct saved_data *data,
> struct spec *spec,
>  	*cp = '\0';
> 
>  	/* Compile the regular expression. */
> -	spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL,
> &tmperrbuf,
> -						    &erroff, NULL);
> +	rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
>  	free(anchored_regex);
> -	if (!spec->regex) {
> -		if (errbuf)
> -			*errbuf = tmperrbuf;
> -		return -1;
> -	}
> -
> -	spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
> -	if (!spec->sd && tmperrbuf) {
> -		if (errbuf)
> -			*errbuf = tmperrbuf;
> +	if (rc < 0) {
>  		return -1;
>  	}
> 
>  	/* Done. */
> -	spec->regcomp = 1;
> -
>  	return 0;
>  }
> 
> @@ -394,7 +368,8 @@ static inline int process_line(struct selabel_handle *rec,
>  	struct saved_data *data = (struct saved_data *)rec->data;
>  	struct spec *spec_arr;
>  	unsigned int nspec = data->nspec;
> -	const char *errbuf = NULL;
> +	char const *errbuf;
> +	struct regex_error_data error_data;
> 
>  	items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type,
> &context);
>  	if (items < 0) {
> @@ -454,7 +429,7 @@ static inline int process_line(struct selabel_handle *rec,
>  	data->nspec++;
> 
>  	if (rec->validating &&
> -			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
> +			    compile_regex(data, &spec_arr[nspec], &error_data)) {
>  		COMPAT_LOG(SELINUX_ERROR,
>  			   "%s:  line %u has invalid regex %s:  %s\n",
>  			   path, lineno, regex,
> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c new file mode 100644
> index 0000000..558a72a
> --- /dev/null
> +++ b/libselinux/src/regex.c
> @@ -0,0 +1,461 @@
> +#include <assert.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include "regex.h"
> +#include "label_file.h"
> +
> +#ifdef USE_PCRE2
> +int regex_prepare_data(struct regex_data ** regex,
> +			     char const * pattern_string,
> +			     struct regex_error_data * errordata) {
> +	memset(errordata, 0, sizeof(struct regex_error_data));
> +
> +	*regex = regex_data_create();
> +	if (!(*regex))
> +		return -1;
> +
> +	(*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
> +			PCRE2_ZERO_TERMINATED,
> +			PCRE2_DOTALL,
> +			&errordata->error_code,
> +			&errordata->error_offset, NULL);
> +	if (!(*regex)->regex) {
> +		goto err;
> +	}
> +
> +	(*regex)->match_data =
> +		pcre2_match_data_create_from_pattern((*regex)->regex,
> NULL);
> +	if (!(*regex)->match_data) {
> +		goto err;
> +	}
> +	return 0;
> +
> +err:	regex_data_free(*regex);
> +	*regex = NULL;
> +	return -1;
> +}
> +
> +char const * regex_version(void) {
> +	static char version_buf[256];
> +	size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
> +	if (len <= 0 || len > sizeof(version_buf))
> +		return NULL;
> +
> +	pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
> +	return version_buf;
> +}
> +
> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data **
> regex) {
> +	int rc;
> +	size_t entry_len;
> +
> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +	if (rc < 0)
> +		return -1;
> +
> +	if (entry_len) {
> +		/*
> +		 * this should yield exactly one because we store one pattern at a
> time
> +		 */
> +		rc = pcre2_serialize_get_number_of_codes(mmap_area-
> >next_addr);
> +		if (rc != 1)
> +			return -1;
> +
> +		*regex = regex_data_create();
> +		if (!*regex)
> +			return -1;
> +
> +		rc = pcre2_serialize_decode(&(*regex)->regex, 1,
> +				(PCRE2_SPTR)mmap_area->next_addr, NULL);
> +		if (rc != 1)
> +			goto err;
> +
> +		(*regex)->match_data =
> +			pcre2_match_data_create_from_pattern((*regex)-
> >regex, NULL);
> +		if (!(*regex)->match_data)
> +			goto err;
> +	}
> +
> +	/* and skip the decoded bit */
> +	rc = next_entry(NULL, mmap_area, entry_len);
> +	if (rc < 0)
> +		goto err;
> +
> +	return 0;
> +err:
> +	regex_data_free(*regex);
> +	*regex = NULL;
> +	return -1;
> +}
> +
> +int regex_writef(struct regex_data * regex, FILE * fp) {
> +	int rc = 0;
> +	size_t len;
> +	PCRE2_SIZE to_write;
> +	PCRE2_UCHAR * bytes = NULL;
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +	int do_write_patterns = 1;
> +#else
> +	int do_write_patterns = 0;
> +#endif
> +
> +	if (do_write_patterns) {
> +		/* encode the patter for serialization */
> +		rc = pcre2_serialize_encode((const pcre2_code **)&regex-
> >regex, 1,
> +					    &bytes, &to_write, NULL);
> +		if (rc != 1) {
> +			rc = -1;
> +			goto err;
> +		}
> +	} else {
> +		to_write = 0;
> +	}
> +
> +	/* write serialized pattern's size */
> +	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +	if (len != 1) {
> +		rc = -1;
> +		goto err;
> +	}
> +
> +	if (do_write_patterns) {
> +		/* write serialized pattern */
> +		len = fwrite(bytes, 1, to_write, fp);
> +		if (len != to_write) {
> +			rc = -1;
> +		}
> +	}
> +
> +err:
> +	if (bytes)
> +		pcre2_serialize_free(bytes);
> +
> +	return rc;
> +}
> +
> +void regex_data_free(struct regex_data * regex) {
> +	if (regex) {
> +		if (regex->regex) {
> +			pcre2_code_free(regex->regex);
> +		}
> +		if (regex->match_data) {
> +			pcre2_match_data_free(regex->match_data);
> +		}
> +		free(regex);
> +	}
> +}
> +
> +int regex_match(struct regex_data * regex, char const * subject, int partial) {
> +	int rc;
> +	rc = pcre2_match(regex->regex,
> +			(PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> +			partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data,
> +			NULL);
> +	if (rc > 0)
> +	return REGEX_MATCH;
> +	switch (rc) {
> +		case PCRE2_ERROR_PARTIAL:
> +			return REGEX_MATCH_PARTIAL;
> +		case PCRE2_ERROR_NOMATCH:
> +			return REGEX_NO_MATCH;
> +		default:
> +			return REGEX_ERROR;
> +	}
> +}
> +
> +/*
> + * TODO Replace this compare function with something that actually
> +compares the
> + * regular expressions.
> + * This compare function basically just compares the binary
> +representations of
> + * the automatons, and because this representation contains pointers
> +and
> + * metadata, it can only return a match if regex1 == regex2.
> + * Preferably, this function would be replaced with an algorithm that
> +computes
> + * the equivalence of the automatons systematically.
> + */
> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
> +	int rc;
> +	size_t len1, len2;
> +	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
> +	assert(rc == 0);
> +	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
> +	assert(rc == 0);
> +	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> +		return SELABEL_INCOMPARABLE;
> +
> +	return SELABEL_EQUAL;
> +}
> +
> +#else // !USE_PCRE2
> +
> +int regex_prepare_data(struct regex_data ** regex,
> +			    char const * pattern_string,
> +			    struct regex_error_data * errordata) {
> +	memset(errordata, 0, sizeof(struct regex_error_data));
> +
> +	*regex = regex_data_create();
> +	if (!(*regex))
> +		return -1;
> +
> +	(*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
> +					&errordata->error_buffer,
> +					&errordata->error_offset, NULL);
> +	if (!(*regex)->regex) {
> +		goto err;
> +	}
> +	(*regex)->owned = 1;
> +
> +	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata-
> >error_buffer);
> +	if (!(*regex)->sd && errordata->error_buffer) {
> +		goto err;
> +	}
> +
> +	return 0;
> +
> +err:	regex_data_free(*regex);
> +	*regex = NULL;
> +	return -1;
> +}
> +
> +char const * regex_version(void) {
> +	return pcre_version();
> +}
> +
> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data **
> regex) {
> +	int rc;
> +	size_t entry_len, info_len;
> +
> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +	if (rc < 0 || !entry_len) {
> +		return -1;
> +	}
> +	*regex = regex_data_create();
> +	if (!(*regex))
> +		return -1;
> +
> +	(*regex)->owned = 0;
> +	(*regex)->regex = (pcre *) mmap_area->next_addr;
> +	rc = next_entry(NULL, mmap_area, entry_len);
> +	if (rc < 0)
> +		goto err;
> +
> +	/*
> +	 * Check that regex lengths match. pcre_fullinfo()
> +	 * also validates its magic number.
> +	 */
> +	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
> +	if (rc < 0 || info_len != entry_len) {
> +		goto err;
> +	}
> +
> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +	if (rc < 0 || !entry_len) {
> +		goto err;
> +	}
> +
> +	if (entry_len) {
> +		(*regex)->lsd.study_data = (void *) mmap_area->next_addr;
> +		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> +		rc = next_entry(NULL, mmap_area, entry_len);
> +		if (rc < 0)
> +			goto err;
> +
> +		/* Check that study data lengths match. */
> +		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
> +				   PCRE_INFO_STUDYSIZE, &info_len);
> +		if (rc < 0 || info_len != entry_len)
> +			goto err;
> +	}
> +	return 0;
> +
> +err:
> +	regex_data_free(*regex);
> +	*regex = NULL;
> +	return -1;
> +}
> +
> +int regex_writef(struct regex_data * regex, FILE * fp) {
> +	int rc;
> +	size_t len;
> +	uint32_t to_write;
> +	size_t size;
> +	pcre_extra * sd = regex->owned ? regex->sd :
> +			(regex->lsd.study_data ? &regex->lsd : NULL);
> +
> +	/* determine the size of the pcre data in bytes */
> +	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
> +	if (rc < 0)
> +		return -1;
> +
> +	/* write the number of bytes in the pcre data */
> +	to_write = size;
> +	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +	if (len != 1)
> +		return -1;
> +
> +	/* write the actual pcre data as a char array */
> +	len = fwrite(regex->regex, 1, to_write, fp);
> +	if (len != to_write)
> +		return -1;
> +
> +	if (sd) {
> +		/* determine the size of the pcre study info */
> +		rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
> +				&size);
> +		if (rc < 0)
> +			return -1;
> +	} else
> +		size = 0;
> +
> +	/* write the number of bytes in the pcre study data */
> +	to_write = size;
> +	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +	if (len != 1)
> +		return -1;
> +
> +	if (sd) {
> +		/* write the actual pcre study data as a char array */
> +		len = fwrite(sd->study_data, 1, to_write, fp);
> +		if (len != to_write)
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +void regex_data_free(struct regex_data * regex) {
> +	if (regex) {
> +		if (regex->owned) {
> +			if (regex->regex)
> +				pcre_free(regex->regex);
> +			if (regex->sd)
> +				pcre_free_study(regex->sd);
> +		}
> +		free(regex);
> +	}
> +}
> +
> +int regex_match(struct regex_data * regex, char const * subject, int partial) {
> +	int rc;
> +
> +	rc = pcre_exec(regex->regex, regex->owned ? regex->sd : &regex->lsd,
> +			subject, strlen(subject), 0,
> +			partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
> +	switch (rc) {
> +		case 0:
> +			return REGEX_MATCH;
> +		case PCRE_ERROR_PARTIAL:
> +			return REGEX_MATCH_PARTIAL;
> +		case PCRE_ERROR_NOMATCH:
> +			return REGEX_NO_MATCH;
> +		default:
> +			return REGEX_ERROR;
> +	}
> +}
> +
> +/*
> + * TODO Replace this compare function with something that actually
> +compares the
> + * regular expressions.
> + * This compare function basically just compares the binary
> +representations of
> + * the automatons, and because this representation contains pointers
> +and
> + * metadata, it can only return a match if regex1 == regex2.
> + * Preferably, this function would be replaced with an algorithm that
> +computes
> + * the equivalence of the automatons systematically.
> + */
> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
> +	int rc;
> +	size_t len1, len2;
> +	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
> +	assert(rc == 0);
> +	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
> +	assert(rc == 0);
> +	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> +		return SELABEL_INCOMPARABLE;
> +
> +	return SELABEL_EQUAL;
> +}
> +
> +#endif
> +
> +
> +struct regex_data * regex_data_create(void) {
> +	struct regex_data * dummy = (struct regex_data*) malloc(
> +			sizeof(struct regex_data));
> +	if (dummy) {
> +		memset(dummy, 0, sizeof(struct regex_data));
> +	}
> +	return dummy;
> +}
> +
> +void regex_format_error(struct regex_error_data const * error_data,
> +			char * buffer, size_t buf_size) {
> +	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
> +	char * ptr = &buffer[buf_size - the_end_length];
> +	int rc = 0;
> +	size_t pos = 0;
> +	if (!buffer || !buf_size)
> +		return;
> +	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
> +	if (rc < 0)
> +		/*
> +		 * If snprintf fails it constitutes a logical error that needs
> +		 * fixing.
> +		 */
> +		abort();
> +
> +	pos += rc;
> +	if (pos >= buf_size)
> +		goto truncated;
> +
> +	if (error_data->error_offset > 0) {
> +#ifdef USE_PCRE2
> +		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
> +				error_data->error_offset);
> +#else
> +		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
> +				error_data->error_offset);
> +#endif
> +		if (rc < 0)
> +			abort();
> +
> +	}
> +	pos += rc;
> +	if (pos >= buf_size)
> +		goto truncated;
> +
> +#ifdef USE_PCRE2
> +	rc = pcre2_get_error_message(error_data->error_code,
> +			(PCRE2_UCHAR*)(buffer + pos),
> +			buf_size - pos);
> +	if (rc == PCRE2_ERROR_NOMEMORY)
> +		goto truncated;
> +#else
> +	rc = snprintf(buffer + pos, buf_size - pos, "%s",
> +			error_data->error_buffer);
> +	if (rc < 0)
> +		abort();
> +
> +	if ((size_t)rc < strlen(error_data->error_buffer))
> +		goto truncated;
> +#endif
> +
> +	return;
> +
> +truncated:
> +	/* replace end of string with "..." to indicate that it was truncated */
> +	switch (the_end_length) {
> +		/* no break statements, fall-through is intended */
> +		case 4:
> +			*ptr++ = '.';
> +		case 3:
> +			*ptr++ = '.';
> +		case 2:
> +			*ptr++ = '.';
> +		case 1:
> +			*ptr++ = '\0';
> +		default:
> +			break;
> +	}
> +	return;
> +}
> diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h new file mode 100644
> index 0000000..6a113e8
> --- /dev/null
> +++ b/libselinux/src/regex.h
> @@ -0,0 +1,169 @@
> +#ifndef SRC_REGEX_H_
> +#define SRC_REGEX_H_
> +
> +#include <stdio.h>
> +
> +#ifdef USE_PCRE2
> +#include <pcre2.h>
> +#else
> +#include <pcre.h>
> +#endif
> +
> +enum {
> +	REGEX_MATCH,
> +	REGEX_MATCH_PARTIAL,
> +	REGEX_NO_MATCH,
> +	REGEX_ERROR = -1,
> +};
> +
> +#ifdef USE_PCRE2
> +struct regex_data {

As far as I can tell, and downloading and applying patches is a PITA for me at the moment,
This struct is not dereferenced outside of regex.c, so I would imagine that we would only
Want a forward declaration in regex.h and the definition in regex.c. I don't see a point in 
Exporting the structure internals to the rest of the code base if they are not used, just
Make it opaque,

> +	pcre2_code * regex; /* compiled regular expression */
> +	pcre2_match_data * match_data; /* match data block required for the
> compiled
> +	 pattern in regex2 */
> +};
> +
> +struct regex_error_data {
> +	int error_code;
> +	PCRE2_SIZE error_offset;
> +};
> +
> +/* ^^^^^^ USE_PCRE2  ^^^^^^ */
> +#else
> +/* vvvvvv USE_PCRE vvvvvv */
> +
> +/* Prior to version 8.20, libpcre did not have pcre_free_study() */ #if
> +(PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20)) #define
> +pcre_free_study  pcre_free #endif
> +
> +struct regex_data {
> +	int owned; /*
> +			  * non zero if regex and pcre_extra is owned by this
> +			  * structure and thus must be freed on destruction.
> +			  */
> +	pcre *regex; /* compiled regular expression */
> +	union {
> +		pcre_extra *sd; /* pointer to extra compiled stuff */
> +		pcre_extra lsd; /* used to hold the mmap'd version */
> +	};
> +};
> +
> +struct regex_error_data {
> +	char const * error_buffer;
> +	int error_offset;
> +};
> +
> +#endif /* USE_PCRE2 */
> +
> +struct mmap_area;
> +
> +/**
> + * regex_verison returns the version string of the underlying regular
> + * regular expressions library. In the case of PCRE it just returns the
> + * result of pcre_version(). In the case of PCRE2, the very first time
> +this
> + * function is called it allocates a buffer large enough to hold the
> +version
> + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
> + * The allocated buffer will linger in memory until the calling process
> +is being
> + * reaped.
> + *
> + * It may return NULL on error.
> + */
> +char const * regex_version(void);
> +/**
> + * This constructor function allocates a buffer for a regex_data structure.
> + * The buffer is being initialized with zeroes.
> + */
> +struct regex_data * regex_data_create(void);
> +/**
> + * This complementary destructor function frees the a given regex_data buffer.
> + * It also frees any non NULL member pointers with the appropriate
> +pcreX_X_free
> + * function. For PCRE this function respects the extra_owned field and
> +frees
> + * the pcre_extra data conditionally. Calling this function on a NULL
> +pointer is
> + * save.
> + */
> +void regex_data_free(struct regex_data * regex);
> +/**
> + * This function compiles the regular expression. Additionally, it
> +prepares
> + * data structures required by the different underlying engines. For
> +PCRE
> + * it calls pcre_study to generate optional data required for optimized
> + * execution of the compiled pattern. In the case of PCRE2, it
> +allocates
> + * a pcre2_match_data structure of appropriate size to hold all
> +possible
> + * matches created by the pattern.
> + *
> + * @arg regex If successful, the structure returned through *regex was
> allocated
> + *            with regex_data_create and must be freed with regex_data_free.
> + * @arg pattern_string The pattern string that is to be compiled.
> + * @arg errordata A pointer to a regex_error_data structure must be passed
> + *                to this function. This structure depends on the underlying
> + *                implementation. It can be passed to regex_format_error
> + *                to generate a human readable error message.
> + * @retval 0 on success
> + * @retval -1 on error
> + */
> +int regex_prepare_data(struct regex_data ** regex, char const *
> pattern_string,
> +			struct regex_error_data * errordata);
> +/**
> + * This function loads a serialized precompiled pattern from a
> +contiguous
> + * data region given by map_area.
> + *
> + * @arg map_area Description of the memory region holding a serialized
> + *               representation of the precompiled pattern.
> + * @arg regex If successful, the structure returned through *regex was
> allocated
> + *            with regex_data_create and must be freed with regex_data_free.
> + *
> + * @retval 0 on success
> + * @retval -1 on error
> + */
> +int regex_load_mmap(struct mmap_area * map_area, struct regex_data **
> +regex);
> +/**
> + * This function stores a precompiled regular expression to a file.
> + * In the case of PCRE, it just dumps the binary representation of the
> + * precomplied pattern into a file. In the case of PCRE2, it uses the
> + * serialization function provided by the library.
> + *
> + * @arg regex The precomplied regular expression data.
> + * @arg fp A file stream specifying the output file.
> + */
> +int regex_writef(struct regex_data * regex, FILE * fp);
> +/**
> + * This function applies a precompiled pattern to a subject string and
> + * returns whether or not a match was found.
> + *
> + * @arg regex The precompiled pattern.
> + * @arg subject The subject string.
> + * @arg partial Boolean indicating if partial matches are wanted. A nonzero
> + *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
> + *              option to pcre_exec of pcre2_match.
> + * @retval REGEX_MATCH if a match was found
> + * @retval REGEX_MATCH_PARTIAL if a partial match was found
> + * @retval REGEX_NO_MATCH if no match was found
> + * @retval REGEX_ERROR if an error was encountered during the execution of
> the
> + *                     regular expression
> + */
> +int regex_match(struct regex_data * regex, char const * subject, int
> +partial);
> +/**
> + * This function compares two compiled regular expressions (regex1 and
> regex2).
> + * It compares the binary representations of the compiled patterns. It
> +is a very
> + * crude approximation because the binary representation holds data
> +like
> + * reference counters, that has nothing to do with the actual state machine.
> + *
> + * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
> + *                       the same
> + * @retval SELABEL_INCOMPARABLE otherwise  */ int regex_cmp(struct
> +regex_data * regex1, struct regex_data * regex2);
> +/**
> + * This function takes the error data returned by regex_prepare_data
> +and turns
> + * it in to a human readable error message.
> + * If the buffer given to hold the error message is to small it
> +truncates the
> + * message and indicates the truncation with an ellipsis ("...") at the
> +end of
> + * the buffer.
> + *
> + * @arg error_data Error data as returned by regex_prepare_data.
> + * @arg buffer String buffer to hold the formated error string.
> + * @arg buf_size Total size of the given bufer in bytes.
> + */
> +void regex_format_error(struct regex_error_data const * error_data,
> +			char * buffer, size_t buf_size);
> +#endif  /* SRC_REGEX_H_ */
> diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile index
> 8497cb4..1e7a048 100644
> --- a/libselinux/utils/Makefile
> +++ b/libselinux/utils/Makefile
> @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-
> security -Winit-self -Wmissi
>            -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-
> attribute=const \
>            -Werror -Wno-aggregate-return -Wno-redundant-decls -override CFLAGS
> += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
> +$(EMFLAGS) $(PCRE_CFLAGS)
>  LDLIBS += -L../src -lselinux -L$(LIBDIR)
> 
>  TARGETS=$(patsubst %.c,%,$(wildcard *.c))
> 
> -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
> +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a
> +-lsepol
> 
>  selinux_restorecon: LDLIBS += -lsepol
> 
> diff --git a/libselinux/utils/sefcontext_compile.c
> b/libselinux/utils/sefcontext_compile.c
> index fd6fb78..b6b8d92 100644
> --- a/libselinux/utils/sefcontext_compile.c
> +++ b/libselinux/utils/sefcontext_compile.c
> @@ -1,6 +1,5 @@
>  #include <ctype.h>
>  #include <errno.h>
> -#include <pcre.h>
>  #include <stdint.h>
>  #include <stdio.h>
>  #include <string.h>
> @@ -13,6 +12,7 @@
>  #include <sepol/sepol.h>
> 
>  #include "../src/label_file.h"
> +#include "../src/regex.h"
> 
>  const char *policy_file;
>  static int ctx_err;
> @@ -101,6 +101,7 @@ static int write_binary_file(struct saved_data *data, int fd)
>  	uint32_t section_len;
>  	uint32_t i;
>  	int rc;
> +	const char *reg_version;
> 
>  	bin_file = fdopen(fd, "w");
>  	if (!bin_file) {
> @@ -119,12 +120,15 @@ static int write_binary_file(struct saved_data *data, int
> fd)
>  	if (len != 1)
>  		goto err;
> 
> -	/* write the pcre version */
> -	section_len = strlen(pcre_version());
> +	/* write version of the regex back-end */
> +	reg_version = regex_version();
> +	if (!reg_version)
> +		goto err;
> +	section_len = strlen(reg_version);
>  	len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
>  	if (len != 1)
>  		goto err;
> -	len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
> +	len = fwrite(reg_version, sizeof(char), section_len, bin_file);
>  	if (len != section_len)
>  		goto err;
> 
> @@ -162,10 +166,8 @@ static int write_binary_file(struct saved_data *data, int
> fd)
>  		mode_t mode = specs[i].mode;
>  		size_t prefix_len = specs[i].prefix_len;
>  		int32_t stem_id = specs[i].stem_id;
> -		pcre *re = specs[i].regex;
> -		pcre_extra *sd = get_pcre_extra(&specs[i]);
> +		struct regex_data *re = specs[i].regex;
>  		uint32_t to_write;
> -		size_t size;
> 
>  		/* length of the context string (including nul) */
>  		to_write = strlen(context) + 1;
> @@ -212,42 +214,10 @@ static int write_binary_file(struct saved_data *data, int
> fd)
>  		if (len != 1)
>  			goto err;
> 
> -		/* determine the size of the pcre data in bytes */
> -		rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
> +		/* Write regex related data */
> +		rc = regex_writef(re, bin_file);
>  		if (rc < 0)
>  			goto err;
> -
> -		/* write the number of bytes in the pcre data */
> -		to_write = size;
> -		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> -		if (len != 1)
> -			goto err;
> -
> -		/* write the actual pcre data as a char array */
> -		len = fwrite(re, 1, to_write, bin_file);
> -		if (len != to_write)
> -			goto err;
> -
> -		if (sd) {
> -			/* determine the size of the pcre study info */
> -			rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
> -			if (rc < 0)
> -				goto err;
> -		} else
> -			size = 0;
> -
> -		/* write the number of bytes in the pcre study data */
> -		to_write = size;
> -		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> -		if (len != 1)
> -			goto err;
> -
> -		if (sd) {
> -			/* write the actual pcre study data as a char array */
> -			len = fwrite(sd->study_data, 1, to_write, bin_file);
> -			if (len != to_write)
> -				goto err;
> -		}
>  	}
> 
>  	rc = 0;
> @@ -270,8 +240,7 @@ static void free_specs(struct saved_data *data)
>  		free(specs[i].lr.ctx_trans);
>  		free(specs[i].regex_str);
>  		free(specs[i].type_str);
> -		pcre_free(specs[i].regex);
> -		pcre_free_study(specs[i].sd);
> +		regex_data_free(specs[i].regex);
>  	}
>  	free(specs);
> 
> --
> 2.8.0.rc3.226.g39d4020

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH] libselinux: add support for pcre2
  2016-09-08 15:52 Janis Danisevskis
@ 2016-09-08 17:51 ` Stephen Smalley
  2016-09-08 18:56 ` Roberts, William C
  1 sibling, 0 replies; 24+ messages in thread
From: Stephen Smalley @ 2016-09-08 17:51 UTC (permalink / raw)
  To: Janis Danisevskis, selinux, seandroid-list, jwcart2

On 09/08/2016 11:52 AM, Janis Danisevskis wrote:
> From: Janis Danisevskis <jdanis@google.com>
> 
> This patch moves all pcre1/2 dependencies into the new files regex.h
> and regex.c implementing the common denominator of features needed
> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> used implementations.
> 
> As of this patch libselinux supports either pcre or pcre2 but not
> both at the same time. The persistently stored file contexts
> information differs. This means libselinux can only load file
> context files generated by sefcontext_compile build with the
> same pcre variant.
> 
> Also, for pcre2 the persistent format is architecture dependent.
> Stored precompiled regular expressions can only be used on the
> same architecture they were generated on. If pcre2 is used and
> sefcontext_compile shall generate portable output, it and libselinux
> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
> cost of having to recompile the regular expressions at load time.
> 
> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> 
> This patch includes includes:
> 
> libselinux: fix memory leak on pcre2
> 
> Introduced a malloc on pcre_version(). Libselinux
> expected this to be static, just use a static
> internal buffer.
> 
> Signed-off-by: William Roberts <william.c.roberts@intel.com>
> ---
>  libselinux/Makefile                   |  13 +
>  libselinux/src/Makefile               |   4 +-
>  libselinux/src/label_file.c           |  93 ++-----
>  libselinux/src/label_file.h           |  59 ++---
>  libselinux/src/regex.c                | 461 ++++++++++++++++++++++++++++++++++
>  libselinux/src/regex.h                | 169 +++++++++++++
>  libselinux/utils/Makefile             |   4 +-
>  libselinux/utils/sefcontext_compile.c |  55 +---
>  8 files changed, 697 insertions(+), 161 deletions(-)
>  create mode 100644 libselinux/src/regex.c
>  create mode 100644 libselinux/src/regex.h
> 
> diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
> index 37d01af..66687e6 100644
> --- a/libselinux/src/Makefile
> +++ b/libselinux/src/Makefile
> @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
>            -Werror -Wno-aggregate-return -Wno-redundant-decls
>  
> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
>  
>  SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
>  		-Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
> @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
>  	$(RANLIB) $@
>  
>  $(LIBSO): $(LOBJS)
> -	$(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> +	$(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
>  	ln -sf $@ $(TARGET) 
>  
>  $(LIBPC): $(LIBPC).in ../VERSION

We want make to still work in this subdirectory, so we need defaults
assigned to PCRE_CFLAGS and PCRE_LDFLAGS if not set by the caller.

> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
> index 6d1e890..24cb9e0 100644
> --- a/libselinux/src/label_file.h
> +++ b/libselinux/src/label_file.h
> @@ -394,7 +368,8 @@ static inline int process_line(struct selabel_handle *rec,
>  	struct saved_data *data = (struct saved_data *)rec->data;
>  	struct spec *spec_arr;
>  	unsigned int nspec = data->nspec;
> -	const char *errbuf = NULL;
> +	char const *errbuf;

Unnecessary change?

> +	struct regex_error_data error_data;
>  
>  	items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
>  	if (items < 0) {
> @@ -454,7 +429,7 @@ static inline int process_line(struct selabel_handle *rec,
>  	data->nspec++;
>  
>  	if (rec->validating &&
> -			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
> +			    compile_regex(data, &spec_arr[nspec], &error_data)) {

Same bug as before - errbuf was being used in the following log call, so
you need to change it to use regex_format_error().

>  		COMPAT_LOG(SELINUX_ERROR,
>  			   "%s:  line %u has invalid regex %s:  %s\n",
>  			   path, lineno, regex,
> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> new file mode 100644
> index 0000000..558a72a
> --- /dev/null
> +++ b/libselinux/src/regex.c
> @@ -0,0 +1,461 @@
> +#include <assert.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include "regex.h"
> +#include "label_file.h"
> +
> +#ifdef USE_PCRE2
> +int regex_prepare_data(struct regex_data ** regex,
> +			     char const * pattern_string,
> +			     struct regex_error_data * errordata) {

All of the non-static functions in this file need to be marked with
"hidden" so that they are not part of the shared library ABI.

Preferred coding style is Linux kernel, so opening bracket goes on a
line by itself for functions.

> +
> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
> +	int rc;
> +	size_t entry_len;

As before, this needs to be uint32_t or it will be the wrong size on
64-bit systems and the subsequent next_entry() call won't fully
initialize it.

> +
> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +	if (rc < 0)
> +		return -1;
> +
> +	if (entry_len) {
> +		/*
> +		 * this should yield exactly one because we store one pattern at a time
> +		 */
> +		rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
> +		if (rc != 1)
> +			return -1;
> +
> +		*regex = regex_data_create();
> +		if (!*regex)
> +			return -1;
> +
> +		rc = pcre2_serialize_decode(&(*regex)->regex, 1,
> +				(PCRE2_SPTR)mmap_area->next_addr, NULL);
> +		if (rc != 1)
> +			goto err;
> +
> +		(*regex)->match_data =
> +			pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
> +		if (!(*regex)->match_data)
> +			goto err;
> +	}
> +
> +	/* and skip the decoded bit */
> +	rc = next_entry(NULL, mmap_area, entry_len);
> +	if (rc < 0)
> +		goto err;
> +
> +	return 0;
> +err:
> +	regex_data_free(*regex);
> +	*regex = NULL;
> +	return -1;
> +}
> +
> +int regex_writef(struct regex_data * regex, FILE * fp) {
> +	int rc = 0;
> +	size_t len;
> +	PCRE2_SIZE to_write;
> +	PCRE2_UCHAR * bytes = NULL;
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +	int do_write_patterns = 1;
> +#else
> +	int do_write_patterns = 0;
> +#endif
> +
> +	if (do_write_patterns) {
> +		/* encode the patter for serialization */
> +		rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
> +					    &bytes, &to_write, NULL);
> +		if (rc != 1) {
> +			rc = -1;
> +			goto err;
> +		}
> +	} else {
> +		to_write = 0;
> +	}
> +
> +	/* write serialized pattern's size */
> +	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);

We need a uint32_t variable here, not PCRE2_SIZE which may be larger.

> +	if (len != 1) {
> +		rc = -1;
> +		goto err;
> +	}
> +
> +	if (do_write_patterns) {
> +		/* write serialized pattern */
> +		len = fwrite(bytes, 1, to_write, fp);
> +		if (len != to_write) {
> +			rc = -1;
> +		}

In general you don't need { } for a single statement except where it
might otherwise be hard to read (e.g. helpful for the else statement
above, but not really here or in several other cases below).

> +	}
> +
> +err:
> +	if (bytes)
> +		pcre2_serialize_free(bytes);
> +
> +	return rc;
> +}
> +
> +void regex_data_free(struct regex_data * regex) {
> +	if (regex) {
> +		if (regex->regex) {
> +			pcre2_code_free(regex->regex);
> +		}
> +		if (regex->match_data) {
> +			pcre2_match_data_free(regex->match_data);
> +		}
> +		free(regex);
> +	}
> +}
> +
> +int regex_match(struct regex_data * regex, char const * subject, int partial) {
> +	int rc;
> +	rc = pcre2_match(regex->regex,
> +			(PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> +			partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data,
> +			NULL);
> +	if (rc > 0)
> +	return REGEX_MATCH;

Indentation problem.

> +	switch (rc) {
> +		case PCRE2_ERROR_PARTIAL:
> +			return REGEX_MATCH_PARTIAL;
> +		case PCRE2_ERROR_NOMATCH:
> +			return REGEX_NO_MATCH;
> +		default:
> +			return REGEX_ERROR;
> +	}
> +}
> +
> +/*
> + * TODO Replace this compare function with something that actually compares the
> + * regular expressions.
> + * This compare function basically just compares the binary representations of
> + * the automatons, and because this representation contains pointers and
> + * metadata, it can only return a match if regex1 == regex2.
> + * Preferably, this function would be replaced with an algorithm that computes
> + * the equivalence of the automatons systematically.
> + */
> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
> +	int rc;
> +	size_t len1, len2;
> +	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
> +	assert(rc == 0);
> +	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
> +	assert(rc == 0);
> +	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> +		return SELABEL_INCOMPARABLE;
> +
> +	return SELABEL_EQUAL;
> +}
> +
> +#else // !USE_PCRE2
> +
> +int regex_prepare_data(struct regex_data ** regex,
> +			    char const * pattern_string,
> +			    struct regex_error_data * errordata) {
> +	memset(errordata, 0, sizeof(struct regex_error_data));
> +
> +	*regex = regex_data_create();
> +	if (!(*regex))
> +		return -1;
> +
> +	(*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
> +					&errordata->error_buffer,
> +					&errordata->error_offset, NULL);
> +	if (!(*regex)->regex) {
> +		goto err;
> +	}
> +	(*regex)->owned = 1;
> +
> +	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
> +	if (!(*regex)->sd && errordata->error_buffer) {
> +		goto err;
> +	}
> +
> +	return 0;
> +
> +err:	regex_data_free(*regex);
> +	*regex = NULL;
> +	return -1;
> +}
> +
> +char const * regex_version(void) {
> +	return pcre_version();
> +}
> +
> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
> +	int rc;
> +	size_t entry_len, info_len;

Same as before: entry_len needs to be uint32_t.

> +
> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +	if (rc < 0 || !entry_len) {
> +		return -1;
> +	}
> +	*regex = regex_data_create();
> +	if (!(*regex))
> +		return -1;
> +
> +	(*regex)->owned = 0;
> +	(*regex)->regex = (pcre *) mmap_area->next_addr;
> +	rc = next_entry(NULL, mmap_area, entry_len);
> +	if (rc < 0)
> +		goto err;
> +
> +	/*
> +	 * Check that regex lengths match. pcre_fullinfo()
> +	 * also validates its magic number.
> +	 */
> +	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
> +	if (rc < 0 || info_len != entry_len) {
> +		goto err;
> +	}
> +
> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +	if (rc < 0 || !entry_len) {
> +		goto err;
> +	}
> +
> +	if (entry_len) {
> +		(*regex)->lsd.study_data = (void *) mmap_area->next_addr;
> +		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> +		rc = next_entry(NULL, mmap_area, entry_len);
> +		if (rc < 0)
> +			goto err;
> +
> +		/* Check that study data lengths match. */
> +		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
> +				   PCRE_INFO_STUDYSIZE, &info_len);
> +		if (rc < 0 || info_len != entry_len)
> +			goto err;
> +	}
> +	return 0;
> +
> +err:
> +	regex_data_free(*regex);
> +	*regex = NULL;
> +	return -1;
> +}
> +
> +int regex_writef(struct regex_data * regex, FILE * fp) {
> +	int rc;
> +	size_t len;
> +	uint32_t to_write;
> +	size_t size;
> +	pcre_extra * sd = regex->owned ? regex->sd :
> +			(regex->lsd.study_data ? &regex->lsd : NULL);

Recommend defining and using an inline function for the above and the
similar, inconsistent case in regex_match() below.  Prior to this patch,
the equivalent was get_pcre_extra().

> +
> +	/* determine the size of the pcre data in bytes */
> +	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
> +	if (rc < 0)
> +		return -1;
> +
> +	/* write the number of bytes in the pcre data */
> +	to_write = size;
> +	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +	if (len != 1)
> +		return -1;
> +
> +	/* write the actual pcre data as a char array */
> +	len = fwrite(regex->regex, 1, to_write, fp);
> +	if (len != to_write)
> +		return -1;
> +
> +	if (sd) {
> +		/* determine the size of the pcre study info */
> +		rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
> +				&size);
> +		if (rc < 0)
> +			return -1;
> +	} else
> +		size = 0;
> +
> +	/* write the number of bytes in the pcre study data */
> +	to_write = size;
> +	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +	if (len != 1)
> +		return -1;
> +
> +	if (sd) {
> +		/* write the actual pcre study data as a char array */
> +		len = fwrite(sd->study_data, 1, to_write, fp);
> +		if (len != to_write)
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +void regex_data_free(struct regex_data * regex) {
> +	if (regex) {
> +		if (regex->owned) {
> +			if (regex->regex)
> +				pcre_free(regex->regex);
> +			if (regex->sd)
> +				pcre_free_study(regex->sd);
> +		}
> +		free(regex);
> +	}
> +}
> +
> +int regex_match(struct regex_data * regex, char const * subject, int partial) {
> +	int rc;
> +
> +	rc = pcre_exec(regex->regex, regex->owned ? regex->sd : &regex->lsd,

get_pcre_extra()

> +			subject, strlen(subject), 0,
> +			partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
> +	switch (rc) {
> +		case 0:
> +			return REGEX_MATCH;
> +		case PCRE_ERROR_PARTIAL:
> +			return REGEX_MATCH_PARTIAL;
> +		case PCRE_ERROR_NOMATCH:
> +			return REGEX_NO_MATCH;
> +		default:
> +			return REGEX_ERROR;
> +	}
> +}
> +
> +/*
> + * TODO Replace this compare function with something that actually compares the
> + * regular expressions.
> + * This compare function basically just compares the binary representations of
> + * the automatons, and because this representation contains pointers and
> + * metadata, it can only return a match if regex1 == regex2.
> + * Preferably, this function would be replaced with an algorithm that computes
> + * the equivalence of the automatons systematically.
> + */
> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
> +	int rc;
> +	size_t len1, len2;
> +	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
> +	assert(rc == 0);
> +	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
> +	assert(rc == 0);
> +	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> +		return SELABEL_INCOMPARABLE;
> +
> +	return SELABEL_EQUAL;
> +}
> +
> +#endif
> +
> +
> +struct regex_data * regex_data_create(void) {
> +	struct regex_data * dummy = (struct regex_data*) malloc(
> +			sizeof(struct regex_data));
> +	if (dummy) {
> +		memset(dummy, 0, sizeof(struct regex_data));
> +	}

Just use calloc()?

> +	return dummy;
> +}
> +

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH] libselinux: add support for pcre2
@ 2016-09-08 15:52 Janis Danisevskis
  2016-09-08 17:51 ` Stephen Smalley
  2016-09-08 18:56 ` Roberts, William C
  0 siblings, 2 replies; 24+ messages in thread
From: Janis Danisevskis @ 2016-09-08 15:52 UTC (permalink / raw)
  To: selinux, seandroid-list, sds, jwcart2

From: Janis Danisevskis <jdanis@google.com>

This patch moves all pcre1/2 dependencies into the new files regex.h
and regex.c implementing the common denominator of features needed
by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
used implementations.

As of this patch libselinux supports either pcre or pcre2 but not
both at the same time. The persistently stored file contexts
information differs. This means libselinux can only load file
context files generated by sefcontext_compile build with the
same pcre variant.

Also, for pcre2 the persistent format is architecture dependent.
Stored precompiled regular expressions can only be used on the
same architecture they were generated on. If pcre2 is used and
sefcontext_compile shall generate portable output, it and libselinux
must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
cost of having to recompile the regular expressions at load time.

Signed-off-by: Janis Danisevskis <jdanis@google.com>

This patch includes includes:

libselinux: fix memory leak on pcre2

Introduced a malloc on pcre_version(). Libselinux
expected this to be static, just use a static
internal buffer.

Signed-off-by: William Roberts <william.c.roberts@intel.com>
---
 libselinux/Makefile                   |  13 +
 libselinux/src/Makefile               |   4 +-
 libselinux/src/label_file.c           |  93 ++-----
 libselinux/src/label_file.h           |  59 ++---
 libselinux/src/regex.c                | 461 ++++++++++++++++++++++++++++++++++
 libselinux/src/regex.h                | 169 +++++++++++++
 libselinux/utils/Makefile             |   4 +-
 libselinux/utils/sefcontext_compile.c |  55 +---
 8 files changed, 697 insertions(+), 161 deletions(-)
 create mode 100644 libselinux/src/regex.c
 create mode 100644 libselinux/src/regex.h

diff --git a/libselinux/Makefile b/libselinux/Makefile
index 6142b60..15d051e 100644
--- a/libselinux/Makefile
+++ b/libselinux/Makefile
@@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
 endif
 export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
 
+USE_PCRE2 ?= n
+DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
+ifeq ($(USE_PCRE2),y)
+	PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
+	ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
+		PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
+	endif
+	PCRE_LDFLAGS := -lpcre2-8
+else
+	PCRE_LDFLAGS := -lpcre
+endif
+export PCRE_CFLAGS PCRE_LDFLAGS
+
 all install relabel clean distclean indent:
 	@for subdir in $(SUBDIRS); do \
 		(cd $$subdir && $(MAKE) $@) || exit 1; \
diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
index 37d01af..66687e6 100644
--- a/libselinux/src/Makefile
+++ b/libselinux/src/Makefile
@@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
           -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
           -Werror -Wno-aggregate-return -Wno-redundant-decls
 
-override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
+override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
 
 SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
 		-Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
@@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
 	$(RANLIB) $@
 
 $(LIBSO): $(LOBJS)
-	$(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
+	$(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
 	ln -sf $@ $(TARGET) 
 
 $(LIBPC): $(LIBPC).in ../VERSION
diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
index c89bb35..e41c351 100644
--- a/libselinux/src/label_file.c
+++ b/libselinux/src/label_file.c
@@ -15,7 +15,6 @@
 #include <errno.h>
 #include <limits.h>
 #include <stdint.h>
-#include <pcre.h>
 #include <unistd.h>
 #include <sys/mman.h>
 #include <sys/types.h>
@@ -112,6 +111,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 	struct mmap_area *mmap_area;
 	uint32_t i, magic, version;
 	uint32_t entry_len, stem_map_len, regex_array_len;
+	const char *reg_version;
 
 	if (isbinary) {
 		len = strlen(path);
@@ -175,8 +175,13 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 	if (rc < 0 || version > SELINUX_COMPILED_FCONTEXT_MAX_VERS)
 		return -1;
 
+	reg_version = regex_version();
+	if (!reg_version)
+		return -1;
+
 	if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
-		len = strlen(pcre_version());
+
+		len = strlen(reg_version);
 
 		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
 		if (rc < 0)
@@ -198,7 +203,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 		}
 
 		str_buf[entry_len] = '\0';
-		if ((strcmp(str_buf, pcre_version()) != 0)) {
+		if ((strcmp(str_buf, reg_version) != 0)) {
 			free(str_buf);
 			return -1;
 		}
@@ -278,7 +283,6 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 
 		spec = &data->spec_arr[data->nspec];
 		spec->from_mmap = 1;
-		spec->regcomp = 1;
 
 		/* Process context */
 		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
@@ -364,47 +368,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 			spec->prefix_len = prefix_len;
 		}
 
-		/* Process regex and study_data entries */
-		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
-		if (rc < 0 || !entry_len) {
-			rc = -1;
-			goto err;
-		}
-		spec->regex = (pcre *)mmap_area->next_addr;
-		rc = next_entry(NULL, mmap_area, entry_len);
+		rc = regex_load_mmap(mmap_area, &spec->regex);
 		if (rc < 0)
 			goto err;
 
-		/* Check that regex lengths match. pcre_fullinfo()
-		 * also validates its magic number. */
-		rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
-		if (rc < 0 || len != entry_len) {
-			rc = -1;
-			goto err;
-		}
-
-		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
-		if (rc < 0 || !entry_len) {
-			rc = -1;
-			goto err;
-		}
-
-		if (entry_len) {
-			spec->lsd.study_data = (void *)mmap_area->next_addr;
-			spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
-			rc = next_entry(NULL, mmap_area, entry_len);
-			if (rc < 0)
-				goto err;
-
-			/* Check that study data lengths match. */
-			rc = pcre_fullinfo(spec->regex, &spec->lsd,
-					   PCRE_INFO_STUDYSIZE, &len);
-			if (rc < 0 || len != entry_len) {
-				rc = -1;
-				goto err;
-			}
-		}
-
 		data->nspec++;
 	}
 
@@ -605,14 +572,11 @@ static void closef(struct selabel_handle *rec)
 		spec = &data->spec_arr[i];
 		free(spec->lr.ctx_trans);
 		free(spec->lr.ctx_raw);
+		regex_data_free(spec->regex);
 		if (spec->from_mmap)
 			continue;
 		free(spec->regex_str);
 		free(spec->type_str);
-		if (spec->regcomp) {
-			pcre_free(spec->regex);
-			pcre_free_study(spec->sd);
-		}
 	}
 
 	for (i = 0; i < (unsigned int)data->num_stems; i++) {
@@ -644,13 +608,14 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 {
 	struct saved_data *data = (struct saved_data *)rec->data;
 	struct spec *spec_arr = data->spec_arr;
-	int i, rc, file_stem, pcre_options = 0;
+	int i, rc, file_stem;
 	mode_t mode = (mode_t)type;
 	const char *buf;
 	struct spec *ret = NULL;
 	char *clean_key = NULL;
 	const char *prev_slash, *next_slash;
 	unsigned int sofar = 0;
+	struct regex_error_data regex_error_data;
 
 	if (!data->nspec) {
 		errno = ENOENT;
@@ -677,9 +642,6 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 	file_stem = find_stem_from_file(data, &buf);
 	mode &= S_IFMT;
 
-	if (partial)
-		pcre_options |= PCRE_PARTIAL_SOFT;
-
 	/*
 	 * Check for matching specifications in reverse order, so that
 	 * the last matching specification is used.
@@ -692,25 +654,19 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 		 * a regex check        */
 		if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
 		    (!mode || !spec->mode || mode == spec->mode)) {
-			if (compile_regex(data, spec, NULL) < 0)
+			if (compile_regex(data, spec, &regex_error_data) < 0)
 				goto finish;
 			if (spec->stem_id == -1)
-				rc = pcre_exec(spec->regex,
-						    get_pcre_extra(spec),
-						    key, strlen(key), 0,
-						    pcre_options, NULL, 0);
+				rc = regex_match(spec->regex, key, partial);
 			else
-				rc = pcre_exec(spec->regex,
-						    get_pcre_extra(spec),
-						    buf, strlen(buf), 0,
-						    pcre_options, NULL, 0);
-			if (rc == 0) {
+				rc = regex_match(spec->regex, buf, partial);
+			if (rc == REGEX_MATCH) {
 				spec->matches++;
 				break;
-			} else if (partial && rc == PCRE_ERROR_PARTIAL)
+			} else if (partial && rc == REGEX_MATCH_PARTIAL)
 				break;
 
-			if (rc == PCRE_ERROR_NOMATCH)
+			if (rc == REGEX_NO_MATCH)
 				continue;
 
 			errno = ENOENT;
@@ -849,17 +805,10 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1,
 			continue;
 		}
 
-		if (spec1->regcomp && spec2->regcomp) {
-			size_t len1, len2;
-			int rc;
-
-			rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1);
-			assert(rc == 0);
-			rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2);
-			assert(rc == 0);
-			if (len1 != len2 ||
-			    memcmp(spec1->regex, spec2->regex, len1))
+		if (spec1->regex && spec2->regex) {
+			if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){
 				return incomp(spec1, spec2, "regex", i, j);
+			}
 		} else {
 			if (strcmp(spec1->regex_str, spec2->regex_str))
 				return incomp(spec1, spec2, "regex_str", i, j);
diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
index 6d1e890..24cb9e0 100644
--- a/libselinux/src/label_file.h
+++ b/libselinux/src/label_file.h
@@ -6,6 +6,14 @@
 
 #include <sys/stat.h>
 
+/*
+ * regex.h/c were introduced to hold all dependencies on the regular
+ * expression back-end when we started supporting PCRE2. regex.h defines a
+ * minimal interface required by libselinux, so that the remaining code
+ * can be agnostic about the underlying implementation.
+ */
+#include "regex.h"
+
 #include "callbacks.h"
 #include "label_internal.h"
 
@@ -19,26 +27,16 @@
 
 #define SELINUX_COMPILED_FCONTEXT_MAX_VERS	SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
 
-/* Prior to version 8.20, libpcre did not have pcre_free_study() */
-#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
-#define pcre_free_study  pcre_free
-#endif
-
 /* A file security context specification. */
 struct spec {
 	struct selabel_lookup_rec lr;	/* holds contexts for lookup result */
 	char *regex_str;	/* regular expession string for diagnostics */
 	char *type_str;		/* type string for diagnostic messages */
-	pcre *regex;		/* compiled regular expression */
-	union {
-		pcre_extra *sd;	/* pointer to extra compiled stuff */
-		pcre_extra lsd;	/* used to hold the mmap'd version */
-	};
+	struct regex_data * regex; /* backend dependent regular expression data */
 	mode_t mode;		/* mode format value */
 	int matches;		/* number of matching pathnames */
 	int stem_id;		/* indicates which stem-compression item */
 	char hasMetaChars;	/* regular expression has meta-chars */
-	char regcomp;		/* regex_str has been compiled to regex */
 	char from_mmap;		/* this spec is from an mmap of the data */
 	size_t prefix_len;      /* length of fixed path prefix */
 };
@@ -78,17 +76,6 @@ struct saved_data {
 	struct mmap_area *mmap_areas;
 };
 
-static inline pcre_extra *get_pcre_extra(struct spec *spec)
-{
-	if (spec->from_mmap) {
-		if (spec->lsd.study_data)
-			return &spec->lsd;
-		else
-			return NULL;
-	} else
-		return spec->sd;
-}
-
 static inline mode_t string_to_mode(char *mode)
 {
 	size_t len;
@@ -331,15 +318,14 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes)
 }
 
 static inline int compile_regex(struct saved_data *data, struct spec *spec,
-					    const char **errbuf)
+					    struct regex_error_data * error_data)
 {
-	const char *tmperrbuf;
 	char *reg_buf, *anchored_regex, *cp;
 	struct stem *stem_arr = data->stem_arr;
 	size_t len;
-	int erroff;
+	int rc;
 
-	if (spec->regcomp)
+	if (spec->regex)
 		return 0; /* already done */
 
 	/* Skip the fixed stem. */
@@ -361,25 +347,13 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
 	*cp = '\0';
 
 	/* Compile the regular expression. */
-	spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
-						    &erroff, NULL);
+	rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
 	free(anchored_regex);
-	if (!spec->regex) {
-		if (errbuf)
-			*errbuf = tmperrbuf;
-		return -1;
-	}
-
-	spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
-	if (!spec->sd && tmperrbuf) {
-		if (errbuf)
-			*errbuf = tmperrbuf;
+	if (rc < 0) {
 		return -1;
 	}
 
 	/* Done. */
-	spec->regcomp = 1;
-
 	return 0;
 }
 
@@ -394,7 +368,8 @@ static inline int process_line(struct selabel_handle *rec,
 	struct saved_data *data = (struct saved_data *)rec->data;
 	struct spec *spec_arr;
 	unsigned int nspec = data->nspec;
-	const char *errbuf = NULL;
+	char const *errbuf;
+	struct regex_error_data error_data;
 
 	items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
 	if (items < 0) {
@@ -454,7 +429,7 @@ static inline int process_line(struct selabel_handle *rec,
 	data->nspec++;
 
 	if (rec->validating &&
-			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
+			    compile_regex(data, &spec_arr[nspec], &error_data)) {
 		COMPAT_LOG(SELINUX_ERROR,
 			   "%s:  line %u has invalid regex %s:  %s\n",
 			   path, lineno, regex,
diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
new file mode 100644
index 0000000..558a72a
--- /dev/null
+++ b/libselinux/src/regex.c
@@ -0,0 +1,461 @@
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "regex.h"
+#include "label_file.h"
+
+#ifdef USE_PCRE2
+int regex_prepare_data(struct regex_data ** regex,
+			     char const * pattern_string,
+			     struct regex_error_data * errordata) {
+	memset(errordata, 0, sizeof(struct regex_error_data));
+
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
+			PCRE2_ZERO_TERMINATED,
+			PCRE2_DOTALL,
+			&errordata->error_code,
+			&errordata->error_offset, NULL);
+	if (!(*regex)->regex) {
+		goto err;
+	}
+
+	(*regex)->match_data =
+		pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
+	if (!(*regex)->match_data) {
+		goto err;
+	}
+	return 0;
+
+err:	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+char const * regex_version(void) {
+	static char version_buf[256];
+	size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
+	if (len <= 0 || len > sizeof(version_buf))
+		return NULL;
+
+	pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
+	return version_buf;
+}
+
+int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
+	int rc;
+	size_t entry_len;
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0)
+		return -1;
+
+	if (entry_len) {
+		/*
+		 * this should yield exactly one because we store one pattern at a time
+		 */
+		rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
+		if (rc != 1)
+			return -1;
+
+		*regex = regex_data_create();
+		if (!*regex)
+			return -1;
+
+		rc = pcre2_serialize_decode(&(*regex)->regex, 1,
+				(PCRE2_SPTR)mmap_area->next_addr, NULL);
+		if (rc != 1)
+			goto err;
+
+		(*regex)->match_data =
+			pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
+		if (!(*regex)->match_data)
+			goto err;
+	}
+
+	/* and skip the decoded bit */
+	rc = next_entry(NULL, mmap_area, entry_len);
+	if (rc < 0)
+		goto err;
+
+	return 0;
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+int regex_writef(struct regex_data * regex, FILE * fp) {
+	int rc = 0;
+	size_t len;
+	PCRE2_SIZE to_write;
+	PCRE2_UCHAR * bytes = NULL;
+#ifndef NO_PERSISTENTLY_STORED_PATTERNS
+	int do_write_patterns = 1;
+#else
+	int do_write_patterns = 0;
+#endif
+
+	if (do_write_patterns) {
+		/* encode the patter for serialization */
+		rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
+					    &bytes, &to_write, NULL);
+		if (rc != 1) {
+			rc = -1;
+			goto err;
+		}
+	} else {
+		to_write = 0;
+	}
+
+	/* write serialized pattern's size */
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1) {
+		rc = -1;
+		goto err;
+	}
+
+	if (do_write_patterns) {
+		/* write serialized pattern */
+		len = fwrite(bytes, 1, to_write, fp);
+		if (len != to_write) {
+			rc = -1;
+		}
+	}
+
+err:
+	if (bytes)
+		pcre2_serialize_free(bytes);
+
+	return rc;
+}
+
+void regex_data_free(struct regex_data * regex) {
+	if (regex) {
+		if (regex->regex) {
+			pcre2_code_free(regex->regex);
+		}
+		if (regex->match_data) {
+			pcre2_match_data_free(regex->match_data);
+		}
+		free(regex);
+	}
+}
+
+int regex_match(struct regex_data * regex, char const * subject, int partial) {
+	int rc;
+	rc = pcre2_match(regex->regex,
+			(PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
+			partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data,
+			NULL);
+	if (rc > 0)
+	return REGEX_MATCH;
+	switch (rc) {
+		case PCRE2_ERROR_PARTIAL:
+			return REGEX_MATCH_PARTIAL;
+		case PCRE2_ERROR_NOMATCH:
+			return REGEX_NO_MATCH;
+		default:
+			return REGEX_ERROR;
+	}
+}
+
+/*
+ * TODO Replace this compare function with something that actually compares the
+ * regular expressions.
+ * This compare function basically just compares the binary representations of
+ * the automatons, and because this representation contains pointers and
+ * metadata, it can only return a match if regex1 == regex2.
+ * Preferably, this function would be replaced with an algorithm that computes
+ * the equivalence of the automatons systematically.
+ */
+int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
+	int rc;
+	size_t len1, len2;
+	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
+	assert(rc == 0);
+	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
+	assert(rc == 0);
+	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
+		return SELABEL_INCOMPARABLE;
+
+	return SELABEL_EQUAL;
+}
+
+#else // !USE_PCRE2
+
+int regex_prepare_data(struct regex_data ** regex,
+			    char const * pattern_string,
+			    struct regex_error_data * errordata) {
+	memset(errordata, 0, sizeof(struct regex_error_data));
+
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
+					&errordata->error_buffer,
+					&errordata->error_offset, NULL);
+	if (!(*regex)->regex) {
+		goto err;
+	}
+	(*regex)->owned = 1;
+
+	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
+	if (!(*regex)->sd && errordata->error_buffer) {
+		goto err;
+	}
+
+	return 0;
+
+err:	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+char const * regex_version(void) {
+	return pcre_version();
+}
+
+int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
+	int rc;
+	size_t entry_len, info_len;
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0 || !entry_len) {
+		return -1;
+	}
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->owned = 0;
+	(*regex)->regex = (pcre *) mmap_area->next_addr;
+	rc = next_entry(NULL, mmap_area, entry_len);
+	if (rc < 0)
+		goto err;
+
+	/*
+	 * Check that regex lengths match. pcre_fullinfo()
+	 * also validates its magic number.
+	 */
+	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
+	if (rc < 0 || info_len != entry_len) {
+		goto err;
+	}
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0 || !entry_len) {
+		goto err;
+	}
+
+	if (entry_len) {
+		(*regex)->lsd.study_data = (void *) mmap_area->next_addr;
+		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
+		rc = next_entry(NULL, mmap_area, entry_len);
+		if (rc < 0)
+			goto err;
+
+		/* Check that study data lengths match. */
+		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
+				   PCRE_INFO_STUDYSIZE, &info_len);
+		if (rc < 0 || info_len != entry_len)
+			goto err;
+	}
+	return 0;
+
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+int regex_writef(struct regex_data * regex, FILE * fp) {
+	int rc;
+	size_t len;
+	uint32_t to_write;
+	size_t size;
+	pcre_extra * sd = regex->owned ? regex->sd :
+			(regex->lsd.study_data ? &regex->lsd : NULL);
+
+	/* determine the size of the pcre data in bytes */
+	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
+	if (rc < 0)
+		return -1;
+
+	/* write the number of bytes in the pcre data */
+	to_write = size;
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1)
+		return -1;
+
+	/* write the actual pcre data as a char array */
+	len = fwrite(regex->regex, 1, to_write, fp);
+	if (len != to_write)
+		return -1;
+
+	if (sd) {
+		/* determine the size of the pcre study info */
+		rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
+				&size);
+		if (rc < 0)
+			return -1;
+	} else
+		size = 0;
+
+	/* write the number of bytes in the pcre study data */
+	to_write = size;
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1)
+		return -1;
+
+	if (sd) {
+		/* write the actual pcre study data as a char array */
+		len = fwrite(sd->study_data, 1, to_write, fp);
+		if (len != to_write)
+			return -1;
+	}
+
+	return 0;
+}
+
+void regex_data_free(struct regex_data * regex) {
+	if (regex) {
+		if (regex->owned) {
+			if (regex->regex)
+				pcre_free(regex->regex);
+			if (regex->sd)
+				pcre_free_study(regex->sd);
+		}
+		free(regex);
+	}
+}
+
+int regex_match(struct regex_data * regex, char const * subject, int partial) {
+	int rc;
+
+	rc = pcre_exec(regex->regex, regex->owned ? regex->sd : &regex->lsd,
+			subject, strlen(subject), 0,
+			partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
+	switch (rc) {
+		case 0:
+			return REGEX_MATCH;
+		case PCRE_ERROR_PARTIAL:
+			return REGEX_MATCH_PARTIAL;
+		case PCRE_ERROR_NOMATCH:
+			return REGEX_NO_MATCH;
+		default:
+			return REGEX_ERROR;
+	}
+}
+
+/*
+ * TODO Replace this compare function with something that actually compares the
+ * regular expressions.
+ * This compare function basically just compares the binary representations of
+ * the automatons, and because this representation contains pointers and
+ * metadata, it can only return a match if regex1 == regex2.
+ * Preferably, this function would be replaced with an algorithm that computes
+ * the equivalence of the automatons systematically.
+ */
+int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
+	int rc;
+	size_t len1, len2;
+	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
+	assert(rc == 0);
+	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
+	assert(rc == 0);
+	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
+		return SELABEL_INCOMPARABLE;
+
+	return SELABEL_EQUAL;
+}
+
+#endif
+
+
+struct regex_data * regex_data_create(void) {
+	struct regex_data * dummy = (struct regex_data*) malloc(
+			sizeof(struct regex_data));
+	if (dummy) {
+		memset(dummy, 0, sizeof(struct regex_data));
+	}
+	return dummy;
+}
+
+void regex_format_error(struct regex_error_data const * error_data,
+			char * buffer, size_t buf_size) {
+	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
+	char * ptr = &buffer[buf_size - the_end_length];
+	int rc = 0;
+	size_t pos = 0;
+	if (!buffer || !buf_size)
+		return;
+	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
+	if (rc < 0)
+		/*
+		 * If snprintf fails it constitutes a logical error that needs
+		 * fixing.
+		 */
+		abort();
+
+	pos += rc;
+	if (pos >= buf_size)
+		goto truncated;
+
+	if (error_data->error_offset > 0) {
+#ifdef USE_PCRE2
+		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
+				error_data->error_offset);
+#else
+		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
+				error_data->error_offset);
+#endif
+		if (rc < 0)
+			abort();
+
+	}
+	pos += rc;
+	if (pos >= buf_size)
+		goto truncated;
+
+#ifdef USE_PCRE2
+	rc = pcre2_get_error_message(error_data->error_code,
+			(PCRE2_UCHAR*)(buffer + pos),
+			buf_size - pos);
+	if (rc == PCRE2_ERROR_NOMEMORY)
+		goto truncated;
+#else
+	rc = snprintf(buffer + pos, buf_size - pos, "%s",
+			error_data->error_buffer);
+	if (rc < 0)
+		abort();
+
+	if ((size_t)rc < strlen(error_data->error_buffer))
+		goto truncated;
+#endif
+
+	return;
+
+truncated:
+	/* replace end of string with "..." to indicate that it was truncated */
+	switch (the_end_length) {
+		/* no break statements, fall-through is intended */
+		case 4:
+			*ptr++ = '.';
+		case 3:
+			*ptr++ = '.';
+		case 2:
+			*ptr++ = '.';
+		case 1:
+			*ptr++ = '\0';
+		default:
+			break;
+	}
+	return;
+}
diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
new file mode 100644
index 0000000..6a113e8
--- /dev/null
+++ b/libselinux/src/regex.h
@@ -0,0 +1,169 @@
+#ifndef SRC_REGEX_H_
+#define SRC_REGEX_H_
+
+#include <stdio.h>
+
+#ifdef USE_PCRE2
+#include <pcre2.h>
+#else
+#include <pcre.h>
+#endif
+
+enum {
+	REGEX_MATCH,
+	REGEX_MATCH_PARTIAL,
+	REGEX_NO_MATCH,
+	REGEX_ERROR = -1,
+};
+
+#ifdef USE_PCRE2
+struct regex_data {
+	pcre2_code * regex; /* compiled regular expression */
+	pcre2_match_data * match_data; /* match data block required for the compiled
+	 pattern in regex2 */
+};
+
+struct regex_error_data {
+	int error_code;
+	PCRE2_SIZE error_offset;
+};
+
+/* ^^^^^^ USE_PCRE2  ^^^^^^ */
+#else
+/* vvvvvv USE_PCRE vvvvvv */
+
+/* Prior to version 8.20, libpcre did not have pcre_free_study() */
+#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
+#define pcre_free_study  pcre_free
+#endif
+
+struct regex_data {
+	int owned; /*
+			  * non zero if regex and pcre_extra is owned by this
+			  * structure and thus must be freed on destruction.
+			  */
+	pcre *regex; /* compiled regular expression */
+	union {
+		pcre_extra *sd; /* pointer to extra compiled stuff */
+		pcre_extra lsd; /* used to hold the mmap'd version */
+	};
+};
+
+struct regex_error_data {
+	char const * error_buffer;
+	int error_offset;
+};
+
+#endif /* USE_PCRE2 */
+
+struct mmap_area;
+
+/**
+ * regex_verison returns the version string of the underlying regular
+ * regular expressions library. In the case of PCRE it just returns the
+ * result of pcre_version(). In the case of PCRE2, the very first time this
+ * function is called it allocates a buffer large enough to hold the version
+ * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
+ * The allocated buffer will linger in memory until the calling process is being
+ * reaped.
+ *
+ * It may return NULL on error.
+ */
+char const * regex_version(void);
+/**
+ * This constructor function allocates a buffer for a regex_data structure.
+ * The buffer is being initialized with zeroes.
+ */
+struct regex_data * regex_data_create(void);
+/**
+ * This complementary destructor function frees the a given regex_data buffer.
+ * It also frees any non NULL member pointers with the appropriate pcreX_X_free
+ * function. For PCRE this function respects the extra_owned field and frees
+ * the pcre_extra data conditionally. Calling this function on a NULL pointer is
+ * save.
+ */
+void regex_data_free(struct regex_data * regex);
+/**
+ * This function compiles the regular expression. Additionally, it prepares
+ * data structures required by the different underlying engines. For PCRE
+ * it calls pcre_study to generate optional data required for optimized
+ * execution of the compiled pattern. In the case of PCRE2, it allocates
+ * a pcre2_match_data structure of appropriate size to hold all possible
+ * matches created by the pattern.
+ *
+ * @arg regex If successful, the structure returned through *regex was allocated
+ *            with regex_data_create and must be freed with regex_data_free.
+ * @arg pattern_string The pattern string that is to be compiled.
+ * @arg errordata A pointer to a regex_error_data structure must be passed
+ *                to this function. This structure depends on the underlying
+ *                implementation. It can be passed to regex_format_error
+ *                to generate a human readable error message.
+ * @retval 0 on success
+ * @retval -1 on error
+ */
+int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
+			struct regex_error_data * errordata);
+/**
+ * This function loads a serialized precompiled pattern from a contiguous
+ * data region given by map_area.
+ *
+ * @arg map_area Description of the memory region holding a serialized
+ *               representation of the precompiled pattern.
+ * @arg regex If successful, the structure returned through *regex was allocated
+ *            with regex_data_create and must be freed with regex_data_free.
+ *
+ * @retval 0 on success
+ * @retval -1 on error
+ */
+int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** regex);
+/**
+ * This function stores a precompiled regular expression to a file.
+ * In the case of PCRE, it just dumps the binary representation of the
+ * precomplied pattern into a file. In the case of PCRE2, it uses the
+ * serialization function provided by the library.
+ *
+ * @arg regex The precomplied regular expression data.
+ * @arg fp A file stream specifying the output file.
+ */
+int regex_writef(struct regex_data * regex, FILE * fp);
+/**
+ * This function applies a precompiled pattern to a subject string and
+ * returns whether or not a match was found.
+ *
+ * @arg regex The precompiled pattern.
+ * @arg subject The subject string.
+ * @arg partial Boolean indicating if partial matches are wanted. A nonzero
+ *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
+ *              option to pcre_exec of pcre2_match.
+ * @retval REGEX_MATCH if a match was found
+ * @retval REGEX_MATCH_PARTIAL if a partial match was found
+ * @retval REGEX_NO_MATCH if no match was found
+ * @retval REGEX_ERROR if an error was encountered during the execution of the
+ *                     regular expression
+ */
+int regex_match(struct regex_data * regex, char const * subject, int partial);
+/**
+ * This function compares two compiled regular expressions (regex1 and regex2).
+ * It compares the binary representations of the compiled patterns. It is a very
+ * crude approximation because the binary representation holds data like
+ * reference counters, that has nothing to do with the actual state machine.
+ *
+ * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
+ *                       the same
+ * @retval SELABEL_INCOMPARABLE otherwise
+ */
+int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
+/**
+ * This function takes the error data returned by regex_prepare_data and turns
+ * it in to a human readable error message.
+ * If the buffer given to hold the error message is to small it truncates the
+ * message and indicates the truncation with an ellipsis ("...") at the end of
+ * the buffer.
+ *
+ * @arg error_data Error data as returned by regex_prepare_data.
+ * @arg buffer String buffer to hold the formated error string.
+ * @arg buf_size Total size of the given bufer in bytes.
+ */
+void regex_format_error(struct regex_error_data const * error_data,
+			char * buffer, size_t buf_size);
+#endif  /* SRC_REGEX_H_ */
diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
index 8497cb4..1e7a048 100644
--- a/libselinux/utils/Makefile
+++ b/libselinux/utils/Makefile
@@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
           -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
           -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
           -Werror -Wno-aggregate-return -Wno-redundant-decls
-override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
+override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
 LDLIBS += -L../src -lselinux -L$(LIBDIR)
 
 TARGETS=$(patsubst %.c,%,$(wildcard *.c))
 
-sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
+sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol
 
 selinux_restorecon: LDLIBS += -lsepol
 
diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
index fd6fb78..b6b8d92 100644
--- a/libselinux/utils/sefcontext_compile.c
+++ b/libselinux/utils/sefcontext_compile.c
@@ -1,6 +1,5 @@
 #include <ctype.h>
 #include <errno.h>
-#include <pcre.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@@ -13,6 +12,7 @@
 #include <sepol/sepol.h>
 
 #include "../src/label_file.h"
+#include "../src/regex.h"
 
 const char *policy_file;
 static int ctx_err;
@@ -101,6 +101,7 @@ static int write_binary_file(struct saved_data *data, int fd)
 	uint32_t section_len;
 	uint32_t i;
 	int rc;
+	const char *reg_version;
 
 	bin_file = fdopen(fd, "w");
 	if (!bin_file) {
@@ -119,12 +120,15 @@ static int write_binary_file(struct saved_data *data, int fd)
 	if (len != 1)
 		goto err;
 
-	/* write the pcre version */
-	section_len = strlen(pcre_version());
+	/* write version of the regex back-end */
+	reg_version = regex_version();
+	if (!reg_version)
+		goto err;
+	section_len = strlen(reg_version);
 	len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
 	if (len != 1)
 		goto err;
-	len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
+	len = fwrite(reg_version, sizeof(char), section_len, bin_file);
 	if (len != section_len)
 		goto err;
 
@@ -162,10 +166,8 @@ static int write_binary_file(struct saved_data *data, int fd)
 		mode_t mode = specs[i].mode;
 		size_t prefix_len = specs[i].prefix_len;
 		int32_t stem_id = specs[i].stem_id;
-		pcre *re = specs[i].regex;
-		pcre_extra *sd = get_pcre_extra(&specs[i]);
+		struct regex_data *re = specs[i].regex;
 		uint32_t to_write;
-		size_t size;
 
 		/* length of the context string (including nul) */
 		to_write = strlen(context) + 1;
@@ -212,42 +214,10 @@ static int write_binary_file(struct saved_data *data, int fd)
 		if (len != 1)
 			goto err;
 
-		/* determine the size of the pcre data in bytes */
-		rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
+		/* Write regex related data */
+		rc = regex_writef(re, bin_file);
 		if (rc < 0)
 			goto err;
-
-		/* write the number of bytes in the pcre data */
-		to_write = size;
-		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
-		if (len != 1)
-			goto err;
-
-		/* write the actual pcre data as a char array */
-		len = fwrite(re, 1, to_write, bin_file);
-		if (len != to_write)
-			goto err;
-
-		if (sd) {
-			/* determine the size of the pcre study info */
-			rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
-			if (rc < 0)
-				goto err;
-		} else
-			size = 0;
-
-		/* write the number of bytes in the pcre study data */
-		to_write = size;
-		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
-		if (len != 1)
-			goto err;
-
-		if (sd) {
-			/* write the actual pcre study data as a char array */
-			len = fwrite(sd->study_data, 1, to_write, bin_file);
-			if (len != to_write)
-				goto err;
-		}
 	}
 
 	rc = 0;
@@ -270,8 +240,7 @@ static void free_specs(struct saved_data *data)
 		free(specs[i].lr.ctx_trans);
 		free(specs[i].regex_str);
 		free(specs[i].type_str);
-		pcre_free(specs[i].regex);
-		pcre_free_study(specs[i].sd);
+		regex_data_free(specs[i].regex);
 	}
 	free(specs);
 
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2016-09-16 19:46 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-15 16:14 [PATCH] libselinux: add support for pcre2 Janis Danisevskis
2016-09-15 17:53 ` Stephen Smalley
  -- strict thread matches above, loose matches on Subject: below --
2016-09-15 14:04 Janis Danisevskis
2016-09-15 14:57 ` Stephen Smalley
2016-09-15 19:31   ` William Roberts
2016-09-16 10:13     ` Janis Danisevskis
2016-09-16 10:23       ` William Roberts
2016-09-16 12:23         ` Janis Danisevskis
2016-09-16 12:35           ` William Roberts
2016-09-16 13:09             ` Janis Danisevskis
2016-09-16 13:15               ` William Roberts
2016-09-16 13:31                 ` Jason Zaman
2016-09-16 13:43                   ` William Roberts
2016-09-16 13:51                     ` William Roberts
2016-09-16 14:06                       ` Jason Zaman
2016-09-16 14:14                         ` William Roberts
2016-09-16 13:58                   ` Janis Danisevskis
2016-09-16 14:52                   ` Stephen Smalley
2016-09-16 18:57                     ` Janis Danisevskis
2016-09-16 19:06                       ` Stephen Smalley
2016-09-16 19:45                         ` Janis Danisevskis
2016-09-08 15:52 Janis Danisevskis
2016-09-08 17:51 ` Stephen Smalley
2016-09-08 18:56 ` Roberts, William C

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.