selinux.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] libselinux: add support for pcre2
@ 2016-09-07  8:08 Janis Danisevskis
  2016-09-07  8:08 ` [PATCH 2/2] libselinux: fix memory leak on pcre2 Janis Danisevskis
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Janis Danisevskis @ 2016-09-07  8:08 UTC (permalink / raw)
  To: selinux, seandroid-list, sds, jwcart2; +Cc: Janis Danisevskis

From: Janis Danisevskis <jdanis@google.com>

This patch moves all pcre1/2 dependencies into the new files regex.h
and regex.c implementing the common denominator of features needed
by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
used implementations.

As of this patch libselinux supports either pcre or pcre2 but not
both at the same time. The persistently stored file contexts
information differs. This means libselinux can only load file
context files generated by sefcontext_compile built with the
same pcre variant.

Also, for pcre2 the persistent format is architecture dependent.
Stored precompiled regular expressions can only be used on the
same architecture they were generated on. If pcre2 is used and
sefcontext_compile shall generate portable output, it and libselinux
must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
cost of having to recompile the regular expressions at load time.

Signed-off-by: Janis Danisevskis <jdanis@google.com>
---
 libselinux/Makefile                   |  13 ++
 libselinux/src/Makefile               |   4 +-
 libselinux/src/label_file.c           |  91 ++------
 libselinux/src/label_file.h           |  54 ++---
 libselinux/src/regex.c                | 405 ++++++++++++++++++++++++++++++++++
 libselinux/src/regex.h                | 168 ++++++++++++++
 libselinux/utils/Makefile             |   4 +-
 libselinux/utils/sefcontext_compile.c |  53 +----
 8 files changed, 637 insertions(+), 155 deletions(-)
 create mode 100644 libselinux/src/regex.c
 create mode 100644 libselinux/src/regex.h

diff --git a/libselinux/Makefile b/libselinux/Makefile
index 6142b60..15d051e 100644
--- a/libselinux/Makefile
+++ b/libselinux/Makefile
@@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
 endif
 export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
 
+USE_PCRE2 ?= n
+DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
+ifeq ($(USE_PCRE2),y)
+	PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
+	ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
+		PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
+	endif
+	PCRE_LDFLAGS := -lpcre2-8
+else
+	PCRE_LDFLAGS := -lpcre
+endif
+export PCRE_CFLAGS PCRE_LDFLAGS
+
 all install relabel clean distclean indent:
 	@for subdir in $(SUBDIRS); do \
 		(cd $$subdir && $(MAKE) $@) || exit 1; \
diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
index 37d01af..66687e6 100644
--- a/libselinux/src/Makefile
+++ b/libselinux/src/Makefile
@@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
           -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
           -Werror -Wno-aggregate-return -Wno-redundant-decls
 
-override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
+override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
 
 SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
 		-Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
@@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
 	$(RANLIB) $@
 
 $(LIBSO): $(LOBJS)
-	$(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
+	$(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
 	ln -sf $@ $(TARGET) 
 
 $(LIBPC): $(LIBPC).in ../VERSION
diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
index c89bb35..6698624 100644
--- a/libselinux/src/label_file.c
+++ b/libselinux/src/label_file.c
@@ -15,7 +15,6 @@
 #include <errno.h>
 #include <limits.h>
 #include <stdint.h>
-#include <pcre.h>
 #include <unistd.h>
 #include <sys/mman.h>
 #include <sys/types.h>
@@ -176,7 +175,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 		return -1;
 
 	if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
-		len = strlen(pcre_version());
+		if (!regex_version()) {
+			return -1;
+		}
+		len = strlen(regex_version());
 
 		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
 		if (rc < 0)
@@ -198,7 +200,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 		}
 
 		str_buf[entry_len] = '\0';
-		if ((strcmp(str_buf, pcre_version()) != 0)) {
+		if ((strcmp(str_buf, regex_version()) != 0)) {
 			free(str_buf);
 			return -1;
 		}
@@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 
 		spec = &data->spec_arr[data->nspec];
 		spec->from_mmap = 1;
+#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
+		spec->regcomp = 0;
+#else
 		spec->regcomp = 1;
+#endif
 
 		/* Process context */
 		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
@@ -364,47 +370,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 			spec->prefix_len = prefix_len;
 		}
 
-		/* Process regex and study_data entries */
-		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
-		if (rc < 0 || !entry_len) {
-			rc = -1;
-			goto err;
-		}
-		spec->regex = (pcre *)mmap_area->next_addr;
-		rc = next_entry(NULL, mmap_area, entry_len);
+		rc = regex_load_mmap(mmap_area, &spec->regex);
 		if (rc < 0)
 			goto err;
 
-		/* Check that regex lengths match. pcre_fullinfo()
-		 * also validates its magic number. */
-		rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
-		if (rc < 0 || len != entry_len) {
-			rc = -1;
-			goto err;
-		}
-
-		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
-		if (rc < 0 || !entry_len) {
-			rc = -1;
-			goto err;
-		}
-
-		if (entry_len) {
-			spec->lsd.study_data = (void *)mmap_area->next_addr;
-			spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
-			rc = next_entry(NULL, mmap_area, entry_len);
-			if (rc < 0)
-				goto err;
-
-			/* Check that study data lengths match. */
-			rc = pcre_fullinfo(spec->regex, &spec->lsd,
-					   PCRE_INFO_STUDYSIZE, &len);
-			if (rc < 0 || len != entry_len) {
-				rc = -1;
-				goto err;
-			}
-		}
-
 		data->nspec++;
 	}
 
@@ -609,10 +578,7 @@ static void closef(struct selabel_handle *rec)
 			continue;
 		free(spec->regex_str);
 		free(spec->type_str);
-		if (spec->regcomp) {
-			pcre_free(spec->regex);
-			pcre_free_study(spec->sd);
-		}
+		regex_data_free(spec->regex);
 	}
 
 	for (i = 0; i < (unsigned int)data->num_stems; i++) {
@@ -644,13 +610,14 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 {
 	struct saved_data *data = (struct saved_data *)rec->data;
 	struct spec *spec_arr = data->spec_arr;
-	int i, rc, file_stem, pcre_options = 0;
+	int i, rc, file_stem;
 	mode_t mode = (mode_t)type;
 	const char *buf;
 	struct spec *ret = NULL;
 	char *clean_key = NULL;
 	const char *prev_slash, *next_slash;
 	unsigned int sofar = 0;
+	struct regex_error_data regex_error_data;
 
 	if (!data->nspec) {
 		errno = ENOENT;
@@ -677,9 +644,6 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 	file_stem = find_stem_from_file(data, &buf);
 	mode &= S_IFMT;
 
-	if (partial)
-		pcre_options |= PCRE_PARTIAL_SOFT;
-
 	/*
 	 * Check for matching specifications in reverse order, so that
 	 * the last matching specification is used.
@@ -692,25 +656,19 @@ static struct spec *lookup_common(struct selabel_handle *rec,
 		 * a regex check        */
 		if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
 		    (!mode || !spec->mode || mode == spec->mode)) {
-			if (compile_regex(data, spec, NULL) < 0)
+			if (compile_regex(data, spec, &regex_error_data) < 0)
 				goto finish;
 			if (spec->stem_id == -1)
-				rc = pcre_exec(spec->regex,
-						    get_pcre_extra(spec),
-						    key, strlen(key), 0,
-						    pcre_options, NULL, 0);
+				rc = regex_match(spec->regex, key, partial);
 			else
-				rc = pcre_exec(spec->regex,
-						    get_pcre_extra(spec),
-						    buf, strlen(buf), 0,
-						    pcre_options, NULL, 0);
-			if (rc == 0) {
+				rc = regex_match(spec->regex, buf, partial);
+			if (rc == REGEX_MATCH) {
 				spec->matches++;
 				break;
-			} else if (partial && rc == PCRE_ERROR_PARTIAL)
+			} else if (partial && rc == REGEX_MATCH_PARTIAL)
 				break;
 
-			if (rc == PCRE_ERROR_NOMATCH)
+			if (rc == REGEX_NO_MATCH)
 				continue;
 
 			errno = ENOENT;
@@ -850,16 +808,9 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1,
 		}
 
 		if (spec1->regcomp && spec2->regcomp) {
-			size_t len1, len2;
-			int rc;
-
-			rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1);
-			assert(rc == 0);
-			rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2);
-			assert(rc == 0);
-			if (len1 != len2 ||
-			    memcmp(spec1->regex, spec2->regex, len1))
+			if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){
 				return incomp(spec1, spec2, "regex", i, j);
+			}
 		} else {
 			if (strcmp(spec1->regex_str, spec2->regex_str))
 				return incomp(spec1, spec2, "regex_str", i, j);
diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
index 6d1e890..a2e30e5 100644
--- a/libselinux/src/label_file.h
+++ b/libselinux/src/label_file.h
@@ -6,6 +6,14 @@
 
 #include <sys/stat.h>
 
+/*
+ * regex.h/c were introduced to hold all dependencies on the regular
+ * expression back-end when we started supporting PCRE2. regex.h defines a
+ * minimal interface required by libselinux, so that the remaining code
+ * can be agnostic about the underlying implementation.
+ */
+#include "regex.h"
+
 #include "callbacks.h"
 #include "label_internal.h"
 
@@ -19,21 +27,12 @@
 
 #define SELINUX_COMPILED_FCONTEXT_MAX_VERS	SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
 
-/* Prior to version 8.20, libpcre did not have pcre_free_study() */
-#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
-#define pcre_free_study  pcre_free
-#endif
-
 /* A file security context specification. */
 struct spec {
 	struct selabel_lookup_rec lr;	/* holds contexts for lookup result */
 	char *regex_str;	/* regular expession string for diagnostics */
 	char *type_str;		/* type string for diagnostic messages */
-	pcre *regex;		/* compiled regular expression */
-	union {
-		pcre_extra *sd;	/* pointer to extra compiled stuff */
-		pcre_extra lsd;	/* used to hold the mmap'd version */
-	};
+	struct regex_data * regex; /* backend dependent regular expression data */
 	mode_t mode;		/* mode format value */
 	int matches;		/* number of matching pathnames */
 	int stem_id;		/* indicates which stem-compression item */
@@ -78,17 +77,6 @@ struct saved_data {
 	struct mmap_area *mmap_areas;
 };
 
-static inline pcre_extra *get_pcre_extra(struct spec *spec)
-{
-	if (spec->from_mmap) {
-		if (spec->lsd.study_data)
-			return &spec->lsd;
-		else
-			return NULL;
-	} else
-		return spec->sd;
-}
-
 static inline mode_t string_to_mode(char *mode)
 {
 	size_t len;
@@ -331,13 +319,12 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes)
 }
 
 static inline int compile_regex(struct saved_data *data, struct spec *spec,
-					    const char **errbuf)
+					    struct regex_error_data * error_data)
 {
-	const char *tmperrbuf;
 	char *reg_buf, *anchored_regex, *cp;
 	struct stem *stem_arr = data->stem_arr;
 	size_t len;
-	int erroff;
+	int rc;
 
 	if (spec->regcomp)
 		return 0; /* already done */
@@ -361,19 +348,9 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
 	*cp = '\0';
 
 	/* Compile the regular expression. */
-	spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
-						    &erroff, NULL);
+	rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
 	free(anchored_regex);
-	if (!spec->regex) {
-		if (errbuf)
-			*errbuf = tmperrbuf;
-		return -1;
-	}
-
-	spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
-	if (!spec->sd && tmperrbuf) {
-		if (errbuf)
-			*errbuf = tmperrbuf;
+	if (rc < 0) {
 		return -1;
 	}
 
@@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle *rec,
 	struct saved_data *data = (struct saved_data *)rec->data;
 	struct spec *spec_arr;
 	unsigned int nspec = data->nspec;
-	const char *errbuf = NULL;
+	char const *errbuf;
+	struct regex_error_data error_data;
 
 	items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
 	if (items < 0) {
@@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle *rec,
 	data->nspec++;
 
 	if (rec->validating &&
-			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
+			    compile_regex(data, &spec_arr[nspec], &error_data)) {
 		COMPAT_LOG(SELINUX_ERROR,
 			   "%s:  line %u has invalid regex %s:  %s\n",
 			   path, lineno, regex,
diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
new file mode 100644
index 0000000..6b92b04
--- /dev/null
+++ b/libselinux/src/regex.c
@@ -0,0 +1,405 @@
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "regex.h"
+#include "label_file.h"
+
+int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
+			struct regex_error_data * errordata) {
+	memset(errordata, 0, sizeof(struct regex_error_data));
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+#ifdef USE_PCRE2
+	(*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
+			PCRE2_ZERO_TERMINATED,
+			PCRE2_DOTALL,
+			&errordata->error_code,
+			&errordata->error_offset, NULL);
+#else
+	(*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
+					&errordata->error_buffer,
+					&errordata->error_offset, NULL);
+#endif
+	if (!(*regex)->regex) {
+		goto err;
+	}
+
+#ifdef USE_PCRE2
+	(*regex)->match_data =
+		pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
+	if (!(*regex)->match_data) {
+		goto err;
+	}
+#else
+	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
+	if (!(*regex)->sd && errordata->error_buffer) {
+		goto err;
+	}
+	(*regex)->extra_owned = !!(*regex)->sd;
+#endif
+	return 0;
+
+err:	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+char const * regex_version(void) {
+#ifdef USE_PCRE2
+	static int initialized = 0;
+	static char * version_string = NULL;
+	size_t version_string_len;
+	if (!initialized) {
+		version_string_len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
+		version_string = (char*) malloc(version_string_len);
+		if (!version_string) {
+			return NULL;
+		}
+		pcre2_config(PCRE2_CONFIG_VERSION, version_string);
+		initialized = 1;
+	}
+	return version_string;
+#else
+	return pcre_version();
+#endif
+}
+
+int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
+	int rc;
+	size_t entry_len;
+#ifndef USE_PCRE2
+	size_t info_len;
+#endif
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+#ifdef USE_PCRE2
+	if (rc < 0)
+		return -1;
+
+#ifndef NO_PERSISTENTLY_STORED_PATTERNS
+	/* this should yield exactly one because we store one pattern at a time
+	 */
+	rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
+	if (rc != 1)
+		return -1;
+
+	*regex = regex_data_create();
+	if (!*regex)
+		return -1;
+
+	rc = pcre2_serialize_decode(&(*regex)->regex, 1,
+			(PCRE2_SPTR)mmap_area->next_addr, NULL);
+	if (rc != 1)
+		goto err;
+
+	(*regex)->match_data =
+		pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
+	if (!(*regex)->match_data)
+		goto err;
+
+#endif /* NO_PERSISTENTLY_STORED_PATTERNS */
+	/* and skip the decoded bit */
+	rc = next_entry(NULL, mmap_area, entry_len);
+	if (rc < 0)
+		goto err;
+
+	return 0;
+#else
+	if (rc < 0 || !entry_len) {
+		return -1;
+	}
+	*regex = regex_data_create();
+	if (!(*regex))
+		return -1;
+
+	(*regex)->extra_owned = 0;
+	(*regex)->regex = (pcre *) mmap_area->next_addr;
+	rc = next_entry(NULL, mmap_area, entry_len);
+	if (rc < 0)
+		goto err;
+
+	/* Check that regex lengths match. pcre_fullinfo()
+	 * also validates its magic number. */
+	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
+	if (rc < 0 || info_len != entry_len) {
+		goto err;
+	}
+
+	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
+	if (rc < 0 || !entry_len) {
+		goto err;
+	}
+
+	if (entry_len) {
+		(*regex)->lsd.study_data = (void *) mmap_area->next_addr;
+		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
+		rc = next_entry(NULL, mmap_area, entry_len);
+		if (rc < 0)
+			goto err;
+
+		/* Check that study data lengths match. */
+		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
+				   PCRE_INFO_STUDYSIZE, &info_len);
+		if (rc < 0 || info_len != entry_len)
+			goto err;
+	}
+	return 0;
+#endif
+err:
+	regex_data_free(*regex);
+	*regex = NULL;
+	return -1;
+}
+
+int regex_writef(struct regex_data * regex, FILE * fp) {
+#if !defined USE_PCRE2 || !defined NO_PERSISTENTLY_STORED_PATTERNS
+	int rc;
+#endif
+	size_t len;
+#ifdef USE_PCRE2
+	PCRE2_SIZE to_write;
+#ifndef NO_PERSISTENTLY_STORED_PATTERNS
+	PCRE2_UCHAR * bytes;
+
+	/* encode the pattern for serialization */
+	rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
+				    &bytes, &to_write, NULL);
+	if (rc != 1)
+		return -1;
+
+#else
+	(void)regex; // silence unused parameter warning
+	to_write = 0;
+#endif
+	/* write serialized pattern's size */
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1) {
+#ifndef NO_PERSISTENTLY_STORED_PATTERNS
+		pcre2_serialize_free(bytes);
+#endif
+		return -1;
+	}
+
+#ifndef NO_PERSISTENTLY_STORED_PATTERNS
+	/* write serialized pattern */
+	len = fwrite(bytes, 1, to_write, fp);
+	if (len != to_write) {
+		pcre2_serialize_free(bytes);
+		return -1;
+	}
+	pcre2_serialize_free(bytes);
+#endif
+#else
+	uint32_t to_write;
+	size_t size;
+	pcre_extra * sd = regex->extra_owned ? regex->sd :
+			(regex->lsd.study_data ? &regex->lsd : NULL);
+
+	/* determine the size of the pcre data in bytes */
+	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
+	if (rc < 0)
+		return -1;
+
+	/* write the number of bytes in the pcre data */
+	to_write = size;
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1)
+		return -1;
+
+	/* write the actual pcre data as a char array */
+	len = fwrite(regex->regex, 1, to_write, fp);
+	if (len != to_write)
+		return -1;
+
+	if (sd) {
+		/* determine the size of the pcre study info */
+		rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
+				&size);
+		if (rc < 0)
+			return -1;
+	} else
+		size = 0;
+
+	/* write the number of bytes in the pcre study data */
+	to_write = size;
+	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
+	if (len != 1)
+		return -1;
+
+	if (sd) {
+		/* write the actual pcre study data as a char array */
+		len = fwrite(sd->study_data, 1, to_write, fp);
+		if (len != to_write)
+			return -1;
+	}
+#endif
+	return 0;
+}
+
+struct regex_data * regex_data_create(void) {
+	struct regex_data * dummy = (struct regex_data*) malloc(
+			sizeof(struct regex_data));
+	if (dummy) {
+		memset(dummy, 0, sizeof(struct regex_data));
+	}
+	return dummy;
+}
+
+void regex_data_free(struct regex_data * regex) {
+	if (regex) {
+#ifdef USE_PCRE2
+		if (regex->regex) {
+			pcre2_code_free(regex->regex);
+		}
+		if (regex->match_data) {
+			pcre2_match_data_free(regex->match_data);
+		}
+#else
+		if (regex->regex)
+			pcre_free(regex->regex);
+		if (regex->extra_owned && regex->sd) {
+			pcre_free_study(regex->sd);
+		}
+#endif
+		free(regex);
+	}
+}
+
+int regex_match(struct regex_data * regex, char const * subject, int partial) {
+	int rc;
+#ifdef USE_PCRE2
+	rc = pcre2_match(regex->regex,
+			(PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
+			partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data,
+			NULL);
+	if (rc > 0)
+	return REGEX_MATCH;
+	switch (rc) {
+		case PCRE2_ERROR_PARTIAL:
+			return REGEX_MATCH_PARTIAL;
+		case PCRE2_ERROR_NOMATCH:
+			return REGEX_NO_MATCH;
+		default:
+			return REGEX_ERROR;
+	}
+#else
+	rc = pcre_exec(regex->regex,
+			regex->extra_owned ? regex->sd : &regex->lsd, subject,
+			strlen(subject), 0, partial ? PCRE_PARTIAL_SOFT : 0,
+			NULL,
+			0);
+	switch (rc) {
+		case 0:
+			return REGEX_MATCH;
+		case PCRE_ERROR_PARTIAL:
+			return REGEX_MATCH_PARTIAL;
+		case PCRE_ERROR_NOMATCH:
+			return REGEX_NO_MATCH;
+		default:
+			return REGEX_ERROR;
+	}
+#endif
+}
+
+/* TODO Replace this compare function with something that actually compares the
+ * regular expressions.
+ * This compare function basically just compares the binary representations of
+ * the automatons, and because this representation contains pointers and
+ * metadata, it can only return a match if regex1 == regex2.
+ * Preferably, this function would be replaced with an algorithm that computes
+ * the equivalence of the automatons systematically.
+ */
+int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
+	int rc;
+	size_t len1, len2;
+#ifdef USE_PCRE2
+	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
+	assert(rc == 0);
+	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
+	assert(rc == 0);
+	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
+		return SELABEL_INCOMPARABLE;
+#else
+	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
+	assert(rc == 0);
+	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
+	assert(rc == 0);
+	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
+		return SELABEL_INCOMPARABLE;
+#endif
+	return SELABEL_EQUAL;
+}
+
+void regex_format_error(struct regex_error_data const * error_data,
+			char * buffer, size_t buf_size) {
+	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
+	char * ptr = &buffer[buf_size - the_end_length];
+	int rc = 0;
+	size_t pos = 0;
+	if (!buffer || !buf_size)
+		return;
+	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
+	if (rc < 0)
+		/* If snprintf fails it constitutes a logical error that needs
+		 * fixing.
+		 */
+		abort();
+
+	pos += rc;
+	if (pos >= buf_size)
+		goto truncated;
+
+	if (error_data->error_offset > 0) {
+#ifdef USE_PCRE2
+		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
+				error_data->error_offset);
+#else
+		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
+				error_data->error_offset);
+#endif
+		if (rc < 0)
+			abort();
+
+	}
+	pos += rc;
+	if (pos >= buf_size)
+		goto truncated;
+
+#ifdef USE_PCRE2
+	rc = pcre2_get_error_message(error_data->error_code,
+			(PCRE2_UCHAR*)(buffer + pos),
+			buf_size - pos);
+	if (rc == PCRE2_ERROR_NOMEMORY)
+		goto truncated;
+#else
+	rc = snprintf(buffer + pos, buf_size - pos, "%s",
+			error_data->error_buffer);
+	if (rc < 0)
+		abort();
+
+	if ((size_t)rc < strlen(error_data->error_buffer))
+		goto truncated;
+#endif
+
+	return;
+
+truncated:
+	/* replace end of string with "..." to indicate that it was truncated */
+	switch (the_end_length) {
+		/* no break statements, fall-through is intended */
+		case 4:
+			*ptr++ = '.';
+		case 3:
+			*ptr++ = '.';
+		case 2:
+			*ptr++ = '.';
+		case 1:
+			*ptr++ = '\0';
+		default:
+			break;
+	}
+	return;
+}
diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
new file mode 100644
index 0000000..bdc10c0
--- /dev/null
+++ b/libselinux/src/regex.h
@@ -0,0 +1,168 @@
+#ifndef SRC_REGEX_H_
+#define SRC_REGEX_H_
+
+#include <stdio.h>
+
+#ifdef USE_PCRE2
+#include <pcre2.h>
+#else
+#include <pcre.h>
+#endif
+
+enum {
+	REGEX_MATCH,
+	REGEX_MATCH_PARTIAL,
+	REGEX_NO_MATCH,
+	REGEX_ERROR = -1,
+};
+
+#ifdef USE_PCRE2
+struct regex_data {
+	pcre2_code * regex; /* compiled regular expression */
+	pcre2_match_data * match_data; /* match data block required for the compiled
+	 pattern in regex2 */
+};
+
+struct regex_error_data {
+	int error_code;
+	PCRE2_SIZE error_offset;
+};
+
+/* ^^^^^^ USE_PCRE2  ^^^^^^ */
+#else
+/* vvvvvv USE_PCRE vvvvvv */
+
+/* Prior to version 8.20, libpcre did not have pcre_free_study() */
+#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
+#define pcre_free_study  pcre_free
+#endif
+
+struct regex_data {
+	pcre *regex; /* compiled regular expression */
+	int extra_owned; /* non zero if pcre_extra is owned by this structure
+			  * and thus must be freed on destruction.
+			  */
+	union {
+		pcre_extra *sd; /* pointer to extra compiled stuff */
+		pcre_extra lsd; /* used to hold the mmap'd version */
+	};
+};
+
+struct regex_error_data {
+	char const * error_buffer;
+	int error_offset;
+};
+
+#endif /* USE_PCRE2 */
+
+struct mmap_area;
+
+/**
+ * regex_version returns the version string of the underlying regular
+ * expressions library. In the case of PCRE it just returns the
+ * result of pcre_version(). In the case of PCRE2, the very first time this
+ * function is called it allocates a buffer large enough to hold the version
+ * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
+ * The allocated buffer will linger in memory until the calling process is being
+ * reaped.
+ *
+ * It may return NULL on error.
+ */
+char const * regex_version(void);
+/**
+ * This constructor function allocates a buffer for a regex_data structure.
+ * The buffer is being initialized with zeroes.
+ */
+struct regex_data * regex_data_create(void);
+/**
+ * This complementary destructor function frees a given regex_data buffer.
+ * It also frees any non NULL member pointers with the appropriate pcreX_X_free
+ * function. For PCRE this function respects the extra_owned field and frees
+ * the pcre_extra data conditionally. Calling this function on a NULL pointer is
+ * safe.
+ */
+void regex_data_free(struct regex_data * regex);
+/**
+ * This function compiles the regular expression. Additionally, it prepares
+ * data structures required by the different underlying engines. For PCRE
+ * it calls pcre_study to generate optional data required for optimized
+ * execution of the compiled pattern. In the case of PCRE2, it allocates
+ * a pcre2_match_data structure of appropriate size to hold all possible
+ * matches created by the pattern.
+ *
+ * @arg regex If successful, the structure returned through *regex was allocated
+ *            with regex_data_create and must be freed with regex_data_free.
+ * @arg pattern_string The pattern string that is to be compiled.
+ * @arg errordata A pointer to a regex_error_data structure must be passed
+ *                to this function. This structure depends on the underlying
+ *                implementation. It can be passed to regex_format_error
+ *                to generate a human readable error message.
+ * @retval 0 on success
+ * @retval -1 on error
+ */
+int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
+			struct regex_error_data * errordata);
+/**
+ * This function loads a serialized precompiled pattern from a contiguous
+ * data region given by map_area.
+ *
+ * @arg map_area Description of the memory region holding a serialized
+ *               representation of the precompiled pattern.
+ * @arg regex If successful, the structure returned through *regex was allocated
+ *            with regex_data_create and must be freed with regex_data_free.
+ *
+ * @retval 0 on success
+ * @retval -1 on error
+ */
+int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** regex);
+/**
+ * This function stores a precompiled regular expression to a file.
+ * In the case of PCRE, it just dumps the binary representation of the
+ * precompiled pattern into a file. In the case of PCRE2, it uses the
+ * serialization function provided by the library.
+ *
+ * @arg regex The precompiled regular expression data.
+ * @arg fp A file stream specifying the output file.
+ */
+int regex_writef(struct regex_data * regex, FILE * fp);
+/**
+ * This function applies a precompiled pattern to a subject string and
+ * returns whether or not a match was found.
+ *
+ * @arg regex The precompiled pattern.
+ * @arg subject The subject string.
+ * @arg partial Boolean indicating if partial matches are wanted. A nonzero
+ *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
+ *              option to pcre_exec or pcre2_match.
+ * @retval REGEX_MATCH if a match was found
+ * @retval REGEX_MATCH_PARTIAL if a partial match was found
+ * @retval REGEX_NO_MATCH if no match was found
+ * @retval REGEX_ERROR if an error was encountered during the execution of the
+ *                     regular expression
+ */
+int regex_match(struct regex_data * regex, char const * subject, int partial);
+/**
+ * This function compares two compiled regular expressions (regex1 and regex2).
+ * It compares the binary representations of the compiled patterns. It is a very
+ * crude approximation because the binary representation holds data like
+ * reference counters, that has nothing to do with the actual state machine.
+ *
+ * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
+ *                       the same
+ * @retval SELABEL_INCOMPARABLE otherwise
+ */
+int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
+/**
+ * This function takes the error data returned by regex_prepare_data and turns
+ * it into a human readable error message.
+ * If the buffer given to hold the error message is too small it truncates the
+ * message and indicates the truncation with an ellipsis ("...") at the end of
+ * the buffer.
+ *
+ * @arg error_data Error data as returned by regex_prepare_data.
+ * @arg buffer String buffer to hold the formatted error string.
+ * @arg buf_size Total size of the given buffer in bytes.
+ */
+void regex_format_error(struct regex_error_data const * error_data,
+			char * buffer, size_t buf_size);
+#endif  /* SRC_REGEX_H_ */
diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
index 8497cb4..1e7a048 100644
--- a/libselinux/utils/Makefile
+++ b/libselinux/utils/Makefile
@@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
           -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
           -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
           -Werror -Wno-aggregate-return -Wno-redundant-decls
-override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
+override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
 LDLIBS += -L../src -lselinux -L$(LIBDIR)
 
 TARGETS=$(patsubst %.c,%,$(wildcard *.c))
 
-sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
+sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol
 
 selinux_restorecon: LDLIBS += -lsepol
 
diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
index fd6fb78..8ff73f4 100644
--- a/libselinux/utils/sefcontext_compile.c
+++ b/libselinux/utils/sefcontext_compile.c
@@ -1,6 +1,5 @@
 #include <ctype.h>
 #include <errno.h>
-#include <pcre.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
@@ -13,6 +12,7 @@
 #include <sepol/sepol.h>
 
 #include "../src/label_file.h"
+#include "../src/regex.h"
 
 const char *policy_file;
 static int ctx_err;
@@ -119,12 +119,14 @@ static int write_binary_file(struct saved_data *data, int fd)
 	if (len != 1)
 		goto err;
 
-	/* write the pcre version */
-	section_len = strlen(pcre_version());
+	/* write version of the regex back-end */
+	if (!regex_version())
+		goto err;
+	section_len = strlen(regex_version());
 	len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
 	if (len != 1)
 		goto err;
-	len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
+	len = fwrite(regex_version(), sizeof(char), section_len, bin_file);
 	if (len != section_len)
 		goto err;
 
@@ -162,10 +164,8 @@ static int write_binary_file(struct saved_data *data, int fd)
 		mode_t mode = specs[i].mode;
 		size_t prefix_len = specs[i].prefix_len;
 		int32_t stem_id = specs[i].stem_id;
-		pcre *re = specs[i].regex;
-		pcre_extra *sd = get_pcre_extra(&specs[i]);
+		struct regex_data *re = specs[i].regex;
 		uint32_t to_write;
-		size_t size;
 
 		/* length of the context string (including nul) */
 		to_write = strlen(context) + 1;
@@ -212,42 +212,10 @@ static int write_binary_file(struct saved_data *data, int fd)
 		if (len != 1)
 			goto err;
 
-		/* determine the size of the pcre data in bytes */
-		rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
+		/* Write regex related data */
+		rc = regex_writef(re, bin_file);
 		if (rc < 0)
 			goto err;
-
-		/* write the number of bytes in the pcre data */
-		to_write = size;
-		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
-		if (len != 1)
-			goto err;
-
-		/* write the actual pcre data as a char array */
-		len = fwrite(re, 1, to_write, bin_file);
-		if (len != to_write)
-			goto err;
-
-		if (sd) {
-			/* determine the size of the pcre study info */
-			rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
-			if (rc < 0)
-				goto err;
-		} else
-			size = 0;
-
-		/* write the number of bytes in the pcre study data */
-		to_write = size;
-		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
-		if (len != 1)
-			goto err;
-
-		if (sd) {
-			/* write the actual pcre study data as a char array */
-			len = fwrite(sd->study_data, 1, to_write, bin_file);
-			if (len != to_write)
-				goto err;
-		}
 	}
 
 	rc = 0;
@@ -270,8 +238,7 @@ static void free_specs(struct saved_data *data)
 		free(specs[i].lr.ctx_trans);
 		free(specs[i].regex_str);
 		free(specs[i].type_str);
-		pcre_free(specs[i].regex);
-		pcre_free_study(specs[i].sd);
+		regex_data_free(specs[i].regex);
 	}
 	free(specs);
 
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [PATCH 2/2] libselinux: fix memory leak on pcre2
  2016-09-07  8:08 [PATCH 1/2] libselinux: add support for pcre2 Janis Danisevskis
@ 2016-09-07  8:08 ` Janis Danisevskis
  2016-09-07 15:02 ` [PATCH 1/2] libselinux: add support for pcre2 Stephen Smalley
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 13+ messages in thread
From: Janis Danisevskis @ 2016-09-07  8:08 UTC (permalink / raw)
  To: selinux, seandroid-list, sds, jwcart2

From: William Roberts <william.c.roberts@intel.com>

Introduced a malloc on pcre_version(). Libselinux
expected this to be static, so just use a static
internal buffer.

Signed-off-by: William Roberts <william.c.roberts@intel.com>
---
 libselinux/src/label_file.c           | 13 ++++++++-----
 libselinux/src/regex.c                | 20 +++++++-------------
 libselinux/utils/sefcontext_compile.c |  8 +++++---
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
index 6698624..110db11 100644
--- a/libselinux/src/label_file.c
+++ b/libselinux/src/label_file.c
@@ -111,6 +111,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 	struct mmap_area *mmap_area;
 	uint32_t i, magic, version;
 	uint32_t entry_len, stem_map_len, regex_array_len;
+	const char *reg_version;
 
 	if (isbinary) {
 		len = strlen(path);
@@ -174,11 +175,13 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 	if (rc < 0 || version > SELINUX_COMPILED_FCONTEXT_MAX_VERS)
 		return -1;
 
+	reg_version = regex_version();
+	if (!reg_version)
+		return -1;
+
 	if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
-		if (!regex_version()) {
-			return -1;
-		}
-		len = strlen(regex_version());
+
+		len = strlen(reg_version);
 
 		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
 		if (rc < 0)
@@ -200,7 +203,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
 		}
 
 		str_buf[entry_len] = '\0';
-		if ((strcmp(str_buf, regex_version()) != 0)) {
+		if ((strcmp(str_buf, reg_version) != 0)) {
 			free(str_buf);
 			return -1;
 		}
diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
index 6b92b04..c880bfa 100644
--- a/libselinux/src/regex.c
+++ b/libselinux/src/regex.c
@@ -49,19 +49,13 @@ err:	regex_data_free(*regex);
 
 char const * regex_version(void) {
 #ifdef USE_PCRE2
-	static int initialized = 0;
-	static char * version_string = NULL;
-	size_t version_string_len;
-	if (!initialized) {
-		version_string_len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
-		version_string = (char*) malloc(version_string_len);
-		if (!version_string) {
-			return NULL;
-		}
-		pcre2_config(PCRE2_CONFIG_VERSION, version_string);
-		initialized = 1;
-	}
-	return version_string;
+	static char version_buf[256];
+	size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
+	if (len <= 0 || len > sizeof(version_buf))
+		return NULL;
+
+	pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
+	return version_buf;
 #else
 	return pcre_version();
 #endif
diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
index 8ff73f4..b6b8d92 100644
--- a/libselinux/utils/sefcontext_compile.c
+++ b/libselinux/utils/sefcontext_compile.c
@@ -101,6 +101,7 @@ static int write_binary_file(struct saved_data *data, int fd)
 	uint32_t section_len;
 	uint32_t i;
 	int rc;
+	const char *reg_version;
 
 	bin_file = fdopen(fd, "w");
 	if (!bin_file) {
@@ -120,13 +121,14 @@ static int write_binary_file(struct saved_data *data, int fd)
 		goto err;
 
 	/* write version of the regex back-end */
-	if (!regex_version())
+	reg_version = regex_version();
+	if (!reg_version)
 		goto err;
-	section_len = strlen(regex_version());
+	section_len = strlen(reg_version);
 	len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
 	if (len != 1)
 		goto err;
-	len = fwrite(regex_version(), sizeof(char), section_len, bin_file);
+	len = fwrite(reg_version, sizeof(char), section_len, bin_file);
 	if (len != section_len)
 		goto err;
 
-- 
2.8.0.rc3.226.g39d4020

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07  8:08 [PATCH 1/2] libselinux: add support for pcre2 Janis Danisevskis
  2016-09-07  8:08 ` [PATCH 2/2] libselinux: fix memory leak on pcre2 Janis Danisevskis
@ 2016-09-07 15:02 ` Stephen Smalley
  2016-09-07 15:37   ` William Roberts
                     ` (2 more replies)
  2016-09-07 15:19 ` William Roberts
  2016-09-07 18:25 ` Stephen Smalley
  3 siblings, 3 replies; 13+ messages in thread
From: Stephen Smalley @ 2016-09-07 15:02 UTC (permalink / raw)
  To: Janis Danisevskis, selinux, seandroid-list, jwcart2; +Cc: Janis Danisevskis

On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
> From: Janis Danisevskis <jdanis@google.com>
> 
> This patch moves all pcre1/2 dependencies into the new files regex.h
> and regex.c implementing the common denominator of features needed
> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> used implementations.
> 
> As of this patch libselinux supports either pcre or pcre2 but not
> both at the same time. The persistently stored file contexts
> information differs. This means libselinux can only load file
> context files generated by sefcontext_compile built with the
> same pcre variant.

Shouldn't the pcre variant be encoded in some manner in the
file_contexts.bin file so that libselinux can tell immediately whether
it is supported?

> Also, for pcre2 the persistent format is architecture dependent.
> Stored precompiled regular expressions can only be used on the
> same architecture they were generated on. If pcre2 is used and
> sefcontext_compile shall generate portable output, it and libselinux
> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
> cost of having to recompile the regular expressions at load time.

Is it worth supporting the -DNO_PERSISTENTLY_STORED_PATTERNS case?  The
point of using file_contexts.bin was to move the cost of compiling the
regexes to build time rather than load time; if we cannot do that, then
how much do we gain from using file_contexts.bin instead of just falling
back to file_contexts?

The #ifdef maze makes it very hard to read and maintain this code; that
needs to be refactored.

valgrind is reporting numerous errors, including both use of
uninitialised values and memory leaks with both patches applied.  Try:
make DESTDIR=~/obj CFLAGS+=-g clean install
LD_LIBRARY_PATH=~/obj/lib valgrind --leak-check=full
~/obj/usr/sbin/matchpathcon /etc

On x86_64.

Will provide review of the code itself later...

> 
> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> ---
>  libselinux/Makefile                   |  13 ++
>  libselinux/src/Makefile               |   4 +-
>  libselinux/src/label_file.c           |  91 ++------
>  libselinux/src/label_file.h           |  54 ++---
>  libselinux/src/regex.c                | 405 ++++++++++++++++++++++++++++++++++
>  libselinux/src/regex.h                | 168 ++++++++++++++
>  libselinux/utils/Makefile             |   4 +-
>  libselinux/utils/sefcontext_compile.c |  53 +----
>  8 files changed, 637 insertions(+), 155 deletions(-)
>  create mode 100644 libselinux/src/regex.c
>  create mode 100644 libselinux/src/regex.h
> 
> diff --git a/libselinux/Makefile b/libselinux/Makefile
> index 6142b60..15d051e 100644
> --- a/libselinux/Makefile
> +++ b/libselinux/Makefile
> @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
>  endif
>  export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
>  
> +USE_PCRE2 ?= n
> +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
> +ifeq ($(USE_PCRE2),y)
> +	PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
> +	ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
> +		PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
> +	endif
> +	PCRE_LDFLAGS := -lpcre2-8
> +else
> +	PCRE_LDFLAGS := -lpcre
> +endif
> +export PCRE_CFLAGS PCRE_LDFLAGS
> +
>  all install relabel clean distclean indent:
>  	@for subdir in $(SUBDIRS); do \
>  		(cd $$subdir && $(MAKE) $@) || exit 1; \
> diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
> index 37d01af..66687e6 100644
> --- a/libselinux/src/Makefile
> +++ b/libselinux/src/Makefile
> @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
>            -Werror -Wno-aggregate-return -Wno-redundant-decls
>  
> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
>  
>  SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
>  		-Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
> @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
>  	$(RANLIB) $@
>  
>  $(LIBSO): $(LOBJS)
> -	$(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> +	$(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
>  	ln -sf $@ $(TARGET) 
>  
>  $(LIBPC): $(LIBPC).in ../VERSION
> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
> index c89bb35..6698624 100644
> --- a/libselinux/src/label_file.c
> +++ b/libselinux/src/label_file.c
> @@ -15,7 +15,6 @@
>  #include <errno.h>
>  #include <limits.h>
>  #include <stdint.h>
> -#include <pcre.h>
>  #include <unistd.h>
>  #include <sys/mman.h>
>  #include <sys/types.h>
> @@ -176,7 +175,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>  		return -1;
>  
>  	if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
> -		len = strlen(pcre_version());
> +		if (!regex_version()) {
> +			return -1;
> +		}
> +		len = strlen(regex_version());
>  
>  		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>  		if (rc < 0)
> @@ -198,7 +200,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>  		}
>  
>  		str_buf[entry_len] = '\0';
> -		if ((strcmp(str_buf, pcre_version()) != 0)) {
> +		if ((strcmp(str_buf, regex_version()) != 0)) {
>  			free(str_buf);
>  			return -1;
>  		}
> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>  
>  		spec = &data->spec_arr[data->nspec];
>  		spec->from_mmap = 1;
> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
> +		spec->regcomp = 0;
> +#else
>  		spec->regcomp = 1;
> +#endif
>  
>  		/* Process context */
>  		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> @@ -364,47 +370,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>  			spec->prefix_len = prefix_len;
>  		}
>  
> -		/* Process regex and study_data entries */
> -		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> -		if (rc < 0 || !entry_len) {
> -			rc = -1;
> -			goto err;
> -		}
> -		spec->regex = (pcre *)mmap_area->next_addr;
> -		rc = next_entry(NULL, mmap_area, entry_len);
> +		rc = regex_load_mmap(mmap_area, &spec->regex);
>  		if (rc < 0)
>  			goto err;
>  
> -		/* Check that regex lengths match. pcre_fullinfo()
> -		 * also validates its magic number. */
> -		rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
> -		if (rc < 0 || len != entry_len) {
> -			rc = -1;
> -			goto err;
> -		}
> -
> -		rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> -		if (rc < 0 || !entry_len) {
> -			rc = -1;
> -			goto err;
> -		}
> -
> -		if (entry_len) {
> -			spec->lsd.study_data = (void *)mmap_area->next_addr;
> -			spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> -			rc = next_entry(NULL, mmap_area, entry_len);
> -			if (rc < 0)
> -				goto err;
> -
> -			/* Check that study data lengths match. */
> -			rc = pcre_fullinfo(spec->regex, &spec->lsd,
> -					   PCRE_INFO_STUDYSIZE, &len);
> -			if (rc < 0 || len != entry_len) {
> -				rc = -1;
> -				goto err;
> -			}
> -		}
> -
>  		data->nspec++;
>  	}
>  
> @@ -609,10 +578,7 @@ static void closef(struct selabel_handle *rec)
>  			continue;
>  		free(spec->regex_str);
>  		free(spec->type_str);
> -		if (spec->regcomp) {
> -			pcre_free(spec->regex);
> -			pcre_free_study(spec->sd);
> -		}
> +		regex_data_free(spec->regex);
>  	}
>  
>  	for (i = 0; i < (unsigned int)data->num_stems; i++) {
> @@ -644,13 +610,14 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>  {
>  	struct saved_data *data = (struct saved_data *)rec->data;
>  	struct spec *spec_arr = data->spec_arr;
> -	int i, rc, file_stem, pcre_options = 0;
> +	int i, rc, file_stem;
>  	mode_t mode = (mode_t)type;
>  	const char *buf;
>  	struct spec *ret = NULL;
>  	char *clean_key = NULL;
>  	const char *prev_slash, *next_slash;
>  	unsigned int sofar = 0;
> +	struct regex_error_data regex_error_data;
>  
>  	if (!data->nspec) {
>  		errno = ENOENT;
> @@ -677,9 +644,6 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>  	file_stem = find_stem_from_file(data, &buf);
>  	mode &= S_IFMT;
>  
> -	if (partial)
> -		pcre_options |= PCRE_PARTIAL_SOFT;
> -
>  	/*
>  	 * Check for matching specifications in reverse order, so that
>  	 * the last matching specification is used.
> @@ -692,25 +656,19 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>  		 * a regex check        */
>  		if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
>  		    (!mode || !spec->mode || mode == spec->mode)) {
> -			if (compile_regex(data, spec, NULL) < 0)
> +			if (compile_regex(data, spec, &regex_error_data) < 0)
>  				goto finish;
>  			if (spec->stem_id == -1)
> -				rc = pcre_exec(spec->regex,
> -						    get_pcre_extra(spec),
> -						    key, strlen(key), 0,
> -						    pcre_options, NULL, 0);
> +				rc = regex_match(spec->regex, key, partial);
>  			else
> -				rc = pcre_exec(spec->regex,
> -						    get_pcre_extra(spec),
> -						    buf, strlen(buf), 0,
> -						    pcre_options, NULL, 0);
> -			if (rc == 0) {
> +				rc = regex_match(spec->regex, buf, partial);
> +			if (rc == REGEX_MATCH) {
>  				spec->matches++;
>  				break;
> -			} else if (partial && rc == PCRE_ERROR_PARTIAL)
> +			} else if (partial && rc == REGEX_MATCH_PARTIAL)
>  				break;
>  
> -			if (rc == PCRE_ERROR_NOMATCH)
> +			if (rc == REGEX_NO_MATCH)
>  				continue;
>  
>  			errno = ENOENT;
> @@ -850,16 +808,9 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1,
>  		}
>  
>  		if (spec1->regcomp && spec2->regcomp) {
> -			size_t len1, len2;
> -			int rc;
> -
> -			rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1);
> -			assert(rc == 0);
> -			rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2);
> -			assert(rc == 0);
> -			if (len1 != len2 ||
> -			    memcmp(spec1->regex, spec2->regex, len1))
> +			if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){
>  				return incomp(spec1, spec2, "regex", i, j);
> +			}
>  		} else {
>  			if (strcmp(spec1->regex_str, spec2->regex_str))
>  				return incomp(spec1, spec2, "regex_str", i, j);
> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
> index 6d1e890..a2e30e5 100644
> --- a/libselinux/src/label_file.h
> +++ b/libselinux/src/label_file.h
> @@ -6,6 +6,14 @@
>  
>  #include <sys/stat.h>
>  
> +/*
> + * regex.h/c were introduced to hold all dependencies on the regular
> + * expression back-end when we started supporting PCRE2. regex.h defines a
> + * minimal interface required by libselinux, so that the remaining code
> + * can be agnostic about the underlying implementation.
> + */
> +#include "regex.h"
> +
>  #include "callbacks.h"
>  #include "label_internal.h"
>  
> @@ -19,21 +27,12 @@
>  
>  #define SELINUX_COMPILED_FCONTEXT_MAX_VERS	SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
>  
> -/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> -#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> -#define pcre_free_study  pcre_free
> -#endif
> -
>  /* A file security context specification. */
>  struct spec {
>  	struct selabel_lookup_rec lr;	/* holds contexts for lookup result */
>  	char *regex_str;	/* regular expession string for diagnostics */
>  	char *type_str;		/* type string for diagnostic messages */
> -	pcre *regex;		/* compiled regular expression */
> -	union {
> -		pcre_extra *sd;	/* pointer to extra compiled stuff */
> -		pcre_extra lsd;	/* used to hold the mmap'd version */
> -	};
> +	struct regex_data * regex; /* backend dependent regular expression data */
>  	mode_t mode;		/* mode format value */
>  	int matches;		/* number of matching pathnames */
>  	int stem_id;		/* indicates which stem-compression item */
> @@ -78,17 +77,6 @@ struct saved_data {
>  	struct mmap_area *mmap_areas;
>  };
>  
> -static inline pcre_extra *get_pcre_extra(struct spec *spec)
> -{
> -	if (spec->from_mmap) {
> -		if (spec->lsd.study_data)
> -			return &spec->lsd;
> -		else
> -			return NULL;
> -	} else
> -		return spec->sd;
> -}
> -
>  static inline mode_t string_to_mode(char *mode)
>  {
>  	size_t len;
> @@ -331,13 +319,12 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes)
>  }
>  
>  static inline int compile_regex(struct saved_data *data, struct spec *spec,
> -					    const char **errbuf)
> +					    struct regex_error_data * error_data)
>  {
> -	const char *tmperrbuf;
>  	char *reg_buf, *anchored_regex, *cp;
>  	struct stem *stem_arr = data->stem_arr;
>  	size_t len;
> -	int erroff;
> +	int rc;
>  
>  	if (spec->regcomp)
>  		return 0; /* already done */
> @@ -361,19 +348,9 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
>  	*cp = '\0';
>  
>  	/* Compile the regular expression. */
> -	spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
> -						    &erroff, NULL);
> +	rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
>  	free(anchored_regex);
> -	if (!spec->regex) {
> -		if (errbuf)
> -			*errbuf = tmperrbuf;
> -		return -1;
> -	}
> -
> -	spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
> -	if (!spec->sd && tmperrbuf) {
> -		if (errbuf)
> -			*errbuf = tmperrbuf;
> +	if (rc < 0) {
>  		return -1;
>  	}
>  
> @@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle *rec,
>  	struct saved_data *data = (struct saved_data *)rec->data;
>  	struct spec *spec_arr;
>  	unsigned int nspec = data->nspec;
> -	const char *errbuf = NULL;
> +	char const *errbuf;
> +	struct regex_error_data error_data;
>  
>  	items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
>  	if (items < 0) {
> @@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle *rec,
>  	data->nspec++;
>  
>  	if (rec->validating &&
> -			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
> +			    compile_regex(data, &spec_arr[nspec], &error_data)) {
>  		COMPAT_LOG(SELINUX_ERROR,
>  			   "%s:  line %u has invalid regex %s:  %s\n",
>  			   path, lineno, regex,
> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> new file mode 100644
> index 0000000..6b92b04
> --- /dev/null
> +++ b/libselinux/src/regex.c
> @@ -0,0 +1,405 @@
> +#include <assert.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include "regex.h"
> +#include "label_file.h"
> +
> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
> +			struct regex_error_data * errordata) {
> +	memset(errordata, 0, sizeof(struct regex_error_data));
> +	*regex = regex_data_create();
> +	if (!(*regex))
> +		return -1;
> +#ifdef USE_PCRE2
> +	(*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
> +			PCRE2_ZERO_TERMINATED,
> +			PCRE2_DOTALL,
> +			&errordata->error_code,
> +			&errordata->error_offset, NULL);
> +#else
> +	(*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
> +					&errordata->error_buffer,
> +					&errordata->error_offset, NULL);
> +#endif
> +	if (!(*regex)->regex) {
> +		goto err;
> +	}
> +
> +#ifdef USE_PCRE2
> +	(*regex)->match_data =
> +		pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
> +	if (!(*regex)->match_data) {
> +		goto err;
> +	}
> +#else
> +	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
> +	if (!(*regex)->sd && errordata->error_buffer) {
> +		goto err;
> +	}
> +	(*regex)->extra_owned = !!(*regex)->sd;
> +#endif
> +	return 0;
> +
> +err:	regex_data_free(*regex);
> +	*regex = NULL;
> +	return -1;
> +}
> +
> +char const * regex_version(void) {
> +#ifdef USE_PCRE2
> +	static int initialized = 0;
> +	static char * version_string = NULL;
> +	size_t version_string_len;
> +	if (!initialized) {
> +		version_string_len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
> +		version_string = (char*) malloc(version_string_len);
> +		if (!version_string) {
> +			return NULL;
> +		}
> +		pcre2_config(PCRE2_CONFIG_VERSION, version_string);
> +		initialized = 1;
> +	}
> +	return version_string;
> +#else
> +	return pcre_version();
> +#endif
> +}
> +
> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
> +	int rc;
> +	size_t entry_len;
> +#ifndef USE_PCRE2
> +	size_t info_len;
> +#endif
> +
> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +#ifdef USE_PCRE2
> +	if (rc < 0)
> +		return -1;
> +
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +	/* this should yield exactly one because we store one pattern at a time
> +	 */
> +	rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
> +	if (rc != 1)
> +		return -1;
> +
> +	*regex = regex_data_create();
> +	if (!*regex)
> +		return -1;
> +
> +	rc = pcre2_serialize_decode(&(*regex)->regex, 1,
> +			(PCRE2_SPTR)mmap_area->next_addr, NULL);
> +	if (rc != 1)
> +		goto err;
> +
> +	(*regex)->match_data =
> +		pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
> +	if (!(*regex)->match_data)
> +		goto err;
> +
> +#endif /* NO_PERSISTENTLY_STORED_PATTERNS */
> +	/* and skip the decoded bit */
> +	rc = next_entry(NULL, mmap_area, entry_len);
> +	if (rc < 0)
> +		goto err;
> +
> +	return 0;
> +#else
> +	if (rc < 0 || !entry_len) {
> +		return -1;
> +	}
> +	*regex = regex_data_create();
> +	if (!(*regex))
> +		return -1;
> +
> +	(*regex)->extra_owned = 0;
> +	(*regex)->regex = (pcre *) mmap_area->next_addr;
> +	rc = next_entry(NULL, mmap_area, entry_len);
> +	if (rc < 0)
> +		goto err;
> +
> +	/* Check that regex lengths match. pcre_fullinfo()
> +	 * also validates its magic number. */
> +	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
> +	if (rc < 0 || info_len != entry_len) {
> +		goto err;
> +	}
> +
> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +	if (rc < 0 || !entry_len) {
> +		goto err;
> +	}
> +
> +	if (entry_len) {
> +		(*regex)->lsd.study_data = (void *) mmap_area->next_addr;
> +		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> +		rc = next_entry(NULL, mmap_area, entry_len);
> +		if (rc < 0)
> +			goto err;
> +
> +		/* Check that study data lengths match. */
> +		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
> +				   PCRE_INFO_STUDYSIZE, &info_len);
> +		if (rc < 0 || info_len != entry_len)
> +			goto err;
> +	}
> +	return 0;
> +#endif
> +err:
> +	regex_data_free(*regex);
> +	*regex = NULL;
> +	return -1;
> +}
> +
> +int regex_writef(struct regex_data * regex, FILE * fp) {
> +#if !defined USE_PCRE2 || !defined NO_PERSISTENTLY_STORED_PATTERNS
> +	int rc;
> +#endif
> +	size_t len;
> +#ifdef USE_PCRE2
> +	PCRE2_SIZE to_write;
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +	PCRE2_UCHAR * bytes;
> +
> +	/* encode the pattern for serialization */
> +	rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
> +				    &bytes, &to_write, NULL);
> +	if (rc != 1)
> +		return -1;
> +
> +#else
> +	(void)regex; // silence unused parameter warning
> +	to_write = 0;
> +#endif
> +	/* write serialized pattern's size */
> +	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +	if (len != 1) {
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +		pcre2_serialize_free(bytes);
> +#endif
> +		return -1;
> +	}
> +
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +	/* write serialized pattern */
> +	len = fwrite(bytes, 1, to_write, fp);
> +	if (len != to_write) {
> +		pcre2_serialize_free(bytes);
> +		return -1;
> +	}
> +	pcre2_serialize_free(bytes);
> +#endif
> +#else
> +	uint32_t to_write;
> +	size_t size;
> +	pcre_extra * sd = regex->extra_owned ? regex->sd :
> +			(regex->lsd.study_data ? &regex->lsd : NULL);
> +
> +	/* determine the size of the pcre data in bytes */
> +	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
> +	if (rc < 0)
> +		return -1;
> +
> +	/* write the number of bytes in the pcre data */
> +	to_write = size;
> +	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +	if (len != 1)
> +		return -1;
> +
> +	/* write the actual pcre data as a char array */
> +	len = fwrite(regex->regex, 1, to_write, fp);
> +	if (len != to_write)
> +		return -1;
> +
> +	if (sd) {
> +		/* determine the size of the pcre study info */
> +		rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
> +				&size);
> +		if (rc < 0)
> +			return -1;
> +	} else
> +		size = 0;
> +
> +	/* write the number of bytes in the pcre study data */
> +	to_write = size;
> +	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +	if (len != 1)
> +		return -1;
> +
> +	if (sd) {
> +		/* write the actual pcre study data as a char array */
> +		len = fwrite(sd->study_data, 1, to_write, fp);
> +		if (len != to_write)
> +			return -1;
> +	}
> +#endif
> +	return 0;
> +}
> +
> +struct regex_data * regex_data_create(void) {
> +	struct regex_data * dummy = (struct regex_data*) malloc(
> +			sizeof(struct regex_data));
> +	if (dummy) {
> +		memset(dummy, 0, sizeof(struct regex_data));
> +	}
> +	return dummy;
> +}
> +
> +void regex_data_free(struct regex_data * regex) {
> +	if (regex) {
> +#ifdef USE_PCRE2
> +		if (regex->regex) {
> +			pcre2_code_free(regex->regex);
> +		}
> +		if (regex->match_data) {
> +			pcre2_match_data_free(regex->match_data);
> +		}
> +#else
> +		if (regex->regex)
> +			pcre_free(regex->regex);
> +		if (regex->extra_owned && regex->sd) {
> +			pcre_free_study(regex->sd);
> +		}
> +#endif
> +		free(regex);
> +	}
> +}
> +
> +int regex_match(struct regex_data * regex, char const * subject, int partial) {
> +	int rc;
> +#ifdef USE_PCRE2
> +	rc = pcre2_match(regex->regex,
> +			(PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> +			partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data,
> +			NULL);
> +	if (rc > 0)
> +	return REGEX_MATCH;
> +	switch (rc) {
> +		case PCRE2_ERROR_PARTIAL:
> +			return REGEX_MATCH_PARTIAL;
> +		case PCRE2_ERROR_NOMATCH:
> +			return REGEX_NO_MATCH;
> +		default:
> +			return REGEX_ERROR;
> +	}
> +#else
> +	rc = pcre_exec(regex->regex,
> +			regex->extra_owned ? regex->sd : &regex->lsd, subject,
> +			strlen(subject), 0, partial ? PCRE_PARTIAL_SOFT : 0,
> +			NULL,
> +			0);
> +	switch (rc) {
> +		case 0:
> +			return REGEX_MATCH;
> +		case PCRE_ERROR_PARTIAL:
> +			return REGEX_MATCH_PARTIAL;
> +		case PCRE_ERROR_NOMATCH:
> +			return REGEX_NO_MATCH;
> +		default:
> +			return REGEX_ERROR;
> +	}
> +#endif
> +}
> +
> +/* TODO Replace this compare function with something that actually compares the
> + * regular expressions.
> + * This compare function basically just compares the binary representations of
> + * the automatons, and because this representation contains pointers and
> + * metadata, it can only return a match if regex1 == regex2.
> + * Preferably, this function would be replaced with an algorithm that computes
> + * the equivalence of the automatons systematically.
> + */
> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
> +	int rc;
> +	size_t len1, len2;
> +#ifdef USE_PCRE2
> +	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
> +	assert(rc == 0);
> +	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
> +	assert(rc == 0);
> +	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> +		return SELABEL_INCOMPARABLE;
> +#else
> +	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
> +	assert(rc == 0);
> +	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
> +	assert(rc == 0);
> +	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> +		return SELABEL_INCOMPARABLE;
> +#endif
> +	return SELABEL_EQUAL;
> +}
> +
> +void regex_format_error(struct regex_error_data const * error_data,
> +			char * buffer, size_t buf_size) {
> +	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
> +	char * ptr = &buffer[buf_size - the_end_length];
> +	int rc = 0;
> +	size_t pos = 0;
> +	if (!buffer || !buf_size)
> +		return;
> +	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
> +	if (rc < 0)
> +		/* If snprintf fails it constitutes a logical error that needs
> +		 * fixing.
> +		 */
> +		abort();
> +
> +	pos += rc;
> +	if (pos >= buf_size)
> +		goto truncated;
> +
> +	if (error_data->error_offset > 0) {
> +#ifdef USE_PCRE2
> +		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
> +				error_data->error_offset);
> +#else
> +		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
> +				error_data->error_offset);
> +#endif
> +		if (rc < 0)
> +			abort();
> +
> +	}
> +	pos += rc;
> +	if (pos >= buf_size)
> +		goto truncated;
> +
> +#ifdef USE_PCRE2
> +	rc = pcre2_get_error_message(error_data->error_code,
> +			(PCRE2_UCHAR*)(buffer + pos),
> +			buf_size - pos);
> +	if (rc == PCRE2_ERROR_NOMEMORY)
> +		goto truncated;
> +#else
> +	rc = snprintf(buffer + pos, buf_size - pos, "%s",
> +			error_data->error_buffer);
> +	if (rc < 0)
> +		abort();
> +
> +	if ((size_t)rc < strlen(error_data->error_buffer))
> +		goto truncated;
> +#endif
> +
> +	return;
> +
> +truncated:
> +	/* replace end of string with "..." to indicate that it was truncated */
> +	switch (the_end_length) {
> +		/* no break statements, fall-through is intended */
> +		case 4:
> +			*ptr++ = '.';
> +		case 3:
> +			*ptr++ = '.';
> +		case 2:
> +			*ptr++ = '.';
> +		case 1:
> +			*ptr++ = '\0';
> +		default:
> +			break;
> +	}
> +	return;
> +}
> diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
> new file mode 100644
> index 0000000..bdc10c0
> --- /dev/null
> +++ b/libselinux/src/regex.h
> @@ -0,0 +1,168 @@
> +#ifndef SRC_REGEX_H_
> +#define SRC_REGEX_H_
> +
> +#include <stdio.h>
> +
> +#ifdef USE_PCRE2
> +#include <pcre2.h>
> +#else
> +#include <pcre.h>
> +#endif
> +
> +enum {
> +	REGEX_MATCH,
> +	REGEX_MATCH_PARTIAL,
> +	REGEX_NO_MATCH,
> +	REGEX_ERROR = -1,
> +};
> +
> +#ifdef USE_PCRE2
> +struct regex_data {
> +	pcre2_code * regex; /* compiled regular expression */
> +	pcre2_match_data * match_data; /* match data block required for the compiled
> +	 pattern in regex2 */
> +};
> +
> +struct regex_error_data {
> +	int error_code;
> +	PCRE2_SIZE error_offset;
> +};
> +
> +/* ^^^^^^ USE_PCRE2  ^^^^^^ */
> +#else
> +/* vvvvvv USE_PCRE vvvvvv */
> +
> +/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> +#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> +#define pcre_free_study  pcre_free
> +#endif
> +
> +struct regex_data {
> +	pcre *regex; /* compiled regular expression */
> +	int extra_owned; /* non zero if pcre_extra is owned by this structure
> +			  * and thus must be freed on destruction.
> +			  */
> +	union {
> +		pcre_extra *sd; /* pointer to extra compiled stuff */
> +		pcre_extra lsd; /* used to hold the mmap'd version */
> +	};
> +};
> +
> +struct regex_error_data {
> +	char const * error_buffer;
> +	int error_offset;
> +};
> +
> +#endif /* USE_PCRE2 */
> +
> +struct mmap_area;
> +
> +/**
> + * regex_version returns the version string of the underlying regular
> + * expressions library. In the case of PCRE it just returns the
> + * result of pcre_version(). In the case of PCRE2, the very first time this
> + * function is called it allocates a buffer large enough to hold the version
> + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
> + * The allocated buffer will linger in memory until the calling process is
> + * reaped.
> + *
> + * It may return NULL on error.
> + */
> +char const * regex_version(void);
> +/**
> + * This constructor function allocates a buffer for a regex_data structure.
> + * The buffer is being initialized with zeroes.
> + */
> +struct regex_data * regex_data_create(void);
> +/**
> + * This complementary destructor function frees a given regex_data buffer.
> + * It also frees any non-NULL member pointers with the appropriate pcreX_X_free
> + * function. For PCRE this function respects the extra_owned field and frees
> + * the pcre_extra data conditionally. Calling this function on a NULL pointer is
> + * safe.
> + */
> +void regex_data_free(struct regex_data * regex);
> +/**
> + * This function compiles the regular expression. Additionally, it prepares
> + * data structures required by the different underlying engines. For PCRE
> + * it calls pcre_study to generate optional data required for optimized
> + * execution of the compiled pattern. In the case of PCRE2, it allocates
> + * a pcre2_match_data structure of appropriate size to hold all possible
> + * matches created by the pattern.
> + *
> + * @arg regex If successful, the structure returned through *regex was allocated
> + *            with regex_data_create and must be freed with regex_data_free.
> + * @arg pattern_string The pattern string that is to be compiled.
> + * @arg errordata A pointer to a regex_error_data structure must be passed
> + *                to this function. This structure depends on the underlying
> + *                implementation. It can be passed to regex_format_error
> + *                to generate a human readable error message.
> + * @retval 0 on success
> + * @retval -1 on error
> + */
> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
> +			struct regex_error_data * errordata);
> +/**
> + * This function loads a serialized precompiled pattern from a contiguous
> + * data region given by map_area.
> + *
> + * @arg map_area Description of the memory region holding a serialized
> + *               representation of the precompiled pattern.
> + * @arg regex If successful, the structure returned through *regex was allocated
> + *            with regex_data_create and must be freed with regex_data_free.
> + *
> + * @retval 0 on success
> + * @retval -1 on error
> + */
> +int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** regex);
> +/**
> + * This function stores a precompiled regular expression to a file.
> + * In the case of PCRE, it just dumps the binary representation of the
> + * precompiled pattern into a file. In the case of PCRE2, it uses the
> + * serialization function provided by the library.
> + *
> + * @arg regex The precompiled regular expression data.
> + * @arg fp A file stream specifying the output file.
> + */
> +int regex_writef(struct regex_data * regex, FILE * fp);
> +/**
> + * This function applies a precompiled pattern to a subject string and
> + * returns whether or not a match was found.
> + *
> + * @arg regex The precompiled pattern.
> + * @arg subject The subject string.
> + * @arg partial Boolean indicating if partial matches are wanted. A nonzero
> + *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
> + *              option to pcre_exec or pcre2_match.
> + * @retval REGEX_MATCH if a match was found
> + * @retval REGEX_MATCH_PARTIAL if a partial match was found
> + * @retval REGEX_NO_MATCH if no match was found
> + * @retval REGEX_ERROR if an error was encountered during the execution of the
> + *                     regular expression
> + */
> +int regex_match(struct regex_data * regex, char const * subject, int partial);
> +/**
> + * This function compares two compiled regular expressions (regex1 and regex2).
> + * It compares the binary representations of the compiled patterns. It is a very
> + * crude approximation because the binary representation holds data, such as
> + * reference counters, that has nothing to do with the actual state machine.
> + *
> + * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
> + *                       the same
> + * @retval SELABEL_INCOMPARABLE otherwise
> + */
> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
> +/**
> + * This function takes the error data returned by regex_prepare_data and turns
> + * it into a human-readable error message.
> + * If the buffer given to hold the error message is too small, it truncates the
> + * message and indicates the truncation with an ellipsis ("...") at the end of
> + * the buffer.
> + *
> + * @arg error_data Error data as returned by regex_prepare_data.
> + * @arg buffer String buffer to hold the formatted error string.
> + * @arg buf_size Total size of the given buffer in bytes.
> + */
> +void regex_format_error(struct regex_error_data const * error_data,
> +			char * buffer, size_t buf_size);
> +#endif  /* SRC_REGEX_H_ */
> diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
> index 8497cb4..1e7a048 100644
> --- a/libselinux/utils/Makefile
> +++ b/libselinux/utils/Makefile
> @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
>            -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
>            -Werror -Wno-aggregate-return -Wno-redundant-decls
> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
>  LDLIBS += -L../src -lselinux -L$(LIBDIR)
>  
>  TARGETS=$(patsubst %.c,%,$(wildcard *.c))
>  
> -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
> +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol
>  
>  selinux_restorecon: LDLIBS += -lsepol
>  
> diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
> index fd6fb78..8ff73f4 100644
> --- a/libselinux/utils/sefcontext_compile.c
> +++ b/libselinux/utils/sefcontext_compile.c
> @@ -1,6 +1,5 @@
>  #include <ctype.h>
>  #include <errno.h>
> -#include <pcre.h>
>  #include <stdint.h>
>  #include <stdio.h>
>  #include <string.h>
> @@ -13,6 +12,7 @@
>  #include <sepol/sepol.h>
>  
>  #include "../src/label_file.h"
> +#include "../src/regex.h"
>  
>  const char *policy_file;
>  static int ctx_err;
> @@ -119,12 +119,14 @@ static int write_binary_file(struct saved_data *data, int fd)
>  	if (len != 1)
>  		goto err;
>  
> -	/* write the pcre version */
> -	section_len = strlen(pcre_version());
> +	/* write version of the regex back-end */
> +	if (!regex_version())
> +		goto err;
> +	section_len = strlen(regex_version());
>  	len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
>  	if (len != 1)
>  		goto err;
> -	len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
> +	len = fwrite(regex_version(), sizeof(char), section_len, bin_file);
>  	if (len != section_len)
>  		goto err;
>  
> @@ -162,10 +164,8 @@ static int write_binary_file(struct saved_data *data, int fd)
>  		mode_t mode = specs[i].mode;
>  		size_t prefix_len = specs[i].prefix_len;
>  		int32_t stem_id = specs[i].stem_id;
> -		pcre *re = specs[i].regex;
> -		pcre_extra *sd = get_pcre_extra(&specs[i]);
> +		struct regex_data *re = specs[i].regex;
>  		uint32_t to_write;
> -		size_t size;
>  
>  		/* length of the context string (including nul) */
>  		to_write = strlen(context) + 1;
> @@ -212,42 +212,10 @@ static int write_binary_file(struct saved_data *data, int fd)
>  		if (len != 1)
>  			goto err;
>  
> -		/* determine the size of the pcre data in bytes */
> -		rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
> +		/* Write regex related data */
> +		rc = regex_writef(re, bin_file);
>  		if (rc < 0)
>  			goto err;
> -
> -		/* write the number of bytes in the pcre data */
> -		to_write = size;
> -		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> -		if (len != 1)
> -			goto err;
> -
> -		/* write the actual pcre data as a char array */
> -		len = fwrite(re, 1, to_write, bin_file);
> -		if (len != to_write)
> -			goto err;
> -
> -		if (sd) {
> -			/* determine the size of the pcre study info */
> -			rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
> -			if (rc < 0)
> -				goto err;
> -		} else
> -			size = 0;
> -
> -		/* write the number of bytes in the pcre study data */
> -		to_write = size;
> -		len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> -		if (len != 1)
> -			goto err;
> -
> -		if (sd) {
> -			/* write the actual pcre study data as a char array */
> -			len = fwrite(sd->study_data, 1, to_write, bin_file);
> -			if (len != to_write)
> -				goto err;
> -		}
>  	}
>  
>  	rc = 0;
> @@ -270,8 +238,7 @@ static void free_specs(struct saved_data *data)
>  		free(specs[i].lr.ctx_trans);
>  		free(specs[i].regex_str);
>  		free(specs[i].type_str);
> -		pcre_free(specs[i].regex);
> -		pcre_free_study(specs[i].sd);
> +		regex_data_free(specs[i].regex);
>  	}
>  	free(specs);
>  
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07  8:08 [PATCH 1/2] libselinux: add support for pcre2 Janis Danisevskis
  2016-09-07  8:08 ` [PATCH 2/2] libselinux: fix memory leak on pcre2 Janis Danisevskis
  2016-09-07 15:02 ` [PATCH 1/2] libselinux: add support for pcre2 Stephen Smalley
@ 2016-09-07 15:19 ` William Roberts
  2016-09-07 18:25 ` Stephen Smalley
  3 siblings, 0 replies; 13+ messages in thread
From: William Roberts @ 2016-09-07 15:19 UTC (permalink / raw)
  To: Janis Danisevskis
  Cc: selinux, seandroid-list, Stephen Smalley, James Carter,
	Janis Danisevskis

On Wed, Sep 7, 2016 at 1:08 AM, Janis Danisevskis <jdanis@android.com> wrote:
> From: Janis Danisevskis <jdanis@google.com>
>
> This patch moves all pcre1/2 dependencies into the new files regex.h
> and regex.c implementing the common denominator of features needed
> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> used implementations.
>
> As of this patch libselinux supports either pcre or pcre2 but not
> both at the same time. The persistently stored file contexts
> information differs. This means libselinux can only load file
> context files generated by sefcontext_compile build with the
> same pcre variant.
>
> Also, for pcre2 the persistent format is architecture dependant.

dependant -> dependent

> Stored precompiled regular expressions can only be used on the
> same architecture they were generated on. If pcre2 is used and
> sefcontext_compile shall generate portable output, it and libselinux
> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
> cost of having to recompile the regular expressions at load time.
>
> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> ---
>  libselinux/Makefile                   |  13 ++
>  libselinux/src/Makefile               |   4 +-
>  libselinux/src/label_file.c           |  91 ++------
>  libselinux/src/label_file.h           |  54 ++---
>  libselinux/src/regex.c                | 405 ++++++++++++++++++++++++++++++++++
>  libselinux/src/regex.h                | 168 ++++++++++++++
>  libselinux/utils/Makefile             |   4 +-
>  libselinux/utils/sefcontext_compile.c |  53 +----
>  8 files changed, 637 insertions(+), 155 deletions(-)
>  create mode 100644 libselinux/src/regex.c
>  create mode 100644 libselinux/src/regex.h
>
> diff --git a/libselinux/Makefile b/libselinux/Makefile
> index 6142b60..15d051e 100644
> --- a/libselinux/Makefile
> +++ b/libselinux/Makefile
> @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
>  endif
>  export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
>
> +USE_PCRE2 ?= n
> +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
> +ifeq ($(USE_PCRE2),y)
> +       PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
> +       ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
> +               PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
> +       endif
> +       PCRE_LDFLAGS := -lpcre2-8
> +else
> +       PCRE_LDFLAGS := -lpcre
> +endif
> +export PCRE_CFLAGS PCRE_LDFLAGS
> +
>  all install relabel clean distclean indent:
>         @for subdir in $(SUBDIRS); do \
>                 (cd $$subdir && $(MAKE) $@) || exit 1; \
> diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
> index 37d01af..66687e6 100644
> --- a/libselinux/src/Makefile
> +++ b/libselinux/src/Makefile
> @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
>            -Werror -Wno-aggregate-return -Wno-redundant-decls
>
> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
>
>  SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
>                 -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
> @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
>         $(RANLIB) $@
>
>  $(LIBSO): $(LOBJS)
> -       $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> +       $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
>         ln -sf $@ $(TARGET)
>
>  $(LIBPC): $(LIBPC).in ../VERSION
> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
> index c89bb35..6698624 100644
> --- a/libselinux/src/label_file.c
> +++ b/libselinux/src/label_file.c
> @@ -15,7 +15,6 @@
>  #include <errno.h>
>  #include <limits.h>
>  #include <stdint.h>
> -#include <pcre.h>
>  #include <unistd.h>
>  #include <sys/mman.h>
>  #include <sys/types.h>
> @@ -176,7 +175,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>                 return -1;
>
>         if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
> -               len = strlen(pcre_version());
> +               if (!regex_version()) {
> +                       return -1;
> +               }
> +               len = strlen(regex_version());
>
>                 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>                 if (rc < 0)
> @@ -198,7 +200,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>                 }
>
>                 str_buf[entry_len] = '\0';
> -               if ((strcmp(str_buf, pcre_version()) != 0)) {
> +               if ((strcmp(str_buf, regex_version()) != 0)) {
>                         free(str_buf);
>                         return -1;
>                 }
> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>
>                 spec = &data->spec_arr[data->nspec];
>                 spec->from_mmap = 1;
> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
> +               spec->regcomp = 0;
> +#else
>                 spec->regcomp = 1;
> +#endif
>
>                 /* Process context */
>                 rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> @@ -364,47 +370,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>                         spec->prefix_len = prefix_len;
>                 }
>
> -               /* Process regex and study_data entries */
> -               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> -               if (rc < 0 || !entry_len) {
> -                       rc = -1;
> -                       goto err;
> -               }
> -               spec->regex = (pcre *)mmap_area->next_addr;
> -               rc = next_entry(NULL, mmap_area, entry_len);
> +               rc = regex_load_mmap(mmap_area, &spec->regex);
>                 if (rc < 0)
>                         goto err;
>
> -               /* Check that regex lengths match. pcre_fullinfo()
> -                * also validates its magic number. */
> -               rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
> -               if (rc < 0 || len != entry_len) {
> -                       rc = -1;
> -                       goto err;
> -               }
> -
> -               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> -               if (rc < 0 || !entry_len) {
> -                       rc = -1;
> -                       goto err;
> -               }
> -
> -               if (entry_len) {
> -                       spec->lsd.study_data = (void *)mmap_area->next_addr;
> -                       spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> -                       rc = next_entry(NULL, mmap_area, entry_len);
> -                       if (rc < 0)
> -                               goto err;
> -
> -                       /* Check that study data lengths match. */
> -                       rc = pcre_fullinfo(spec->regex, &spec->lsd,
> -                                          PCRE_INFO_STUDYSIZE, &len);
> -                       if (rc < 0 || len != entry_len) {
> -                               rc = -1;
> -                               goto err;
> -                       }
> -               }
> -
>                 data->nspec++;
>         }
>
> @@ -609,10 +578,7 @@ static void closef(struct selabel_handle *rec)
>                         continue;
>                 free(spec->regex_str);
>                 free(spec->type_str);
> -               if (spec->regcomp) {
> -                       pcre_free(spec->regex);
> -                       pcre_free_study(spec->sd);
> -               }
> +               regex_data_free(spec->regex);
>         }
>
>         for (i = 0; i < (unsigned int)data->num_stems; i++) {
> @@ -644,13 +610,14 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>  {
>         struct saved_data *data = (struct saved_data *)rec->data;
>         struct spec *spec_arr = data->spec_arr;
> -       int i, rc, file_stem, pcre_options = 0;
> +       int i, rc, file_stem;
>         mode_t mode = (mode_t)type;
>         const char *buf;
>         struct spec *ret = NULL;
>         char *clean_key = NULL;
>         const char *prev_slash, *next_slash;
>         unsigned int sofar = 0;
> +       struct regex_error_data regex_error_data;
>
>         if (!data->nspec) {
>                 errno = ENOENT;
> @@ -677,9 +644,6 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>         file_stem = find_stem_from_file(data, &buf);
>         mode &= S_IFMT;
>
> -       if (partial)
> -               pcre_options |= PCRE_PARTIAL_SOFT;
> -
>         /*
>          * Check for matching specifications in reverse order, so that
>          * the last matching specification is used.
> @@ -692,25 +656,19 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>                  * a regex check        */
>                 if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
>                     (!mode || !spec->mode || mode == spec->mode)) {
> -                       if (compile_regex(data, spec, NULL) < 0)
> +                       if (compile_regex(data, spec, &regex_error_data) < 0)
>                                 goto finish;
>                         if (spec->stem_id == -1)
> -                               rc = pcre_exec(spec->regex,
> -                                                   get_pcre_extra(spec),
> -                                                   key, strlen(key), 0,
> -                                                   pcre_options, NULL, 0);
> +                               rc = regex_match(spec->regex, key, partial);
>                         else
> -                               rc = pcre_exec(spec->regex,
> -                                                   get_pcre_extra(spec),
> -                                                   buf, strlen(buf), 0,
> -                                                   pcre_options, NULL, 0);
> -                       if (rc == 0) {
> +                               rc = regex_match(spec->regex, buf, partial);
> +                       if (rc == REGEX_MATCH) {
>                                 spec->matches++;
>                                 break;
> -                       } else if (partial && rc == PCRE_ERROR_PARTIAL)
> +                       } else if (partial && rc == REGEX_MATCH_PARTIAL)
>                                 break;
>
> -                       if (rc == PCRE_ERROR_NOMATCH)
> +                       if (rc == REGEX_NO_MATCH)
>                                 continue;
>
>                         errno = ENOENT;
> @@ -850,16 +808,9 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1,
>                 }
>
>                 if (spec1->regcomp && spec2->regcomp) {
> -                       size_t len1, len2;
> -                       int rc;
> -
> -                       rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1);
> -                       assert(rc == 0);
> -                       rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2);
> -                       assert(rc == 0);
> -                       if (len1 != len2 ||
> -                           memcmp(spec1->regex, spec2->regex, len1))
> +                       if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){
>                                 return incomp(spec1, spec2, "regex", i, j);
> +                       }
>                 } else {
>                         if (strcmp(spec1->regex_str, spec2->regex_str))
>                                 return incomp(spec1, spec2, "regex_str", i, j);
> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
> index 6d1e890..a2e30e5 100644
> --- a/libselinux/src/label_file.h
> +++ b/libselinux/src/label_file.h
> @@ -6,6 +6,14 @@
>
>  #include <sys/stat.h>
>
> +/*
> + * regex.h/c were introduced to hold all dependencies on the regular
> + * expression back-end when we started supporting PCRE2. regex.h defines a
> + * minimal interface required by libselinux, so that the remaining code
> + * can be agnostic about the underlying implementation.
> + */
> +#include "regex.h"
> +
>  #include "callbacks.h"
>  #include "label_internal.h"
>
> @@ -19,21 +27,12 @@
>
>  #define SELINUX_COMPILED_FCONTEXT_MAX_VERS     SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
>
> -/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> -#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> -#define pcre_free_study  pcre_free
> -#endif
> -
>  /* A file security context specification. */
>  struct spec {
>         struct selabel_lookup_rec lr;   /* holds contexts for lookup result */
>         char *regex_str;        /* regular expession string for diagnostics */
>         char *type_str;         /* type string for diagnostic messages */
> -       pcre *regex;            /* compiled regular expression */
> -       union {
> -               pcre_extra *sd; /* pointer to extra compiled stuff */
> -               pcre_extra lsd; /* used to hold the mmap'd version */
> -       };
> +       struct regex_data * regex; /* backend dependent regular expression data */
>         mode_t mode;            /* mode format value */
>         int matches;            /* number of matching pathnames */
>         int stem_id;            /* indicates which stem-compression item */
> @@ -78,17 +77,6 @@ struct saved_data {
>         struct mmap_area *mmap_areas;
>  };
>
> -static inline pcre_extra *get_pcre_extra(struct spec *spec)
> -{
> -       if (spec->from_mmap) {
> -               if (spec->lsd.study_data)
> -                       return &spec->lsd;
> -               else
> -                       return NULL;
> -       } else
> -               return spec->sd;
> -}
> -
>  static inline mode_t string_to_mode(char *mode)
>  {
>         size_t len;
> @@ -331,13 +319,12 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes)
>  }
>
>  static inline int compile_regex(struct saved_data *data, struct spec *spec,
> -                                           const char **errbuf)
> +                                           struct regex_error_data * error_data)
>  {
> -       const char *tmperrbuf;
>         char *reg_buf, *anchored_regex, *cp;
>         struct stem *stem_arr = data->stem_arr;
>         size_t len;
> -       int erroff;
> +       int rc;
>
>         if (spec->regcomp)
>                 return 0; /* already done */
> @@ -361,19 +348,9 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
>         *cp = '\0';
>
>         /* Compile the regular expression. */
> -       spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
> -                                                   &erroff, NULL);
> +       rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
>         free(anchored_regex);
> -       if (!spec->regex) {
> -               if (errbuf)
> -                       *errbuf = tmperrbuf;
> -               return -1;
> -       }
> -
> -       spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
> -       if (!spec->sd && tmperrbuf) {
> -               if (errbuf)
> -                       *errbuf = tmperrbuf;
> +       if (rc < 0) {
>                 return -1;
>         }
>
> @@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle *rec,
>         struct saved_data *data = (struct saved_data *)rec->data;
>         struct spec *spec_arr;
>         unsigned int nspec = data->nspec;
> -       const char *errbuf = NULL;
> +       char const *errbuf;
> +       struct regex_error_data error_data;
>
>         items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
>         if (items < 0) {
> @@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle *rec,
>         data->nspec++;
>
>         if (rec->validating &&
> -                           compile_regex(data, &spec_arr[nspec], &errbuf)) {
> +                           compile_regex(data, &spec_arr[nspec], &error_data)) {
>                 COMPAT_LOG(SELINUX_ERROR,
>                            "%s:  line %u has invalid regex %s:  %s\n",
>                            path, lineno, regex,
> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> new file mode 100644
> index 0000000..6b92b04
> --- /dev/null
> +++ b/libselinux/src/regex.c
> @@ -0,0 +1,405 @@
> +#include <assert.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include "regex.h"
> +#include "label_file.h"
> +
> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
> +                       struct regex_error_data * errordata) {
> +       memset(errordata, 0, sizeof(struct regex_error_data));
> +       *regex = regex_data_create();
> +       if (!(*regex))
> +               return -1;
> +#ifdef USE_PCRE2
> +       (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
> +                       PCRE2_ZERO_TERMINATED,
> +                       PCRE2_DOTALL,
> +                       &errordata->error_code,
> +                       &errordata->error_offset, NULL);
> +#else
> +       (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
> +                                       &errordata->error_buffer,
> +                                       &errordata->error_offset, NULL);
> +#endif
> +       if (!(*regex)->regex) {
> +               goto err;
> +       }
> +
> +#ifdef USE_PCRE2
> +       (*regex)->match_data =
> +               pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
> +       if (!(*regex)->match_data) {
> +               goto err;
> +       }
> +#else
> +       (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
> +       if (!(*regex)->sd && errordata->error_buffer) {
> +               goto err;
> +       }
> +       (*regex)->extra_owned = !!(*regex)->sd;
> +#endif
> +       return 0;
> +
> +err:   regex_data_free(*regex);
> +       *regex = NULL;
> +       return -1;
> +}
> +
> +char const * regex_version(void) {
> +#ifdef USE_PCRE2
> +       static int initialized = 0;
> +       static char * version_string = NULL;
> +       size_t version_string_len;
> +       if (!initialized) {
> +               version_string_len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
> +               version_string = (char*) malloc(version_string_len);
> +               if (!version_string) {
> +                       return NULL;
> +               }
> +               pcre2_config(PCRE2_CONFIG_VERSION, version_string);
> +               initialized = 1;
> +       }
> +       return version_string;
> +#else
> +       return pcre_version();
> +#endif
> +}
> +
> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
> +       int rc;
> +       size_t entry_len;
> +#ifndef USE_PCRE2
> +       size_t info_len;
> +#endif
> +
> +       rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +#ifdef USE_PCRE2
> +       if (rc < 0)
> +               return -1;
> +
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +       /* this should yield exactly one because we store one pattern at a time
> +        */
> +       rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
> +       if (rc != 1)
> +               return -1;
> +
> +       *regex = regex_data_create();
> +       if (!*regex)
> +               return -1;
> +
> +       rc = pcre2_serialize_decode(&(*regex)->regex, 1,
> +                       (PCRE2_SPTR)mmap_area->next_addr, NULL);
> +       if (rc != 1)
> +               goto err;
> +
> +       (*regex)->match_data =
> +               pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
> +       if (!(*regex)->match_data)
> +               goto err;
> +
> +#endif /* NO_PERSISTENTLY_STORED_PATTERNS */
> +       /* and skip the decoded bit */
> +       rc = next_entry(NULL, mmap_area, entry_len);
> +       if (rc < 0)
> +               goto err;
> +
> +       return 0;
> +#else
> +       if (rc < 0 || !entry_len) {
> +               return -1;
> +       }
> +       *regex = regex_data_create();
> +       if (!(*regex))
> +               return -1;
> +
> +       (*regex)->extra_owned = 0;
> +       (*regex)->regex = (pcre *) mmap_area->next_addr;
> +       rc = next_entry(NULL, mmap_area, entry_len);
> +       if (rc < 0)
> +               goto err;
> +
> +       /* Check that regex lengths match. pcre_fullinfo()
> +        * also validates its magic number. */

I didn't notice this before (sorry), but looking at all the other
files, multi-line comments do not have text on the opening line.

i.e. this:
/*
 * comment
 * comment
 */

not this:
/*comment
 *comment
 */

This comment style appears here and throughout the patch.

> +       rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
> +       if (rc < 0 || info_len != entry_len) {
> +               goto err;
> +       }
> +
> +       rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> +       if (rc < 0 || !entry_len) {
> +               goto err;
> +       }
> +
> +       if (entry_len) {
> +               (*regex)->lsd.study_data = (void *) mmap_area->next_addr;
> +               (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> +               rc = next_entry(NULL, mmap_area, entry_len);
> +               if (rc < 0)
> +                       goto err;
> +
> +               /* Check that study data lengths match. */
> +               rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
> +                                  PCRE_INFO_STUDYSIZE, &info_len);
> +               if (rc < 0 || info_len != entry_len)
> +                       goto err;
> +       }
> +       return 0;
> +#endif
> +err:
> +       regex_data_free(*regex);
> +       *regex = NULL;
> +       return -1;
> +}
> +
> +int regex_writef(struct regex_data * regex, FILE * fp) {
> +#if !defined USE_PCRE2 || !defined NO_PERSISTENTLY_STORED_PATTERNS
> +       int rc;
> +#endif
> +       size_t len;
> +#ifdef USE_PCRE2
> +       PCRE2_SIZE to_write;
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +       PCRE2_UCHAR * bytes;
> +
> +       /* encode the patter for serialization */
> +       rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
> +                                   &bytes, &to_write, NULL);
> +       if (rc != 1)
> +               return -1;
> +
> +#else
> +       (void)regex; // silence unused parameter warning
> +       to_write = 0;
> +#endif
> +       /* write serialized pattern's size */
> +       len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +       if (len != 1) {
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +               pcre2_serialize_free(bytes);
> +#endif
> +               return -1;
> +       }
> +
> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> +       /* write serialized pattern */
> +       len = fwrite(bytes, 1, to_write, fp);
> +       if (len != to_write) {
> +               pcre2_serialize_free(bytes);
> +               return -1;
> +       }
> +       pcre2_serialize_free(bytes);
> +#endif
> +#else
> +       uint32_t to_write;
> +       size_t size;
> +       pcre_extra * sd = regex->extra_owned ? regex->sd :
> +                       (regex->lsd.study_data ? &regex->lsd : NULL);
> +
> +       /* determine the size of the pcre data in bytes */
> +       rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
> +       if (rc < 0)
> +               return -1;
> +
> +       /* write the number of bytes in the pcre data */
> +       to_write = size;
> +       len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +       if (len != 1)
> +               return -1;
> +
> +       /* write the actual pcre data as a char array */
> +       len = fwrite(regex->regex, 1, to_write, fp);
> +       if (len != to_write)
> +               return -1;
> +
> +       if (sd) {
> +               /* determine the size of the pcre study info */
> +               rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
> +                               &size);
> +               if (rc < 0)
> +                       return -1;
> +       } else
> +               size = 0;
> +
> +       /* write the number of bytes in the pcre study data */
> +       to_write = size;
> +       len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> +       if (len != 1)
> +               return -1;
> +
> +       if (sd) {
> +               /* write the actual pcre study data as a char array */
> +               len = fwrite(sd->study_data, 1, to_write, fp);
> +               if (len != to_write)
> +                       return -1;
> +       }
> +#endif
> +       return 0;
> +}
> +
> +struct regex_data * regex_data_create(void) {
> +       struct regex_data * dummy = (struct regex_data*) malloc(
> +                       sizeof(struct regex_data));
> +       if (dummy) {
> +               memset(dummy, 0, sizeof(struct regex_data));
> +       }
> +       return dummy;
> +}
> +
> +void regex_data_free(struct regex_data * regex) {
> +       if (regex) {
> +#ifdef USE_PCRE2
> +               if (regex->regex) {
> +                       pcre2_code_free(regex->regex);
> +               }
> +               if (regex->match_data) {
> +                       pcre2_match_data_free(regex->match_data);
> +               }
> +#else
> +               if (regex->regex)
> +                       pcre_free(regex->regex);
> +               if (regex->extra_owned && regex->sd) {
> +                       pcre_free_study(regex->sd);
> +               }
> +#endif
> +               free(regex);
> +       }
> +}
> +
> +int regex_match(struct regex_data * regex, char const * subject, int partial) {
> +       int rc;
> +#ifdef USE_PCRE2
> +       rc = pcre2_match(regex->regex,
> +                       (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> +                       partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data,
> +                       NULL);
> +       if (rc > 0)
> +       return REGEX_MATCH;
> +       switch (rc) {
> +               case PCRE2_ERROR_PARTIAL:
> +                       return REGEX_MATCH_PARTIAL;
> +               case PCRE2_ERROR_NOMATCH:
> +                       return REGEX_NO_MATCH;
> +               default:
> +                       return REGEX_ERROR;
> +       }
> +#else
> +       rc = pcre_exec(regex->regex,
> +                       regex->extra_owned ? regex->sd : &regex->lsd, subject,
> +                       strlen(subject), 0, partial ? PCRE_PARTIAL_SOFT : 0,
> +                       NULL,
> +                       0);
> +       switch (rc) {
> +               case 0:
> +                       return REGEX_MATCH;
> +               case PCRE_ERROR_PARTIAL:
> +                       return REGEX_MATCH_PARTIAL;
> +               case PCRE_ERROR_NOMATCH:
> +                       return REGEX_NO_MATCH;
> +               default:
> +                       return REGEX_ERROR;
> +       }
> +#endif
> +}
> +
> +/* TODO Replace this compare function with something that actually compares the
> + * regular expressions.
> + * This compare function basically just compares the binary representations of
> + * the automatons, and because this representation contains pointers and
> + * metadata, it can only return a match if regex1 == regex2.
> + * Preferably, this function would be replaced with an algorithm that computes
> + * the equivalence of the automatons systematically.
> + */
> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
> +       int rc;
> +       size_t len1, len2;
> +#ifdef USE_PCRE2
> +       rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
> +       assert(rc == 0);
> +       rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
> +       assert(rc == 0);
> +       if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> +               return SELABEL_INCOMPARABLE;
> +#else
> +       rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
> +       assert(rc == 0);
> +       rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
> +       assert(rc == 0);
> +       if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> +               return SELABEL_INCOMPARABLE;
> +#endif
> +       return SELABEL_EQUAL;
> +}
> +
> +void regex_format_error(struct regex_error_data const * error_data,
> +                       char * buffer, size_t buf_size) {
> +       unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
> +       char * ptr = &buffer[buf_size - the_end_length];
> +       int rc = 0;
> +       size_t pos = 0;
> +       if (!buffer || !buf_size)
> +               return;
> +       rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
> +       if (rc < 0)
> +               /* If snprintf fails it constitutes a logical error that needs
> +                * fixing.
> +                */
> +               abort();
> +
> +       pos += rc;
> +       if (pos >= buf_size)
> +               goto truncated;
> +
> +       if (error_data->error_offset > 0) {
> +#ifdef USE_PCRE2
> +               rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
> +                               error_data->error_offset);
> +#else
> +               rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
> +                               error_data->error_offset);
> +#endif
> +               if (rc < 0)
> +                       abort();
> +
> +       }
> +       pos += rc;
> +       if (pos >= buf_size)
> +               goto truncated;
> +
> +#ifdef USE_PCRE2
> +       rc = pcre2_get_error_message(error_data->error_code,
> +                       (PCRE2_UCHAR*)(buffer + pos),
> +                       buf_size - pos);
> +       if (rc == PCRE2_ERROR_NOMEMORY)
> +               goto truncated;
> +#else
> +       rc = snprintf(buffer + pos, buf_size - pos, "%s",
> +                       error_data->error_buffer);
> +       if (rc < 0)
> +               abort();
> +
> +       if ((size_t)rc < strlen(error_data->error_buffer))
> +               goto truncated;
> +#endif
> +
> +       return;
> +
> +truncated:
> +       /* replace end of string with "..." to indicate that it was truncated */
> +       switch (the_end_length) {
> +               /* no break statements, fall-through is intended */
> +               case 4:
> +                       *ptr++ = '.';
> +               case 3:
> +                       *ptr++ = '.';
> +               case 2:
> +                       *ptr++ = '.';
> +               case 1:
> +                       *ptr++ = '\0';
> +               default:
> +                       break;
> +       }
> +       return;
> +}
> diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
> new file mode 100644
> index 0000000..bdc10c0
> --- /dev/null
> +++ b/libselinux/src/regex.h
> @@ -0,0 +1,168 @@
> +#ifndef SRC_REGEX_H_
> +#define SRC_REGEX_H_
> +
> +#include <stdio.h>
> +
> +#ifdef USE_PCRE2
> +#include <pcre2.h>
> +#else
> +#include <pcre.h>
> +#endif
> +
> +enum {
> +       REGEX_MATCH,
> +       REGEX_MATCH_PARTIAL,
> +       REGEX_NO_MATCH,
> +       REGEX_ERROR = -1,
> +};
> +
> +#ifdef USE_PCRE2
> +struct regex_data {
> +       pcre2_code * regex; /* compiled regular expression */
> +       pcre2_match_data * match_data; /* match data block required for the compiled
> +        pattern in regex2 */
> +};
> +
> +struct regex_error_data {
> +       int error_code;
> +       PCRE2_SIZE error_offset;
> +};
> +
> +/* ^^^^^^ USE_PCRE2  ^^^^^^ */
> +#else
> +/* vvvvvv USE_PCRE vvvvvv */
> +
> +/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> +#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> +#define pcre_free_study  pcre_free
> +#endif
> +
> +struct regex_data {
> +       pcre *regex; /* compiled regular expression */
> +       int extra_owned; /* non zero if pcre_extra is owned by this structure
> +                         * and thus must be freed on destruction.
> +                         */
> +       union {
> +               pcre_extra *sd; /* pointer to extra compiled stuff */
> +               pcre_extra lsd; /* used to hold the mmap'd version */
> +       };
> +};
> +
> +struct regex_error_data {
> +       char const * error_buffer;
> +       int error_offset;
> +};
> +
> +#endif /* USE_PCRE2 */
> +
> +struct mmap_area;
> +
> +/**
> + * regex_verison returns the version string of the underlying regular
> + * regular expressions library. In the case of PCRE it just returns the
> + * result of pcre_version(). In the case of PCRE2, the very first time this
> + * function is called it allocates a buffer large enough to hold the version
> + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
> + * The allocated buffer will linger in memory until the calling process is being
> + * reaped.
> + *
> + * It may return NULL on error.
> + */
> +char const * regex_version(void);
> +/**
> + * This constructor function allocates a buffer for a regex_data structure.
> + * The buffer is being initialized with zeroes.
> + */
> +struct regex_data * regex_data_create(void);
> +/**
> + * This complementary destructor function frees the a given regex_data buffer.
> + * It also frees any non NULL member pointers with the appropriate pcreX_X_free
> + * function. For PCRE this function respects the extra_owned field and frees
> + * the pcre_extra data conditionally. Calling this function on a NULL pointer is
> + * save.
> + */
> +void regex_data_free(struct regex_data * regex);
> +/**
> + * This function compiles the regular expression. Additionally, it prepares
> + * data structures required by the different underlying engines. For PCRE
> + * it calls pcre_study to generate optional data required for optimized
> + * execution of the compiled pattern. In the case of PCRE2, it allocates
> + * a pcre2_match_data structure of appropriate size to hold all possible
> + * matches created by the pattern.
> + *
> + * @arg regex If successful, the structure returned through *regex was allocated
> + *            with regex_data_create and must be freed with regex_data_free.
> + * @arg pattern_string The pattern string that is to be compiled.
> + * @arg errordata A pointer to a regex_error_data structure must be passed
> + *                to this function. This structure depends on the underlying
> + *                implementation. It can be passed to regex_format_error
> + *                to generate a human readable error message.
> + * @retval 0 on success
> + * @retval -1 on error
> + */
> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
> +                       struct regex_error_data * errordata);
> +/**
> + * This function loads a serialized precompiled pattern from a contiguous
> + * data region given by map_area.
> + *
> + * @arg map_area Description of the memory region holding a serialized
> + *               representation of the precompiled pattern.
> + * @arg regex If successful, the structure returned through *regex was allocated
> + *            with regex_data_create and must be freed with regex_data_free.
> + *
> + * @retval 0 on success
> + * @retval -1 on error
> + */
> +int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** regex);
> +/**
> + * This function stores a precompiled regular expression to a file.
> + * In the case of PCRE, it just dumps the binary representation of the
> + * precomplied pattern into a file. In the case of PCRE2, it uses the
> + * serialization function provided by the library.
> + *
> + * @arg regex The precomplied regular expression data.
> + * @arg fp A file stream specifying the output file.
> + */
> +int regex_writef(struct regex_data * regex, FILE * fp);
> +/**
> + * This function applies a precompiled pattern to a subject string and
> + * returns whether or not a match was found.
> + *
> + * @arg regex The precompiled pattern.
> + * @arg subject The subject string.
> + * @arg partial Boolean indicating if partial matches are wanted. A nonzero
> + *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
> + *              option to pcre_exec of pcre2_match.
> + * @retval REGEX_MATCH if a match was found
> + * @retval REGEX_MATCH_PARTIAL if a partial match was found
> + * @retval REGEX_NO_MATCH if no match was found
> + * @retval REGEX_ERROR if an error was encountered during the execution of the
> + *                     regular expression
> + */
> +int regex_match(struct regex_data * regex, char const * subject, int partial);
> +/**
> + * This function compares two compiled regular expressions (regex1 and regex2).
> + * It compares the binary representations of the compiled patterns. It is a very
> + * crude approximation because the binary representation holds data like
> + * reference counters, that has nothing to do with the actual state machine.
> + *
> + * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
> + *                       the same
> + * @retval SELABEL_INCOMPARABLE otherwise
> + */
> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
> +/**
> + * This function takes the error data returned by regex_prepare_data and turns
> + * it in to a human readable error message.
> + * If the buffer given to hold the error message is to small it truncates the
> + * message and indicates the truncation with an ellipsis ("...") at the end of
> + * the buffer.
> + *
> + * @arg error_data Error data as returned by regex_prepare_data.
> + * @arg buffer String buffer to hold the formated error string.
> + * @arg buf_size Total size of the given bufer in bytes.
> + */
> +void regex_format_error(struct regex_error_data const * error_data,
> +                       char * buffer, size_t buf_size);
> +#endif  /* SRC_REGEX_H_ */
> diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
> index 8497cb4..1e7a048 100644
> --- a/libselinux/utils/Makefile
> +++ b/libselinux/utils/Makefile
> @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
>            -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
>            -Werror -Wno-aggregate-return -Wno-redundant-decls
> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
>  LDLIBS += -L../src -lselinux -L$(LIBDIR)
>
>  TARGETS=$(patsubst %.c,%,$(wildcard *.c))
>
> -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
> +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol
>
>  selinux_restorecon: LDLIBS += -lsepol
>
> diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
> index fd6fb78..8ff73f4 100644
> --- a/libselinux/utils/sefcontext_compile.c
> +++ b/libselinux/utils/sefcontext_compile.c
> @@ -1,6 +1,5 @@
>  #include <ctype.h>
>  #include <errno.h>
> -#include <pcre.h>
>  #include <stdint.h>
>  #include <stdio.h>
>  #include <string.h>
> @@ -13,6 +12,7 @@
>  #include <sepol/sepol.h>
>
>  #include "../src/label_file.h"
> +#include "../src/regex.h"
>
>  const char *policy_file;
>  static int ctx_err;
> @@ -119,12 +119,14 @@ static int write_binary_file(struct saved_data *data, int fd)
>         if (len != 1)
>                 goto err;
>
> -       /* write the pcre version */
> -       section_len = strlen(pcre_version());
> +       /* write version of the regex back-end */
> +       if (!regex_version())
> +               goto err;
> +       section_len = strlen(regex_version());
>         len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
>         if (len != 1)
>                 goto err;
> -       len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
> +       len = fwrite(regex_version(), sizeof(char), section_len, bin_file);
>         if (len != section_len)
>                 goto err;
>
> @@ -162,10 +164,8 @@ static int write_binary_file(struct saved_data *data, int fd)
>                 mode_t mode = specs[i].mode;
>                 size_t prefix_len = specs[i].prefix_len;
>                 int32_t stem_id = specs[i].stem_id;
> -               pcre *re = specs[i].regex;
> -               pcre_extra *sd = get_pcre_extra(&specs[i]);
> +               struct regex_data *re = specs[i].regex;
>                 uint32_t to_write;
> -               size_t size;
>
>                 /* length of the context string (including nul) */
>                 to_write = strlen(context) + 1;
> @@ -212,42 +212,10 @@ static int write_binary_file(struct saved_data *data, int fd)
>                 if (len != 1)
>                         goto err;
>
> -               /* determine the size of the pcre data in bytes */
> -               rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
> +               /* Write regex related data */
> +               rc = regex_writef(re, bin_file);
>                 if (rc < 0)
>                         goto err;
> -
> -               /* write the number of bytes in the pcre data */
> -               to_write = size;
> -               len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> -               if (len != 1)
> -                       goto err;
> -
> -               /* write the actual pcre data as a char array */
> -               len = fwrite(re, 1, to_write, bin_file);
> -               if (len != to_write)
> -                       goto err;
> -
> -               if (sd) {
> -                       /* determine the size of the pcre study info */
> -                       rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
> -                       if (rc < 0)
> -                               goto err;
> -               } else
> -                       size = 0;
> -
> -               /* write the number of bytes in the pcre study data */
> -               to_write = size;
> -               len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> -               if (len != 1)
> -                       goto err;
> -
> -               if (sd) {
> -                       /* write the actual pcre study data as a char array */
> -                       len = fwrite(sd->study_data, 1, to_write, bin_file);
> -                       if (len != to_write)
> -                               goto err;
> -               }
>         }
>
>         rc = 0;
> @@ -270,8 +238,7 @@ static void free_specs(struct saved_data *data)
>                 free(specs[i].lr.ctx_trans);
>                 free(specs[i].regex_str);
>                 free(specs[i].type_str);
> -               pcre_free(specs[i].regex);
> -               pcre_free_study(specs[i].sd);
> +               regex_data_free(specs[i].regex);
>         }
>         free(specs);
>
> --
> 2.8.0.rc3.226.g39d4020
>
> _______________________________________________
> Selinux mailing list
> Selinux@tycho.nsa.gov
> To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
> To get help, send an email containing "help" to Selinux-request@tycho.nsa.gov.



-- 
Respectfully,

William C Roberts

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07 15:02 ` [PATCH 1/2] libselinux: add support for pcre2 Stephen Smalley
@ 2016-09-07 15:37   ` William Roberts
  2016-09-07 15:38     ` Janis Danisevskis
  2016-09-07 16:40   ` William Roberts
  2016-09-07 17:16   ` Janis Danisevskis
  2 siblings, 1 reply; 13+ messages in thread
From: William Roberts @ 2016-09-07 15:37 UTC (permalink / raw)
  To: Stephen Smalley
  Cc: Janis Danisevskis, selinux, seandroid-list, James Carter,
	Janis Danisevskis

On Wed, Sep 7, 2016 at 8:02 AM, Stephen Smalley <sds@tycho.nsa.gov> wrote:
> On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
>> From: Janis Danisevskis <jdanis@google.com>
>>
>> This patch moves all pcre1/2 dependencies into the new files regex.h
>> and regex.c implementing the common denominator of features needed
>> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
>> used implementations.
>>
>> As of this patch libselinux supports either pcre or pcre2 but not
>> both at the same time. The persistently stored file contexts
>> information differs. This means libselinux can only load file
>> context files generated by sefcontext_compile build with the
>> same pcre variant.
>
> Shouldn't the pcre variant be encoded in some manner in the
> file_contexts.bin file so that libselinux can tell immediately whether
> it is supported?
>
>> Also, for pcre2 the persistent format is architecture dependant.
>> Stored precompiled regular expressions can only be used on the
>> same architecture they were generated on. If pcre2 is used and
>> sefcontext_compile shall generate portable output, it and libselinux
>> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
>> cost of having to recompile the regular expressions at load time.
>
> Is it worth supporting the -DNO_PERSISTENTLY_STORED_PATTERNS case?  The
> point of using file_contexts.bin was to move the cost of compiling the
> regexes to build time rather than load time; if we cannot do that, then
> how much do we gain from using file_contexts.bin instead of just falling
> back to file_contexts?
>
> The #ifdef maze makes it very hard to read and maintain this code; that
> needs to be refactored.
>
> valgrind is reporting numerous errors, including both use of
> uninitialised values and memory leaks with both patches applied.  Try:
> make DESTDIR=~/obj CFLAGS+=-g clean install
> LD_LIBRARY_PATH=~/obj/lib valgrind --leak-check=full
> ~/obj/usr/sbin/matchpathcon /etc

The leak patch was only applied and tested as part of the Android build.
I am OK with that one being squashed down; there is no point in it being
separate. I can send an email from my @intel address once I get my VM back
online.

I would throw ASAN options into CFLAGS and build with that; it's a bit faster
than valgrind.

>
> On x86_64.
>
> Will provide review of the code itself later...
>
>>
>> Signed-off-by: Janis Danisevskis <jdanis@google.com>
>> ---
>>  libselinux/Makefile                   |  13 ++
>>  libselinux/src/Makefile               |   4 +-
>>  libselinux/src/label_file.c           |  91 ++------
>>  libselinux/src/label_file.h           |  54 ++---
>>  libselinux/src/regex.c                | 405 ++++++++++++++++++++++++++++++++++
>>  libselinux/src/regex.h                | 168 ++++++++++++++
>>  libselinux/utils/Makefile             |   4 +-
>>  libselinux/utils/sefcontext_compile.c |  53 +----
>>  8 files changed, 637 insertions(+), 155 deletions(-)
>>  create mode 100644 libselinux/src/regex.c
>>  create mode 100644 libselinux/src/regex.h
>>
>> diff --git a/libselinux/Makefile b/libselinux/Makefile
>> index 6142b60..15d051e 100644
>> --- a/libselinux/Makefile
>> +++ b/libselinux/Makefile
>> @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
>>  endif
>>  export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
>>
>> +USE_PCRE2 ?= n
>> +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
>> +ifeq ($(USE_PCRE2),y)
>> +     PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
>> +     ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
>> +             PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
>> +     endif
>> +     PCRE_LDFLAGS := -lpcre2-8
>> +else
>> +     PCRE_LDFLAGS := -lpcre
>> +endif
>> +export PCRE_CFLAGS PCRE_LDFLAGS
>> +
>>  all install relabel clean distclean indent:
>>       @for subdir in $(SUBDIRS); do \
>>               (cd $$subdir && $(MAKE) $@) || exit 1; \
>> diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
>> index 37d01af..66687e6 100644
>> --- a/libselinux/src/Makefile
>> +++ b/libselinux/src/Makefile
>> @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
>>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
>>            -Werror -Wno-aggregate-return -Wno-redundant-decls
>>
>> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
>> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
>>
>>  SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
>>               -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
>> @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
>>       $(RANLIB) $@
>>
>>  $(LIBSO): $(LOBJS)
>> -     $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
>> +     $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
>>       ln -sf $@ $(TARGET)
>>
>>  $(LIBPC): $(LIBPC).in ../VERSION
>> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
>> index c89bb35..6698624 100644
>> --- a/libselinux/src/label_file.c
>> +++ b/libselinux/src/label_file.c
>> @@ -15,7 +15,6 @@
>>  #include <errno.h>
>>  #include <limits.h>
>>  #include <stdint.h>
>> -#include <pcre.h>
>>  #include <unistd.h>
>>  #include <sys/mman.h>
>>  #include <sys/types.h>
>> @@ -176,7 +175,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>               return -1;
>>
>>       if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
>> -             len = strlen(pcre_version());
>> +             if (!regex_version()) {
>> +                     return -1;
>> +             }
>> +             len = strlen(regex_version());
>>
>>               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>>               if (rc < 0)
>> @@ -198,7 +200,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>               }
>>
>>               str_buf[entry_len] = '\0';
>> -             if ((strcmp(str_buf, pcre_version()) != 0)) {
>> +             if ((strcmp(str_buf, regex_version()) != 0)) {
>>                       free(str_buf);
>>                       return -1;
>>               }
>> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>
>>               spec = &data->spec_arr[data->nspec];
>>               spec->from_mmap = 1;
>> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
>> +             spec->regcomp = 0;
>> +#else
>>               spec->regcomp = 1;
>> +#endif
>>
>>               /* Process context */
>>               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> @@ -364,47 +370,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>                       spec->prefix_len = prefix_len;
>>               }
>>
>> -             /* Process regex and study_data entries */
>> -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> -             if (rc < 0 || !entry_len) {
>> -                     rc = -1;
>> -                     goto err;
>> -             }
>> -             spec->regex = (pcre *)mmap_area->next_addr;
>> -             rc = next_entry(NULL, mmap_area, entry_len);
>> +             rc = regex_load_mmap(mmap_area, &spec->regex);
>>               if (rc < 0)
>>                       goto err;
>>
>> -             /* Check that regex lengths match. pcre_fullinfo()
>> -              * also validates its magic number. */
>> -             rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
>> -             if (rc < 0 || len != entry_len) {
>> -                     rc = -1;
>> -                     goto err;
>> -             }
>> -
>> -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> -             if (rc < 0 || !entry_len) {
>> -                     rc = -1;
>> -                     goto err;
>> -             }
>> -
>> -             if (entry_len) {
>> -                     spec->lsd.study_data = (void *)mmap_area->next_addr;
>> -                     spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
>> -                     rc = next_entry(NULL, mmap_area, entry_len);
>> -                     if (rc < 0)
>> -                             goto err;
>> -
>> -                     /* Check that study data lengths match. */
>> -                     rc = pcre_fullinfo(spec->regex, &spec->lsd,
>> -                                        PCRE_INFO_STUDYSIZE, &len);
>> -                     if (rc < 0 || len != entry_len) {
>> -                             rc = -1;
>> -                             goto err;
>> -                     }
>> -             }
>> -
>>               data->nspec++;
>>       }
>>
>> @@ -609,10 +578,7 @@ static void closef(struct selabel_handle *rec)
>>                       continue;
>>               free(spec->regex_str);
>>               free(spec->type_str);
>> -             if (spec->regcomp) {
>> -                     pcre_free(spec->regex);
>> -                     pcre_free_study(spec->sd);
>> -             }
>> +             regex_data_free(spec->regex);
>>       }
>>
>>       for (i = 0; i < (unsigned int)data->num_stems; i++) {
>> @@ -644,13 +610,14 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>>  {
>>       struct saved_data *data = (struct saved_data *)rec->data;
>>       struct spec *spec_arr = data->spec_arr;
>> -     int i, rc, file_stem, pcre_options = 0;
>> +     int i, rc, file_stem;
>>       mode_t mode = (mode_t)type;
>>       const char *buf;
>>       struct spec *ret = NULL;
>>       char *clean_key = NULL;
>>       const char *prev_slash, *next_slash;
>>       unsigned int sofar = 0;
>> +     struct regex_error_data regex_error_data;
>>
>>       if (!data->nspec) {
>>               errno = ENOENT;
>> @@ -677,9 +644,6 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>>       file_stem = find_stem_from_file(data, &buf);
>>       mode &= S_IFMT;
>>
>> -     if (partial)
>> -             pcre_options |= PCRE_PARTIAL_SOFT;
>> -
>>       /*
>>        * Check for matching specifications in reverse order, so that
>>        * the last matching specification is used.
>> @@ -692,25 +656,19 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>>                * a regex check        */
>>               if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
>>                   (!mode || !spec->mode || mode == spec->mode)) {
>> -                     if (compile_regex(data, spec, NULL) < 0)
>> +                     if (compile_regex(data, spec, &regex_error_data) < 0)
>>                               goto finish;
>>                       if (spec->stem_id == -1)
>> -                             rc = pcre_exec(spec->regex,
>> -                                                 get_pcre_extra(spec),
>> -                                                 key, strlen(key), 0,
>> -                                                 pcre_options, NULL, 0);
>> +                             rc = regex_match(spec->regex, key, partial);
>>                       else
>> -                             rc = pcre_exec(spec->regex,
>> -                                                 get_pcre_extra(spec),
>> -                                                 buf, strlen(buf), 0,
>> -                                                 pcre_options, NULL, 0);
>> -                     if (rc == 0) {
>> +                             rc = regex_match(spec->regex, buf, partial);
>> +                     if (rc == REGEX_MATCH) {
>>                               spec->matches++;
>>                               break;
>> -                     } else if (partial && rc == PCRE_ERROR_PARTIAL)
>> +                     } else if (partial && rc == REGEX_MATCH_PARTIAL)
>>                               break;
>>
>> -                     if (rc == PCRE_ERROR_NOMATCH)
>> +                     if (rc == REGEX_NO_MATCH)
>>                               continue;
>>
>>                       errno = ENOENT;
>> @@ -850,16 +808,9 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1,
>>               }
>>
>>               if (spec1->regcomp && spec2->regcomp) {
>> -                     size_t len1, len2;
>> -                     int rc;
>> -
>> -                     rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1);
>> -                     assert(rc == 0);
>> -                     rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2);
>> -                     assert(rc == 0);
>> -                     if (len1 != len2 ||
>> -                         memcmp(spec1->regex, spec2->regex, len1))
>> +                     if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){
>>                               return incomp(spec1, spec2, "regex", i, j);
>> +                     }
>>               } else {
>>                       if (strcmp(spec1->regex_str, spec2->regex_str))
>>                               return incomp(spec1, spec2, "regex_str", i, j);
>> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
>> index 6d1e890..a2e30e5 100644
>> --- a/libselinux/src/label_file.h
>> +++ b/libselinux/src/label_file.h
>> @@ -6,6 +6,14 @@
>>
>>  #include <sys/stat.h>
>>
>> +/*
>> + * regex.h/c were introduced to hold all dependencies on the regular
>> + * expression back-end when we started supporting PCRE2. regex.h defines a
>> + * minimal interface required by libselinux, so that the remaining code
>> + * can be agnostic about the underlying implementation.
>> + */
>> +#include "regex.h"
>> +
>>  #include "callbacks.h"
>>  #include "label_internal.h"
>>
>> @@ -19,21 +27,12 @@
>>
>>  #define SELINUX_COMPILED_FCONTEXT_MAX_VERS   SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
>>
>> -/* Prior to version 8.20, libpcre did not have pcre_free_study() */
>> -#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
>> -#define pcre_free_study  pcre_free
>> -#endif
>> -
>>  /* A file security context specification. */
>>  struct spec {
>>       struct selabel_lookup_rec lr;   /* holds contexts for lookup result */
>>       char *regex_str;        /* regular expession string for diagnostics */
>>       char *type_str;         /* type string for diagnostic messages */
>> -     pcre *regex;            /* compiled regular expression */
>> -     union {
>> -             pcre_extra *sd; /* pointer to extra compiled stuff */
>> -             pcre_extra lsd; /* used to hold the mmap'd version */
>> -     };
>> +     struct regex_data * regex; /* backend dependent regular expression data */
>>       mode_t mode;            /* mode format value */
>>       int matches;            /* number of matching pathnames */
>>       int stem_id;            /* indicates which stem-compression item */
>> @@ -78,17 +77,6 @@ struct saved_data {
>>       struct mmap_area *mmap_areas;
>>  };
>>
>> -static inline pcre_extra *get_pcre_extra(struct spec *spec)
>> -{
>> -     if (spec->from_mmap) {
>> -             if (spec->lsd.study_data)
>> -                     return &spec->lsd;
>> -             else
>> -                     return NULL;
>> -     } else
>> -             return spec->sd;
>> -}
>> -
>>  static inline mode_t string_to_mode(char *mode)
>>  {
>>       size_t len;
>> @@ -331,13 +319,12 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes)
>>  }
>>
>>  static inline int compile_regex(struct saved_data *data, struct spec *spec,
>> -                                         const char **errbuf)
>> +                                         struct regex_error_data * error_data)
>>  {
>> -     const char *tmperrbuf;
>>       char *reg_buf, *anchored_regex, *cp;
>>       struct stem *stem_arr = data->stem_arr;
>>       size_t len;
>> -     int erroff;
>> +     int rc;
>>
>>       if (spec->regcomp)
>>               return 0; /* already done */
>> @@ -361,19 +348,9 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
>>       *cp = '\0';
>>
>>       /* Compile the regular expression. */
>> -     spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
>> -                                                 &erroff, NULL);
>> +     rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
>>       free(anchored_regex);
>> -     if (!spec->regex) {
>> -             if (errbuf)
>> -                     *errbuf = tmperrbuf;
>> -             return -1;
>> -     }
>> -
>> -     spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
>> -     if (!spec->sd && tmperrbuf) {
>> -             if (errbuf)
>> -                     *errbuf = tmperrbuf;
>> +     if (rc < 0) {
>>               return -1;
>>       }
>>
>> @@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle *rec,
>>       struct saved_data *data = (struct saved_data *)rec->data;
>>       struct spec *spec_arr;
>>       unsigned int nspec = data->nspec;
>> -     const char *errbuf = NULL;
>> +     char const *errbuf;
>> +     struct regex_error_data error_data;
>>
>>       items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
>>       if (items < 0) {
>> @@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle *rec,
>>       data->nspec++;
>>
>>       if (rec->validating &&
>> -                         compile_regex(data, &spec_arr[nspec], &errbuf)) {
>> +                         compile_regex(data, &spec_arr[nspec], &error_data)) {
>>               COMPAT_LOG(SELINUX_ERROR,
>>                          "%s:  line %u has invalid regex %s:  %s\n",
>>                          path, lineno, regex,
>> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
>> new file mode 100644
>> index 0000000..6b92b04
>> --- /dev/null
>> +++ b/libselinux/src/regex.c
>> @@ -0,0 +1,405 @@
>> +#include <assert.h>
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +
>> +#include "regex.h"
>> +#include "label_file.h"
>> +
>> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
>> +                     struct regex_error_data * errordata) {
>> +     memset(errordata, 0, sizeof(struct regex_error_data));
>> +     *regex = regex_data_create();
>> +     if (!(*regex))
>> +             return -1;
>> +#ifdef USE_PCRE2
>> +     (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
>> +                     PCRE2_ZERO_TERMINATED,
>> +                     PCRE2_DOTALL,
>> +                     &errordata->error_code,
>> +                     &errordata->error_offset, NULL);
>> +#else
>> +     (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
>> +                                     &errordata->error_buffer,
>> +                                     &errordata->error_offset, NULL);
>> +#endif
>> +     if (!(*regex)->regex) {
>> +             goto err;
>> +     }
>> +
>> +#ifdef USE_PCRE2
>> +     (*regex)->match_data =
>> +             pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
>> +     if (!(*regex)->match_data) {
>> +             goto err;
>> +     }
>> +#else
>> +     (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
>> +     if (!(*regex)->sd && errordata->error_buffer) {
>> +             goto err;
>> +     }
>> +     (*regex)->extra_owned = !!(*regex)->sd;
>> +#endif
>> +     return 0;
>> +
>> +err: regex_data_free(*regex);
>> +     *regex = NULL;
>> +     return -1;
>> +}
>> +
>> +char const * regex_version(void) {
>> +#ifdef USE_PCRE2
>> +     static int initialized = 0;
>> +     static char * version_string = NULL;
>> +     size_t version_string_len;
>> +     if (!initialized) {
>> +             version_string_len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
>> +             version_string = (char*) malloc(version_string_len);
>> +             if (!version_string) {
>> +                     return NULL;
>> +             }
>> +             pcre2_config(PCRE2_CONFIG_VERSION, version_string);
>> +             initialized = 1;
>> +     }
>> +     return version_string;
>> +#else
>> +     return pcre_version();
>> +#endif
>> +}
>> +
>> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
>> +     int rc;
>> +     size_t entry_len;
>> +#ifndef USE_PCRE2
>> +     size_t info_len;
>> +#endif
>> +
>> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> +#ifdef USE_PCRE2
>> +     if (rc < 0)
>> +             return -1;
>> +
>> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
>> +     /* this should yield exactly one because we store one pattern at a time
>> +      */
>> +     rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
>> +     if (rc != 1)
>> +             return -1;
>> +
>> +     *regex = regex_data_create();
>> +     if (!*regex)
>> +             return -1;
>> +
>> +     rc = pcre2_serialize_decode(&(*regex)->regex, 1,
>> +                     (PCRE2_SPTR)mmap_area->next_addr, NULL);
>> +     if (rc != 1)
>> +             goto err;
>> +
>> +     (*regex)->match_data =
>> +             pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
>> +     if (!(*regex)->match_data)
>> +             goto err;
>> +
>> +#endif /* NO_PERSISTENTLY_STORED_PATTERNS */
>> +     /* and skip the decoded bit */
>> +     rc = next_entry(NULL, mmap_area, entry_len);
>> +     if (rc < 0)
>> +             goto err;
>> +
>> +     return 0;
>> +#else
>> +     if (rc < 0 || !entry_len) {
>> +             return -1;
>> +     }
>> +     *regex = regex_data_create();
>> +     if (!(*regex))
>> +             return -1;
>> +
>> +     (*regex)->extra_owned = 0;
>> +     (*regex)->regex = (pcre *) mmap_area->next_addr;
>> +     rc = next_entry(NULL, mmap_area, entry_len);
>> +     if (rc < 0)
>> +             goto err;
>> +
>> +     /* Check that regex lengths match. pcre_fullinfo()
>> +      * also validates its magic number. */
>> +     rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
>> +     if (rc < 0 || info_len != entry_len) {
>> +             goto err;
>> +     }
>> +
>> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> +     if (rc < 0 || !entry_len) {
>> +             goto err;
>> +     }
>> +
>> +     if (entry_len) {
>> +             (*regex)->lsd.study_data = (void *) mmap_area->next_addr;
>> +             (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
>> +             rc = next_entry(NULL, mmap_area, entry_len);
>> +             if (rc < 0)
>> +                     goto err;
>> +
>> +             /* Check that study data lengths match. */
>> +             rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
>> +                                PCRE_INFO_STUDYSIZE, &info_len);
>> +             if (rc < 0 || info_len != entry_len)
>> +                     goto err;
>> +     }
>> +     return 0;
>> +#endif
>> +err:
>> +     regex_data_free(*regex);
>> +     *regex = NULL;
>> +     return -1;
>> +}
>> +
>> +int regex_writef(struct regex_data * regex, FILE * fp) {
>> +#if !defined USE_PCRE2 || !defined NO_PERSISTENTLY_STORED_PATTERNS
>> +     int rc;
>> +#endif
>> +     size_t len;
>> +#ifdef USE_PCRE2
>> +     PCRE2_SIZE to_write;
>> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
>> +     PCRE2_UCHAR * bytes;
>> +
>> +     /* encode the pattern for serialization */
>> +     rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
>> +                                 &bytes, &to_write, NULL);
>> +     if (rc != 1)
>> +             return -1;
>> +
>> +#else
>> +     (void)regex; // silence unused parameter warning
>> +     to_write = 0;
>> +#endif
>> +     /* write serialized pattern's size */
>> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
>> +     if (len != 1) {
>> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
>> +             pcre2_serialize_free(bytes);
>> +#endif
>> +             return -1;
>> +     }
>> +
>> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
>> +     /* write serialized pattern */
>> +     len = fwrite(bytes, 1, to_write, fp);
>> +     if (len != to_write) {
>> +             pcre2_serialize_free(bytes);
>> +             return -1;
>> +     }
>> +     pcre2_serialize_free(bytes);
>> +#endif
>> +#else
>> +     uint32_t to_write;
>> +     size_t size;
>> +     pcre_extra * sd = regex->extra_owned ? regex->sd :
>> +                     (regex->lsd.study_data ? &regex->lsd : NULL);
>> +
>> +     /* determine the size of the pcre data in bytes */
>> +     rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
>> +     if (rc < 0)
>> +             return -1;
>> +
>> +     /* write the number of bytes in the pcre data */
>> +     to_write = size;
>> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
>> +     if (len != 1)
>> +             return -1;
>> +
>> +     /* write the actual pcre data as a char array */
>> +     len = fwrite(regex->regex, 1, to_write, fp);
>> +     if (len != to_write)
>> +             return -1;
>> +
>> +     if (sd) {
>> +             /* determine the size of the pcre study info */
>> +             rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
>> +                             &size);
>> +             if (rc < 0)
>> +                     return -1;
>> +     } else
>> +             size = 0;
>> +
>> +     /* write the number of bytes in the pcre study data */
>> +     to_write = size;
>> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
>> +     if (len != 1)
>> +             return -1;
>> +
>> +     if (sd) {
>> +             /* write the actual pcre study data as a char array */
>> +             len = fwrite(sd->study_data, 1, to_write, fp);
>> +             if (len != to_write)
>> +                     return -1;
>> +     }
>> +#endif
>> +     return 0;
>> +}
>> +
>> +struct regex_data * regex_data_create(void) {
>> +     struct regex_data * dummy = (struct regex_data*) malloc(
>> +                     sizeof(struct regex_data));
>> +     if (dummy) {
>> +             memset(dummy, 0, sizeof(struct regex_data));
>> +     }
>> +     return dummy;
>> +}
>> +
>> +void regex_data_free(struct regex_data * regex) {
>> +     if (regex) {
>> +#ifdef USE_PCRE2
>> +             if (regex->regex) {
>> +                     pcre2_code_free(regex->regex);
>> +             }
>> +             if (regex->match_data) {
>> +                     pcre2_match_data_free(regex->match_data);
>> +             }
>> +#else
>> +             if (regex->regex)
>> +                     pcre_free(regex->regex);
>> +             if (regex->extra_owned && regex->sd) {
>> +                     pcre_free_study(regex->sd);
>> +             }
>> +#endif
>> +             free(regex);
>> +     }
>> +}
>> +
>> +int regex_match(struct regex_data * regex, char const * subject, int partial) {
>> +     int rc;
>> +#ifdef USE_PCRE2
>> +     rc = pcre2_match(regex->regex,
>> +                     (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
>> +                     partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data,
>> +                     NULL);
>> +     if (rc > 0)
>> +     return REGEX_MATCH;
>> +     switch (rc) {
>> +             case PCRE2_ERROR_PARTIAL:
>> +                     return REGEX_MATCH_PARTIAL;
>> +             case PCRE2_ERROR_NOMATCH:
>> +                     return REGEX_NO_MATCH;
>> +             default:
>> +                     return REGEX_ERROR;
>> +     }
>> +#else
>> +     rc = pcre_exec(regex->regex,
>> +                     regex->extra_owned ? regex->sd : &regex->lsd, subject,
>> +                     strlen(subject), 0, partial ? PCRE_PARTIAL_SOFT : 0,
>> +                     NULL,
>> +                     0);
>> +     switch (rc) {
>> +             case 0:
>> +                     return REGEX_MATCH;
>> +             case PCRE_ERROR_PARTIAL:
>> +                     return REGEX_MATCH_PARTIAL;
>> +             case PCRE_ERROR_NOMATCH:
>> +                     return REGEX_NO_MATCH;
>> +             default:
>> +                     return REGEX_ERROR;
>> +     }
>> +#endif
>> +}
>> +
>> +/* TODO Replace this compare function with something that actually compares the
>> + * regular expressions.
>> + * This compare function basically just compares the binary representations of
>> + * the automatons, and because this representation contains pointers and
>> + * metadata, it can only return a match if regex1 == regex2.
>> + * Preferably, this function would be replaced with an algorithm that computes
>> + * the equivalence of the automatons systematically.
>> + */
>> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
>> +     int rc;
>> +     size_t len1, len2;
>> +#ifdef USE_PCRE2
>> +     rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
>> +     assert(rc == 0);
>> +     rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
>> +     assert(rc == 0);
>> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
>> +             return SELABEL_INCOMPARABLE;
>> +#else
>> +     rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
>> +     assert(rc == 0);
>> +     rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
>> +     assert(rc == 0);
>> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
>> +             return SELABEL_INCOMPARABLE;
>> +#endif
>> +     return SELABEL_EQUAL;
>> +}
>> +
>> +void regex_format_error(struct regex_error_data const * error_data,
>> +                     char * buffer, size_t buf_size) {
>> +     unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
>> +     char * ptr = &buffer[buf_size - the_end_length];
>> +     int rc = 0;
>> +     size_t pos = 0;
>> +     if (!buffer || !buf_size)
>> +             return;
>> +     rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
>> +     if (rc < 0)
>> +             /* If snprintf fails it constitutes a logical error that needs
>> +              * fixing.
>> +              */
>> +             abort();
>> +
>> +     pos += rc;
>> +     if (pos >= buf_size)
>> +             goto truncated;
>> +
>> +     if (error_data->error_offset > 0) {
>> +#ifdef USE_PCRE2
>> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
>> +                             error_data->error_offset);
>> +#else
>> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
>> +                             error_data->error_offset);
>> +#endif
>> +             if (rc < 0)
>> +                     abort();
>> +
>> +     }
>> +     pos += rc;
>> +     if (pos >= buf_size)
>> +             goto truncated;
>> +
>> +#ifdef USE_PCRE2
>> +     rc = pcre2_get_error_message(error_data->error_code,
>> +                     (PCRE2_UCHAR*)(buffer + pos),
>> +                     buf_size - pos);
>> +     if (rc == PCRE2_ERROR_NOMEMORY)
>> +             goto truncated;
>> +#else
>> +     rc = snprintf(buffer + pos, buf_size - pos, "%s",
>> +                     error_data->error_buffer);
>> +     if (rc < 0)
>> +             abort();
>> +
>> +     if ((size_t)rc < strlen(error_data->error_buffer))
>> +             goto truncated;
>> +#endif
>> +
>> +     return;
>> +
>> +truncated:
>> +     /* replace end of string with "..." to indicate that it was truncated */
>> +     switch (the_end_length) {
>> +             /* no break statements, fall-through is intended */
>> +             case 4:
>> +                     *ptr++ = '.';
>> +             case 3:
>> +                     *ptr++ = '.';
>> +             case 2:
>> +                     *ptr++ = '.';
>> +             case 1:
>> +                     *ptr++ = '\0';
>> +             default:
>> +                     break;
>> +     }
>> +     return;
>> +}
>> diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
>> new file mode 100644
>> index 0000000..bdc10c0
>> --- /dev/null
>> +++ b/libselinux/src/regex.h
>> @@ -0,0 +1,168 @@
>> +#ifndef SRC_REGEX_H_
>> +#define SRC_REGEX_H_
>> +
>> +#include <stdio.h>
>> +
>> +#ifdef USE_PCRE2
>> +#include <pcre2.h>
>> +#else
>> +#include <pcre.h>
>> +#endif
>> +
>> +enum {
>> +     REGEX_MATCH,
>> +     REGEX_MATCH_PARTIAL,
>> +     REGEX_NO_MATCH,
>> +     REGEX_ERROR = -1,
>> +};
>> +
>> +#ifdef USE_PCRE2
>> +struct regex_data {
>> +     pcre2_code * regex; /* compiled regular expression */
>> +     pcre2_match_data * match_data; /* match data block required for the compiled
>> +      pattern in regex2 */
>> +};
>> +
>> +struct regex_error_data {
>> +     int error_code;
>> +     PCRE2_SIZE error_offset;
>> +};
>> +
>> +/* ^^^^^^ USE_PCRE2  ^^^^^^ */
>> +#else
>> +/* vvvvvv USE_PCRE vvvvvv */
>> +
>> +/* Prior to version 8.20, libpcre did not have pcre_free_study() */
>> +#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
>> +#define pcre_free_study  pcre_free
>> +#endif
>> +
>> +struct regex_data {
>> +     pcre *regex; /* compiled regular expression */
>> +     int extra_owned; /* non zero if pcre_extra is owned by this structure
>> +                       * and thus must be freed on destruction.
>> +                       */
>> +     union {
>> +             pcre_extra *sd; /* pointer to extra compiled stuff */
>> +             pcre_extra lsd; /* used to hold the mmap'd version */
>> +     };
>> +};
>> +
>> +struct regex_error_data {
>> +     char const * error_buffer;
>> +     int error_offset;
>> +};
>> +
>> +#endif /* USE_PCRE2 */
>> +
>> +struct mmap_area;
>> +
>> +/**
>> + * regex_version returns the version string of the underlying
>> + * regular expressions library. In the case of PCRE it just returns the
>> + * result of pcre_version(). In the case of PCRE2, the very first time this
>> + * function is called it allocates a buffer large enough to hold the version
>> + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
>> + * The allocated buffer will linger in memory until the calling process is being
>> + * reaped.
>> + *
>> + * It may return NULL on error.
>> + */
>> +char const * regex_version(void);
>> +/**
>> + * This constructor function allocates a buffer for a regex_data structure.
>> + * The buffer is being initialized with zeroes.
>> + */
>> +struct regex_data * regex_data_create(void);
>> +/**
>> + * This complementary destructor function frees a given regex_data buffer.
>> + * It also frees any non-NULL member pointers with the appropriate pcreX_X_free
>> + * function. For PCRE this function respects the extra_owned field and frees
>> + * the pcre_extra data conditionally. Calling this function on a NULL pointer is
>> + * safe.
>> + */
>> +void regex_data_free(struct regex_data * regex);
>> +/**
>> + * This function compiles the regular expression. Additionally, it prepares
>> + * data structures required by the different underlying engines. For PCRE
>> + * it calls pcre_study to generate optional data required for optimized
>> + * execution of the compiled pattern. In the case of PCRE2, it allocates
>> + * a pcre2_match_data structure of appropriate size to hold all possible
>> + * matches created by the pattern.
>> + *
>> + * @arg regex If successful, the structure returned through *regex was allocated
>> + *            with regex_data_create and must be freed with regex_data_free.
>> + * @arg pattern_string The pattern string that is to be compiled.
>> + * @arg errordata A pointer to a regex_error_data structure must be passed
>> + *                to this function. This structure depends on the underlying
>> + *                implementation. It can be passed to regex_format_error
>> + *                to generate a human readable error message.
>> + * @retval 0 on success
>> + * @retval -1 on error
>> + */
>> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
>> +                     struct regex_error_data * errordata);
>> +/**
>> + * This function loads a serialized precompiled pattern from a contiguous
>> + * data region given by map_area.
>> + *
>> + * @arg map_area Description of the memory region holding a serialized
>> + *               representation of the precompiled pattern.
>> + * @arg regex If successful, the structure returned through *regex was allocated
>> + *            with regex_data_create and must be freed with regex_data_free.
>> + *
>> + * @retval 0 on success
>> + * @retval -1 on error
>> + */
>> +int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** regex);
>> +/**
>> + * This function stores a precompiled regular expression to a file.
>> + * In the case of PCRE, it just dumps the binary representation of the
>> + * precompiled pattern into a file. In the case of PCRE2, it uses the
>> + * serialization function provided by the library.
>> + *
>> + * @arg regex The precompiled regular expression data.
>> + * @arg fp A file stream specifying the output file.
>> + */
>> +int regex_writef(struct regex_data * regex, FILE * fp);
>> +/**
>> + * This function applies a precompiled pattern to a subject string and
>> + * returns whether or not a match was found.
>> + *
>> + * @arg regex The precompiled pattern.
>> + * @arg subject The subject string.
>> + * @arg partial Boolean indicating if partial matches are wanted. A nonzero
>> + *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
>> + *              option to pcre_exec or pcre2_match.
>> + * @retval REGEX_MATCH if a match was found
>> + * @retval REGEX_MATCH_PARTIAL if a partial match was found
>> + * @retval REGEX_NO_MATCH if no match was found
>> + * @retval REGEX_ERROR if an error was encountered during the execution of the
>> + *                     regular expression
>> + */
>> +int regex_match(struct regex_data * regex, char const * subject, int partial);
>> +/**
>> + * This function compares two compiled regular expressions (regex1 and regex2).
>> + * It compares the binary representations of the compiled patterns. It is a very
>> + * crude approximation because the binary representation holds data like
>> + * reference counters, that has nothing to do with the actual state machine.
>> + *
>> + * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
>> + *                       the same
>> + * @retval SELABEL_INCOMPARABLE otherwise
>> + */
>> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
>> +/**
>> + * This function takes the error data returned by regex_prepare_data and turns
>> + * it into a human-readable error message.
>> + * If the buffer given to hold the error message is too small it truncates the
>> + * message and indicates the truncation with an ellipsis ("...") at the end of
>> + * the buffer.
>> + *
>> + * @arg error_data Error data as returned by regex_prepare_data.
>> + * @arg buffer String buffer to hold the formatted error string.
>> + * @arg buf_size Total size of the given buffer in bytes.
>> + */
>> +void regex_format_error(struct regex_error_data const * error_data,
>> +                     char * buffer, size_t buf_size);
>> +#endif  /* SRC_REGEX_H_ */
>> diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
>> index 8497cb4..1e7a048 100644
>> --- a/libselinux/utils/Makefile
>> +++ b/libselinux/utils/Makefile
>> @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
>>            -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
>>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
>>            -Werror -Wno-aggregate-return -Wno-redundant-decls
>> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
>> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
>>  LDLIBS += -L../src -lselinux -L$(LIBDIR)
>>
>>  TARGETS=$(patsubst %.c,%,$(wildcard *.c))
>>
>> -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
>> +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol
>>
>>  selinux_restorecon: LDLIBS += -lsepol
>>
>> diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
>> index fd6fb78..8ff73f4 100644
>> --- a/libselinux/utils/sefcontext_compile.c
>> +++ b/libselinux/utils/sefcontext_compile.c
>> @@ -1,6 +1,5 @@
>>  #include <ctype.h>
>>  #include <errno.h>
>> -#include <pcre.h>
>>  #include <stdint.h>
>>  #include <stdio.h>
>>  #include <string.h>
>> @@ -13,6 +12,7 @@
>>  #include <sepol/sepol.h>
>>
>>  #include "../src/label_file.h"
>> +#include "../src/regex.h"
>>
>>  const char *policy_file;
>>  static int ctx_err;
>> @@ -119,12 +119,14 @@ static int write_binary_file(struct saved_data *data, int fd)
>>       if (len != 1)
>>               goto err;
>>
>> -     /* write the pcre version */
>> -     section_len = strlen(pcre_version());
>> +     /* write version of the regex back-end */
>> +     if (!regex_version())
>> +             goto err;
>> +     section_len = strlen(regex_version());
>>       len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
>>       if (len != 1)
>>               goto err;
>> -     len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
>> +     len = fwrite(regex_version(), sizeof(char), section_len, bin_file);
>>       if (len != section_len)
>>               goto err;
>>
>> @@ -162,10 +164,8 @@ static int write_binary_file(struct saved_data *data, int fd)
>>               mode_t mode = specs[i].mode;
>>               size_t prefix_len = specs[i].prefix_len;
>>               int32_t stem_id = specs[i].stem_id;
>> -             pcre *re = specs[i].regex;
>> -             pcre_extra *sd = get_pcre_extra(&specs[i]);
>> +             struct regex_data *re = specs[i].regex;
>>               uint32_t to_write;
>> -             size_t size;
>>
>>               /* length of the context string (including nul) */
>>               to_write = strlen(context) + 1;
>> @@ -212,42 +212,10 @@ static int write_binary_file(struct saved_data *data, int fd)
>>               if (len != 1)
>>                       goto err;
>>
>> -             /* determine the size of the pcre data in bytes */
>> -             rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
>> +             /* Write regex related data */
>> +             rc = regex_writef(re, bin_file);
>>               if (rc < 0)
>>                       goto err;
>> -
>> -             /* write the number of bytes in the pcre data */
>> -             to_write = size;
>> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
>> -             if (len != 1)
>> -                     goto err;
>> -
>> -             /* write the actual pcre data as a char array */
>> -             len = fwrite(re, 1, to_write, bin_file);
>> -             if (len != to_write)
>> -                     goto err;
>> -
>> -             if (sd) {
>> -                     /* determine the size of the pcre study info */
>> -                     rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
>> -                     if (rc < 0)
>> -                             goto err;
>> -             } else
>> -                     size = 0;
>> -
>> -             /* write the number of bytes in the pcre study data */
>> -             to_write = size;
>> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
>> -             if (len != 1)
>> -                     goto err;
>> -
>> -             if (sd) {
>> -                     /* write the actual pcre study data as a char array */
>> -                     len = fwrite(sd->study_data, 1, to_write, bin_file);
>> -                     if (len != to_write)
>> -                             goto err;
>> -             }
>>       }
>>
>>       rc = 0;
>> @@ -270,8 +238,7 @@ static void free_specs(struct saved_data *data)
>>               free(specs[i].lr.ctx_trans);
>>               free(specs[i].regex_str);
>>               free(specs[i].type_str);
>> -             pcre_free(specs[i].regex);
>> -             pcre_free_study(specs[i].sd);
>> +             regex_data_free(specs[i].regex);
>>       }
>>       free(specs);
>>
>>
>
> _______________________________________________
> Selinux mailing list
> Selinux@tycho.nsa.gov
> To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
> To get help, send an email containing "help" to Selinux-request@tycho.nsa.gov.



-- 
Respectfully,

William C Roberts

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07 15:37   ` William Roberts
@ 2016-09-07 15:38     ` Janis Danisevskis
  0 siblings, 0 replies; 13+ messages in thread
From: Janis Danisevskis @ 2016-09-07 15:38 UTC (permalink / raw)
  To: William Roberts, Stephen Smalley
  Cc: Janis Danisevskis, selinux, seandroid-list, James Carter

[-- Attachment #1: Type: text/plain, Size: 46826 bytes --]

Ok, thanks.

On Wed, Sep 7, 2016 at 4:37 PM William Roberts <bill.c.roberts@gmail.com>
wrote:

> On Wed, Sep 7, 2016 at 8:02 AM, Stephen Smalley <sds@tycho.nsa.gov> wrote:
> > On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
> >> From: Janis Danisevskis <jdanis@google.com>
> >>
> >> This patch moves all pcre1/2 dependencies into the new files regex.h
> >> and regex.c implementing the common denominator of features needed
> >> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> >> used implementations.
> >>
> >> As of this patch libselinux supports either pcre or pcre2 but not
> >> both at the same time. The persistently stored file contexts
> >> information differs. This means libselinux can only load file
> >> context files generated by sefcontext_compile built with the
> >> same pcre variant.
> >
> > Shouldn't the pcre variant be encoded in some manner in the
> > file_contexts.bin file so that libselinux can tell immediately whether
> > it is supported?
> >
> >> Also, for pcre2 the persistent format is architecture dependent.
> >> Stored precompiled regular expressions can only be used on the
> >> same architecture they were generated on. If pcre2 is used and
> >> sefcontext_compile shall generate portable output, it and libselinux
> >> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
> >> cost of having to recompile the regular expressions at load time.
> >
> > Is it worth supporting the -DNO_PERSISTENTLY_STORED_PATTERNS case?  The
> > point of using file_contexts.bin was to move the cost of compiling the
> > regexes to build time rather than load time; if we cannot do that, then
> > how much do we gain from using file_contexts.bin instead of just falling
> > back to file_contexts?
> >
> > The #ifdef maze makes it very hard to read and maintain this code; that
> > needs to be refactored.
> >
> > valgrind is reporting numerous errors, including both use of
> > uninitialised values and memory leaks with both patches applied.  Try:
> > make DESTDIR=~/obj CFLAGS+=-g clean install
> > LD_LIBRARY_PATH=~/obj/lib valgrind --leak-check=full
> > ~/obj/usr/sbin/matchpathcon /etc
>
> The leak patch was only applied and tested as part of the Android build.
> I am OK with that one being squashed down; there is no point in it being
> separate. I can send an email from my @intel once I get my VM back
> online.
>
> I would throw ASAN options into CFLAGS and build with that; it's a bit
> faster than valgrind.
>
> >
> > On x86_64.
> >
> > Will provide review of the code itself later...
> >
> >>
> >> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> >> ---
> >>  libselinux/Makefile                   |  13 ++
> >>  libselinux/src/Makefile               |   4 +-
> >>  libselinux/src/label_file.c           |  91 ++------
> >>  libselinux/src/label_file.h           |  54 ++---
> >>  libselinux/src/regex.c                | 405
> ++++++++++++++++++++++++++++++++++
> >>  libselinux/src/regex.h                | 168 ++++++++++++++
> >>  libselinux/utils/Makefile             |   4 +-
> >>  libselinux/utils/sefcontext_compile.c |  53 +----
> >>  8 files changed, 637 insertions(+), 155 deletions(-)
> >>  create mode 100644 libselinux/src/regex.c
> >>  create mode 100644 libselinux/src/regex.h
> >>
> >> diff --git a/libselinux/Makefile b/libselinux/Makefile
> >> index 6142b60..15d051e 100644
> >> --- a/libselinux/Makefile
> >> +++ b/libselinux/Makefile
> >> @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
> >>  endif
> >>  export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
> >>
> >> +USE_PCRE2 ?= n
> >> +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
> >> +ifeq ($(USE_PCRE2),y)
> >> +     PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
> >> +     ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
> >> +             PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
> >> +     endif
> >> +     PCRE_LDFLAGS := -lpcre2-8
> >> +else
> >> +     PCRE_LDFLAGS := -lpcre
> >> +endif
> >> +export PCRE_CFLAGS PCRE_LDFLAGS
> >> +
> >>  all install relabel clean distclean indent:
> >>       @for subdir in $(SUBDIRS); do \
> >>               (cd $$subdir && $(MAKE) $@) || exit 1; \
> >> diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
> >> index 37d01af..66687e6 100644
> >> --- a/libselinux/src/Makefile
> >> +++ b/libselinux/src/Makefile
> >> @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k
> -Wformat-security -Winit-self -Wmissi
> >>            -fipa-pure-const -Wno-suggest-attribute=pure
> -Wno-suggest-attribute=const \
> >>            -Werror -Wno-aggregate-return -Wno-redundant-decls
> >>
> >> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
> $(EMFLAGS)
> >> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
> $(EMFLAGS) $(PCRE_CFLAGS)
> >>
> >>  SWIG_CFLAGS += -Wno-error -Wno-unused-variable
> -Wno-unused-but-set-variable -Wno-unused-parameter \
> >>               -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes
> -Wno-missing-declarations
> >> @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
> >>       $(RANLIB) $@
> >>
> >>  $(LIBSO): $(LOBJS)
> >> -     $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS)
> -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> >> +     $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS)
> -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> >>       ln -sf $@ $(TARGET)
> >>
> >>  $(LIBPC): $(LIBPC).in ../VERSION
> >> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
> >> index c89bb35..6698624 100644
> >> --- a/libselinux/src/label_file.c
> >> +++ b/libselinux/src/label_file.c
> >> @@ -15,7 +15,6 @@
> >>  #include <errno.h>
> >>  #include <limits.h>
> >>  #include <stdint.h>
> >> -#include <pcre.h>
> >>  #include <unistd.h>
> >>  #include <sys/mman.h>
> >>  #include <sys/types.h>
> >> @@ -176,7 +175,10 @@ static int load_mmap(struct selabel_handle *rec,
> const char *path,
> >>               return -1;
> >>
> >>       if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
> >> -             len = strlen(pcre_version());
> >> +             if (!regex_version()) {
> >> +                     return -1;
> >> +             }
> >> +             len = strlen(regex_version());
> >>
> >>               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >>               if (rc < 0)
> >> @@ -198,7 +200,7 @@ static int load_mmap(struct selabel_handle *rec,
> const char *path,
> >>               }
> >>
> >>               str_buf[entry_len] = '\0';
> >> -             if ((strcmp(str_buf, pcre_version()) != 0)) {
> >> +             if ((strcmp(str_buf, regex_version()) != 0)) {
> >>                       free(str_buf);
> >>                       return -1;
> >>               }
> >> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec,
> const char *path,
> >>
> >>               spec = &data->spec_arr[data->nspec];
> >>               spec->from_mmap = 1;
> >> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
> >> +             spec->regcomp = 0;
> >> +#else
> >>               spec->regcomp = 1;
> >> +#endif
> >>
> >>               /* Process context */
> >>               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> @@ -364,47 +370,10 @@ static int load_mmap(struct selabel_handle *rec,
> const char *path,
> >>                       spec->prefix_len = prefix_len;
> >>               }
> >>
> >> -             /* Process regex and study_data entries */
> >> -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> -             if (rc < 0 || !entry_len) {
> >> -                     rc = -1;
> >> -                     goto err;
> >> -             }
> >> -             spec->regex = (pcre *)mmap_area->next_addr;
> >> -             rc = next_entry(NULL, mmap_area, entry_len);
> >> +             rc = regex_load_mmap(mmap_area, &spec->regex);
> >>               if (rc < 0)
> >>                       goto err;
> >>
> >> -             /* Check that regex lengths match. pcre_fullinfo()
> >> -              * also validates its magic number. */
> >> -             rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE,
> &len);
> >> -             if (rc < 0 || len != entry_len) {
> >> -                     rc = -1;
> >> -                     goto err;
> >> -             }
> >> -
> >> -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> -             if (rc < 0 || !entry_len) {
> >> -                     rc = -1;
> >> -                     goto err;
> >> -             }
> >> -
> >> -             if (entry_len) {
> >> -                     spec->lsd.study_data = (void
> *)mmap_area->next_addr;
> >> -                     spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> >> -                     rc = next_entry(NULL, mmap_area, entry_len);
> >> -                     if (rc < 0)
> >> -                             goto err;
> >> -
> >> -                     /* Check that study data lengths match. */
> >> -                     rc = pcre_fullinfo(spec->regex, &spec->lsd,
> >> -                                        PCRE_INFO_STUDYSIZE, &len);
> >> -                     if (rc < 0 || len != entry_len) {
> >> -                             rc = -1;
> >> -                             goto err;
> >> -                     }
> >> -             }
> >> -
> >>               data->nspec++;
> >>       }
> >>
> >> @@ -609,10 +578,7 @@ static void closef(struct selabel_handle *rec)
> >>                       continue;
> >>               free(spec->regex_str);
> >>               free(spec->type_str);
> >> -             if (spec->regcomp) {
> >> -                     pcre_free(spec->regex);
> >> -                     pcre_free_study(spec->sd);
> >> -             }
> >> +             regex_data_free(spec->regex);
> >>       }
> >>
> >>       for (i = 0; i < (unsigned int)data->num_stems; i++) {
> >> @@ -644,13 +610,14 @@ static struct spec *lookup_common(struct
> selabel_handle *rec,
> >>  {
> >>       struct saved_data *data = (struct saved_data *)rec->data;
> >>       struct spec *spec_arr = data->spec_arr;
> >> -     int i, rc, file_stem, pcre_options = 0;
> >> +     int i, rc, file_stem;
> >>       mode_t mode = (mode_t)type;
> >>       const char *buf;
> >>       struct spec *ret = NULL;
> >>       char *clean_key = NULL;
> >>       const char *prev_slash, *next_slash;
> >>       unsigned int sofar = 0;
> >> +     struct regex_error_data regex_error_data;
> >>
> >>       if (!data->nspec) {
> >>               errno = ENOENT;
> >> @@ -677,9 +644,6 @@ static struct spec *lookup_common(struct
> selabel_handle *rec,
> >>       file_stem = find_stem_from_file(data, &buf);
> >>       mode &= S_IFMT;
> >>
> >> -     if (partial)
> >> -             pcre_options |= PCRE_PARTIAL_SOFT;
> >> -
> >>       /*
> >>        * Check for matching specifications in reverse order, so that
> >>        * the last matching specification is used.
> >> @@ -692,25 +656,19 @@ static struct spec *lookup_common(struct
> selabel_handle *rec,
> >>                * a regex check        */
> >>               if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
> >>                   (!mode || !spec->mode || mode == spec->mode)) {
> >> -                     if (compile_regex(data, spec, NULL) < 0)
> >> +                     if (compile_regex(data, spec, &regex_error_data)
> < 0)
> >>                               goto finish;
> >>                       if (spec->stem_id == -1)
> >> -                             rc = pcre_exec(spec->regex,
> >> -                                                 get_pcre_extra(spec),
> >> -                                                 key, strlen(key), 0,
> >> -                                                 pcre_options, NULL,
> 0);
> >> +                             rc = regex_match(spec->regex, key,
> partial);
> >>                       else
> >> -                             rc = pcre_exec(spec->regex,
> >> -                                                 get_pcre_extra(spec),
> >> -                                                 buf, strlen(buf), 0,
> >> -                                                 pcre_options, NULL,
> 0);
> >> -                     if (rc == 0) {
> >> +                             rc = regex_match(spec->regex, buf,
> partial);
> >> +                     if (rc == REGEX_MATCH) {
> >>                               spec->matches++;
> >>                               break;
> >> -                     } else if (partial && rc == PCRE_ERROR_PARTIAL)
> >> +                     } else if (partial && rc == REGEX_MATCH_PARTIAL)
> >>                               break;
> >>
> >> -                     if (rc == PCRE_ERROR_NOMATCH)
> >> +                     if (rc == REGEX_NO_MATCH)
> >>                               continue;
> >>
> >>                       errno = ENOENT;
> >> @@ -850,16 +808,9 @@ static enum selabel_cmp_result cmp(struct
> selabel_handle *h1,
> >>               }
> >>
> >>               if (spec1->regcomp && spec2->regcomp) {
> >> -                     size_t len1, len2;
> >> -                     int rc;
> >> -
> >> -                     rc = pcre_fullinfo(spec1->regex, NULL,
> PCRE_INFO_SIZE, &len1);
> >> -                     assert(rc == 0);
> >> -                     rc = pcre_fullinfo(spec2->regex, NULL,
> PCRE_INFO_SIZE, &len2);
> >> -                     assert(rc == 0);
> >> -                     if (len1 != len2 ||
> >> -                         memcmp(spec1->regex, spec2->regex, len1))
> >> +                     if (regex_cmp(spec1->regex, spec2->regex) ==
> SELABEL_INCOMPARABLE){
> >>                               return incomp(spec1, spec2, "regex", i,
> j);
> >> +                     }
> >>               } else {
> >>                       if (strcmp(spec1->regex_str, spec2->regex_str))
> >>                               return incomp(spec1, spec2, "regex_str",
> i, j);
> >> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
> >> index 6d1e890..a2e30e5 100644
> >> --- a/libselinux/src/label_file.h
> >> +++ b/libselinux/src/label_file.h
> >> @@ -6,6 +6,14 @@
> >>
> >>  #include <sys/stat.h>
> >>
> >> +/*
> >> + * regex.h/c were introduced to hold all dependencies on the regular
> >> + * expression back-end when we started supporting PCRE2. regex.h
> defines a
> >> + * minimal interface required by libselinux, so that the remaining code
> >> + * can be agnostic about the underlying implementation.
> >> + */
> >> +#include "regex.h"
> >> +
> >>  #include "callbacks.h"
> >>  #include "label_internal.h"
> >>
> >> @@ -19,21 +27,12 @@
> >>
> >>  #define SELINUX_COMPILED_FCONTEXT_MAX_VERS
>  SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
> >>
> >> -/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> >> -#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> >> -#define pcre_free_study  pcre_free
> >> -#endif
> >> -
> >>  /* A file security context specification. */
> >>  struct spec {
> >>       struct selabel_lookup_rec lr;   /* holds contexts for lookup
> result */
> >>       char *regex_str;        /* regular expession string for
> diagnostics */
> >>       char *type_str;         /* type string for diagnostic messages */
> >> -     pcre *regex;            /* compiled regular expression */
> >> -     union {
> >> -             pcre_extra *sd; /* pointer to extra compiled stuff */
> >> -             pcre_extra lsd; /* used to hold the mmap'd version */
> >> -     };
> >> +     struct regex_data * regex; /* backend dependent regular
> expression data */
> >>       mode_t mode;            /* mode format value */
> >>       int matches;            /* number of matching pathnames */
> >>       int stem_id;            /* indicates which stem-compression item
> */
> >> @@ -78,17 +77,6 @@ struct saved_data {
> >>       struct mmap_area *mmap_areas;
> >>  };
> >>
> >> -static inline pcre_extra *get_pcre_extra(struct spec *spec)
> >> -{
> >> -     if (spec->from_mmap) {
> >> -             if (spec->lsd.study_data)
> >> -                     return &spec->lsd;
> >> -             else
> >> -                     return NULL;
> >> -     } else
> >> -             return spec->sd;
> >> -}
> >> -
> >>  static inline mode_t string_to_mode(char *mode)
> >>  {
> >>       size_t len;
> >> @@ -331,13 +319,12 @@ static inline int next_entry(void *buf, struct
> mmap_area *fp, size_t bytes)
> >>  }
> >>
> >>  static inline int compile_regex(struct saved_data *data, struct spec
> *spec,
> >> -                                         const char **errbuf)
> >> +                                         struct regex_error_data *
> error_data)
> >>  {
> >> -     const char *tmperrbuf;
> >>       char *reg_buf, *anchored_regex, *cp;
> >>       struct stem *stem_arr = data->stem_arr;
> >>       size_t len;
> >> -     int erroff;
> >> +     int rc;
> >>
> >>       if (spec->regcomp)
> >>               return 0; /* already done */
> >> @@ -361,19 +348,9 @@ static inline int compile_regex(struct saved_data
> *data, struct spec *spec,
> >>       *cp = '\0';
> >>
> >>       /* Compile the regular expression. */
> >> -     spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL,
> &tmperrbuf,
> >> -                                                 &erroff, NULL);
> >> +     rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
> >>       free(anchored_regex);
> >> -     if (!spec->regex) {
> >> -             if (errbuf)
> >> -                     *errbuf = tmperrbuf;
> >> -             return -1;
> >> -     }
> >> -
> >> -     spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
> >> -     if (!spec->sd && tmperrbuf) {
> >> -             if (errbuf)
> >> -                     *errbuf = tmperrbuf;
> >> +     if (rc < 0) {
> >>               return -1;
> >>       }
> >>
> >> @@ -394,7 +371,8 @@ static inline int process_line(struct
> selabel_handle *rec,
> >>       struct saved_data *data = (struct saved_data *)rec->data;
> >>       struct spec *spec_arr;
> >>       unsigned int nspec = data->nspec;
> >> -     const char *errbuf = NULL;
> >> +     char const *errbuf;
> >> +     struct regex_error_data error_data;
> >>
> >>       items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type,
> &context);
> >>       if (items < 0) {
> >> @@ -454,7 +432,7 @@ static inline int process_line(struct
> selabel_handle *rec,
> >>       data->nspec++;
> >>
> >>       if (rec->validating &&
> >> -                         compile_regex(data, &spec_arr[nspec],
> &errbuf)) {
> >> +                         compile_regex(data, &spec_arr[nspec],
> &error_data)) {
> >>               COMPAT_LOG(SELINUX_ERROR,
> >>                          "%s:  line %u has invalid regex %s:  %s\n",
> >>                          path, lineno, regex,
> >> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> >> new file mode 100644
> >> index 0000000..6b92b04
> >> --- /dev/null
> >> +++ b/libselinux/src/regex.c
> >> @@ -0,0 +1,405 @@
> >> +#include <assert.h>
> >> +#include <stdint.h>
> >> +#include <stdio.h>
> >> +#include <string.h>
> >> +
> >> +#include "regex.h"
> >> +#include "label_file.h"
> >> +
> >> +int regex_prepare_data(struct regex_data ** regex, char const *
> pattern_string,
> >> +                     struct regex_error_data * errordata) {
> >> +     memset(errordata, 0, sizeof(struct regex_error_data));
> >> +     *regex = regex_data_create();
> >> +     if (!(*regex))
> >> +             return -1;
> >> +#ifdef USE_PCRE2
> >> +     (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
> >> +                     PCRE2_ZERO_TERMINATED,
> >> +                     PCRE2_DOTALL,
> >> +                     &errordata->error_code,
> >> +                     &errordata->error_offset, NULL);
> >> +#else
> >> +     (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
> >> +                                     &errordata->error_buffer,
> >> +                                     &errordata->error_offset, NULL);
> >> +#endif
> >> +     if (!(*regex)->regex) {
> >> +             goto err;
> >> +     }
> >> +
> >> +#ifdef USE_PCRE2
> >> +     (*regex)->match_data =
> >> +             pcre2_match_data_create_from_pattern((*regex)->regex,
> NULL);
> >> +     if (!(*regex)->match_data) {
> >> +             goto err;
> >> +     }
> >> +#else
> >> +     (*regex)->sd = pcre_study((*regex)->regex, 0,
> &errordata->error_buffer);
> >> +     if (!(*regex)->sd && errordata->error_buffer) {
> >> +             goto err;
> >> +     }
> >> +     (*regex)->extra_owned = !!(*regex)->sd;
> >> +#endif
> >> +     return 0;
> >> +
> >> +err: regex_data_free(*regex);
> >> +     *regex = NULL;
> >> +     return -1;
> >> +}
> >> +
> >> +char const * regex_version(void) {
> >> +#ifdef USE_PCRE2
> >> +     static int initialized = 0;
> >> +     static char * version_string = NULL;
> >> +     size_t version_string_len;
> >> +     if (!initialized) {
> >> +             version_string_len = pcre2_config(PCRE2_CONFIG_VERSION,
> NULL);
> >> +             version_string = (char*) malloc(version_string_len);
> >> +             if (!version_string) {
> >> +                     return NULL;
> >> +             }
> >> +             pcre2_config(PCRE2_CONFIG_VERSION, version_string);
> >> +             initialized = 1;
> >> +     }
> >> +     return version_string;
> >> +#else
> >> +     return pcre_version();
> >> +#endif
> >> +}
> >> +
> >> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data **
> regex) {
> >> +     int rc;
> >> +     size_t entry_len;
> >> +#ifndef USE_PCRE2
> >> +     size_t info_len;
> >> +#endif
> >> +
> >> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> +#ifdef USE_PCRE2
> >> +     if (rc < 0)
> >> +             return -1;
> >> +
> >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> >> +     /* this should yield exactly one because we store one pattern at
> a time
> >> +      */
> >> +     rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
> >> +     if (rc != 1)
> >> +             return -1;
> >> +
> >> +     *regex = regex_data_create();
> >> +     if (!*regex)
> >> +             return -1;
> >> +
> >> +     rc = pcre2_serialize_decode(&(*regex)->regex, 1,
> >> +                     (PCRE2_SPTR)mmap_area->next_addr, NULL);
> >> +     if (rc != 1)
> >> +             goto err;
> >> +
> >> +     (*regex)->match_data =
> >> +             pcre2_match_data_create_from_pattern((*regex)->regex,
> NULL);
> >> +     if (!(*regex)->match_data)
> >> +             goto err;
> >> +
> >> +#endif /* NO_PERSISTENTLY_STORED_PATTERNS */
> >> +     /* and skip the decoded bit */
> >> +     rc = next_entry(NULL, mmap_area, entry_len);
> >> +     if (rc < 0)
> >> +             goto err;
> >> +
> >> +     return 0;
> >> +#else
> >> +     if (rc < 0 || !entry_len) {
> >> +             return -1;
> >> +     }
> >> +     *regex = regex_data_create();
> >> +     if (!(*regex))
> >> +             return -1;
> >> +
> >> +     (*regex)->extra_owned = 0;
> >> +     (*regex)->regex = (pcre *) mmap_area->next_addr;
> >> +     rc = next_entry(NULL, mmap_area, entry_len);
> >> +     if (rc < 0)
> >> +             goto err;
> >> +
> >> +     /* Check that regex lengths match. pcre_fullinfo()
> >> +      * also validates its magic number. */
> >> +     rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE,
> &info_len);
> >> +     if (rc < 0 || info_len != entry_len) {
> >> +             goto err;
> >> +     }
> >> +
> >> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> +     if (rc < 0 || !entry_len) {
> >> +             goto err;
> >> +     }
> >> +
> >> +     if (entry_len) {
> >> +             (*regex)->lsd.study_data = (void *) mmap_area->next_addr;
> >> +             (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> >> +             rc = next_entry(NULL, mmap_area, entry_len);
> >> +             if (rc < 0)
> >> +                     goto err;
> >> +
> >> +             /* Check that study data lengths match. */
> >> +             rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
> >> +                                PCRE_INFO_STUDYSIZE, &info_len);
> >> +             if (rc < 0 || info_len != entry_len)
> >> +                     goto err;
> >> +     }
> >> +     return 0;
> >> +#endif
> >> +err:
> >> +     regex_data_free(*regex);
> >> +     *regex = NULL;
> >> +     return -1;
> >> +}
> >> +
> >> +int regex_writef(struct regex_data * regex, FILE * fp) {
> >> +#if !defined USE_PCRE2 || !defined NO_PERSISTENTLY_STORED_PATTERNS
> >> +     int rc;
> >> +#endif
> >> +     size_t len;
> >> +#ifdef USE_PCRE2
> >> +     PCRE2_SIZE to_write;
> >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> >> +     PCRE2_UCHAR * bytes;
> >> +
> >> +     /* encode the pattern for serialization */
> >> +     rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
> >> +                                 &bytes, &to_write, NULL);
> >> +     if (rc != 1)
> >> +             return -1;
> >> +
> >> +#else
> >> +     (void)regex; // silence unused parameter warning
> >> +     to_write = 0;
> >> +#endif
> >> +     /* write serialized pattern's size */
> >> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> >> +     if (len != 1) {
> >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> >> +             pcre2_serialize_free(bytes);
> >> +#endif
> >> +             return -1;
> >> +     }
> >> +
> >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> >> +     /* write serialized pattern */
> >> +     len = fwrite(bytes, 1, to_write, fp);
> >> +     if (len != to_write) {
> >> +             pcre2_serialize_free(bytes);
> >> +             return -1;
> >> +     }
> >> +     pcre2_serialize_free(bytes);
> >> +#endif
> >> +#else
> >> +     uint32_t to_write;
> >> +     size_t size;
> >> +     pcre_extra * sd = regex->extra_owned ? regex->sd :
> >> +                     (regex->lsd.study_data ? &regex->lsd : NULL);
> >> +
> >> +     /* determine the size of the pcre data in bytes */
> >> +     rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
> >> +     if (rc < 0)
> >> +             return -1;
> >> +
> >> +     /* write the number of bytes in the pcre data */
> >> +     to_write = size;
> >> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> >> +     if (len != 1)
> >> +             return -1;
> >> +
> >> +     /* write the actual pcre data as a char array */
> >> +     len = fwrite(regex->regex, 1, to_write, fp);
> >> +     if (len != to_write)
> >> +             return -1;
> >> +
> >> +     if (sd) {
> >> +             /* determine the size of the pcre study info */
> >> +             rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
> >> +                             &size);
> >> +             if (rc < 0)
> >> +                     return -1;
> >> +     } else
> >> +             size = 0;
> >> +
> >> +     /* write the number of bytes in the pcre study data */
> >> +     to_write = size;
> >> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> >> +     if (len != 1)
> >> +             return -1;
> >> +
> >> +     if (sd) {
> >> +             /* write the actual pcre study data as a char array */
> >> +             len = fwrite(sd->study_data, 1, to_write, fp);
> >> +             if (len != to_write)
> >> +                     return -1;
> >> +     }
> >> +#endif
> >> +     return 0;
> >> +}
> >> +
> >> +struct regex_data * regex_data_create(void) {
> >> +     struct regex_data * dummy = (struct regex_data*) malloc(
> >> +                     sizeof(struct regex_data));
> >> +     if (dummy) {
> >> +             memset(dummy, 0, sizeof(struct regex_data));
> >> +     }
> >> +     return dummy;
> >> +}
> >> +
> >> +void regex_data_free(struct regex_data * regex) {
> >> +     if (regex) {
> >> +#ifdef USE_PCRE2
> >> +             if (regex->regex) {
> >> +                     pcre2_code_free(regex->regex);
> >> +             }
> >> +             if (regex->match_data) {
> >> +                     pcre2_match_data_free(regex->match_data);
> >> +             }
> >> +#else
> >> +             if (regex->regex)
> >> +                     pcre_free(regex->regex);
> >> +             if (regex->extra_owned && regex->sd) {
> >> +                     pcre_free_study(regex->sd);
> >> +             }
> >> +#endif
> >> +             free(regex);
> >> +     }
> >> +}
> >> +
> >> +int regex_match(struct regex_data * regex, char const * subject, int
> partial) {
> >> +     int rc;
> >> +#ifdef USE_PCRE2
> >> +     rc = pcre2_match(regex->regex,
> >> +                     (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> >> +                     partial ? PCRE2_PARTIAL_SOFT : 0,
> regex->match_data,
> >> +                     NULL);
> >> +     if (rc > 0)
> >> +     return REGEX_MATCH;
> >> +     switch (rc) {
> >> +             case PCRE2_ERROR_PARTIAL:
> >> +                     return REGEX_MATCH_PARTIAL;
> >> +             case PCRE2_ERROR_NOMATCH:
> >> +                     return REGEX_NO_MATCH;
> >> +             default:
> >> +                     return REGEX_ERROR;
> >> +     }
> >> +#else
> >> +     rc = pcre_exec(regex->regex,
> >> +                     regex->extra_owned ? regex->sd : &regex->lsd,
> subject,
> >> +                     strlen(subject), 0, partial ? PCRE_PARTIAL_SOFT :
> 0,
> >> +                     NULL,
> >> +                     0);
> >> +     switch (rc) {
> >> +             case 0:
> >> +                     return REGEX_MATCH;
> >> +             case PCRE_ERROR_PARTIAL:
> >> +                     return REGEX_MATCH_PARTIAL;
> >> +             case PCRE_ERROR_NOMATCH:
> >> +                     return REGEX_NO_MATCH;
> >> +             default:
> >> +                     return REGEX_ERROR;
> >> +     }
> >> +#endif
> >> +}
> >> +
> >> +/* TODO Replace this compare function with something that actually
> compares the
> >> + * regular expressions.
> >> + * This compare function basically just compares the binary
> representations of
> >> + * the automatons, and because this representation contains pointers
> and
> >> + * metadata, it can only return a match if regex1 == regex2.
> >> + * Preferably, this function would be replaced with an algorithm that
> computes
> >> + * the equivalence of the automatons systematically.
> >> + */
> >> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
> >> +     int rc;
> >> +     size_t len1, len2;
> >> +#ifdef USE_PCRE2
> >> +     rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
> >> +     assert(rc == 0);
> >> +     rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
> >> +     assert(rc == 0);
> >> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> >> +             return SELABEL_INCOMPARABLE;
> >> +#else
> >> +     rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
> >> +     assert(rc == 0);
> >> +     rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
> >> +     assert(rc == 0);
> >> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> >> +             return SELABEL_INCOMPARABLE;
> >> +#endif
> >> +     return SELABEL_EQUAL;
> >> +}
> >> +
> >> +void regex_format_error(struct regex_error_data const * error_data,
> >> +                     char * buffer, size_t buf_size) {
> >> +     unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
> >> +     char * ptr = &buffer[buf_size - the_end_length];
> >> +     int rc = 0;
> >> +     size_t pos = 0;
> >> +     if (!buffer || !buf_size)
> >> +             return;
> >> +     rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
> >> +     if (rc < 0)
> >> +             /* If snprintf fails it constitutes a logical error that
> needs
> >> +              * fixing.
> >> +              */
> >> +             abort();
> >> +
> >> +     pos += rc;
> >> +     if (pos >= buf_size)
> >> +             goto truncated;
> >> +
> >> +     if (error_data->error_offset > 0) {
> >> +#ifdef USE_PCRE2
> >> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset
> %zu: ",
> >> +                             error_data->error_offset);
> >> +#else
> >> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset
> %d: ",
> >> +                             error_data->error_offset);
> >> +#endif
> >> +             if (rc < 0)
> >> +                     abort();
> >> +
> >> +     }
> >> +     pos += rc;
> >> +     if (pos >= buf_size)
> >> +             goto truncated;
> >> +
> >> +#ifdef USE_PCRE2
> >> +     rc = pcre2_get_error_message(error_data->error_code,
> >> +                     (PCRE2_UCHAR*)(buffer + pos),
> >> +                     buf_size - pos);
> >> +     if (rc == PCRE2_ERROR_NOMEMORY)
> >> +             goto truncated;
> >> +#else
> >> +     rc = snprintf(buffer + pos, buf_size - pos, "%s",
> >> +                     error_data->error_buffer);
> >> +     if (rc < 0)
> >> +             abort();
> >> +
> >> +     if ((size_t)rc < strlen(error_data->error_buffer))
> >> +             goto truncated;
> >> +#endif
> >> +
> >> +     return;
> >> +
> >> +truncated:
> >> +     /* replace end of string with "..." to indicate that it was
> truncated */
> >> +     switch (the_end_length) {
> >> +             /* no break statements, fall-through is intended */
> >> +             case 4:
> >> +                     *ptr++ = '.';
> >> +             case 3:
> >> +                     *ptr++ = '.';
> >> +             case 2:
> >> +                     *ptr++ = '.';
> >> +             case 1:
> >> +                     *ptr++ = '\0';
> >> +             default:
> >> +                     break;
> >> +     }
> >> +     return;
> >> +}
> >> diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
> >> new file mode 100644
> >> index 0000000..bdc10c0
> >> --- /dev/null
> >> +++ b/libselinux/src/regex.h
> >> @@ -0,0 +1,168 @@
> >> +#ifndef SRC_REGEX_H_
> >> +#define SRC_REGEX_H_
> >> +
> >> +#include <stdio.h>
> >> +
> >> +#ifdef USE_PCRE2
> >> +#include <pcre2.h>
> >> +#else
> >> +#include <pcre.h>
> >> +#endif
> >> +
> >> +enum {
> >> +     REGEX_MATCH,
> >> +     REGEX_MATCH_PARTIAL,
> >> +     REGEX_NO_MATCH,
> >> +     REGEX_ERROR = -1,
> >> +};
> >> +
> >> +#ifdef USE_PCRE2
> >> +struct regex_data {
> >> +     pcre2_code * regex; /* compiled regular expression */
> >> +     pcre2_match_data * match_data; /* match data block required for
> the compiled
> >> +      pattern in regex2 */
> >> +};
> >> +
> >> +struct regex_error_data {
> >> +     int error_code;
> >> +     PCRE2_SIZE error_offset;
> >> +};
> >> +
> >> +/* ^^^^^^ USE_PCRE2  ^^^^^^ */
> >> +#else
> >> +/* vvvvvv USE_PCRE vvvvvv */
> >> +
> >> +/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> >> +#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> >> +#define pcre_free_study  pcre_free
> >> +#endif
> >> +
> >> +struct regex_data {
> >> +     pcre *regex; /* compiled regular expression */
> >> +     int extra_owned; /* non zero if pcre_extra is owned by this
> structure
> >> +                       * and thus must be freed on destruction.
> >> +                       */
> >> +     union {
> >> +             pcre_extra *sd; /* pointer to extra compiled stuff */
> >> +             pcre_extra lsd; /* used to hold the mmap'd version */
> >> +     };
> >> +};
> >> +
> >> +struct regex_error_data {
> >> +     char const * error_buffer;
> >> +     int error_offset;
> >> +};
> >> +
> >> +#endif /* USE_PCRE2 */
> >> +
> >> +struct mmap_area;
> >> +
> >> +/**
> >> + * regex_version returns the version string of the underlying
> >> + * regular expressions library. In the case of PCRE it just returns the
> >> + * result of pcre_version(). In the case of PCRE2, the very first time
> this
> >> + * function is called it allocates a buffer large enough to hold the
> version
> >> + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
> >> + * The allocated buffer will linger in memory until the calling
> process is being
> >> + * reaped.
> >> + *
> >> + * It may return NULL on error.
> >> + */
> >> +char const * regex_version(void);
> >> +/**
> >> + * This constructor function allocates a buffer for a regex_data
> structure.
> >> + * The buffer is being initialized with zeroes.
> >> + */
> >> +struct regex_data * regex_data_create(void);
> >> +/**
> >> + * This complementary destructor function frees a given regex_data
> buffer.
> >> + * It also frees any non NULL member pointers with the appropriate
> pcreX_X_free
> >> + * function. For PCRE this function respects the extra_owned field and
> frees
> >> + * the pcre_extra data conditionally. Calling this function on a NULL
> pointer is
> >> + * safe.
> >> + */
> >> +void regex_data_free(struct regex_data * regex);
> >> +/**
> >> + * This function compiles the regular expression. Additionally, it
> prepares
> >> + * data structures required by the different underlying engines. For
> PCRE
> >> + * it calls pcre_study to generate optional data required for optimized
> >> + * execution of the compiled pattern. In the case of PCRE2, it
> allocates
> >> + * a pcre2_match_data structure of appropriate size to hold all
> possible
> >> + * matches created by the pattern.
> >> + *
> >> + * @arg regex If successful, the structure returned through *regex was
> allocated
> >> + *            with regex_data_create and must be freed with
> regex_data_free.
> >> + * @arg pattern_string The pattern string that is to be compiled.
> >> + * @arg errordata A pointer to a regex_error_data structure must be
> passed
> >> + *                to this function. This structure depends on the
> underlying
> >> + *                implementation. It can be passed to
> regex_format_error
> >> + *                to generate a human readable error message.
> >> + * @retval 0 on success
> >> + * @retval -1 on error
> >> + */
> >> +int regex_prepare_data(struct regex_data ** regex, char const *
> pattern_string,
> >> +                     struct regex_error_data * errordata);
> >> +/**
> >> + * This function loads a serialized precompiled pattern from a
> contiguous
> >> + * data region given by map_area.
> >> + *
> >> + * @arg map_area Description of the memory region holding a serialized
> >> + *               representation of the precompiled pattern.
> >> + * @arg regex If successful, the structure returned through *regex was
> allocated
> >> + *            with regex_data_create and must be freed with
> regex_data_free.
> >> + *
> >> + * @retval 0 on success
> >> + * @retval -1 on error
> >> + */
> >> +int regex_load_mmap(struct mmap_area * map_area, struct regex_data **
> regex);
> >> +/**
> >> + * This function stores a precompiled regular expression to a file.
> >> + * In the case of PCRE, it just dumps the binary representation of the
> >> + * precompiled pattern into a file. In the case of PCRE2, it uses the
> >> + * serialization function provided by the library.
> >> + *
> >> + * @arg regex The precompiled regular expression data.
> >> + * @arg fp A file stream specifying the output file.
> >> + */
> >> +int regex_writef(struct regex_data * regex, FILE * fp);
> >> +/**
> >> + * This function applies a precompiled pattern to a subject string and
> >> + * returns whether or not a match was found.
> >> + *
> >> + * @arg regex The precompiled pattern.
> >> + * @arg subject The subject string.
> >> + * @arg partial Boolean indicating if partial matches are wanted. A
> nonzero
> >> + *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT
> as
> >> + *              option to pcre_exec or pcre2_match.
> >> + * @retval REGEX_MATCH if a match was found
> >> + * @retval REGEX_MATCH_PARTIAL if a partial match was found
> >> + * @retval REGEX_NO_MATCH if no match was found
> >> + * @retval REGEX_ERROR if an error was encountered during the
> execution of the
> >> + *                     regular expression
> >> + */
> >> +int regex_match(struct regex_data * regex, char const * subject, int
> partial);
> >> +/**
> >> + * This function compares two compiled regular expressions (regex1 and
> regex2).
> >> + * It compares the binary representations of the compiled patterns. It
> is a very
> >> + * crude approximation because the binary representation holds data
> like
> >> + * reference counters, that has nothing to do with the actual state
> machine.
> >> + *
> >> + * @retval SELABEL_EQUAL if the pattern's binary representations are
> exactly
> >> + *                       the same
> >> + * @retval SELABEL_INCOMPARABLE otherwise
> >> + */
> >> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
> >> +/**
> >> + * This function takes the error data returned by regex_prepare_data
> and turns
> >> + * it into a human readable error message.
> >> + * If the buffer given to hold the error message is too small it
> truncates the
> >> + * message and indicates the truncation with an ellipsis ("...") at
> the end of
> >> + * the buffer.
> >> + *
> >> + * @arg error_data Error data as returned by regex_prepare_data.
> >> + * @arg buffer String buffer to hold the formatted error string.
> >> + * @arg buf_size Total size of the given buffer in bytes.
> >> + */
> >> +void regex_format_error(struct regex_error_data const * error_data,
> >> +                     char * buffer, size_t buf_size);
> >> +#endif  /* SRC_REGEX_H_ */
> >> diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
> >> index 8497cb4..1e7a048 100644
> >> --- a/libselinux/utils/Makefile
> >> +++ b/libselinux/utils/Makefile
> >> @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k
> -Wformat-security -Winit-self -Wmissi
> >>            -fasynchronous-unwind-tables -fdiagnostics-show-option
> -funit-at-a-time \
> >>            -fipa-pure-const -Wno-suggest-attribute=pure
> -Wno-suggest-attribute=const \
> >>            -Werror -Wno-aggregate-return -Wno-redundant-decls
> >> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
> $(EMFLAGS)
> >> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
> $(EMFLAGS) $(PCRE_CFLAGS)
> >>  LDLIBS += -L../src -lselinux -L$(LIBDIR)
> >>
> >>  TARGETS=$(patsubst %.c,%,$(wildcard *.c))
> >>
> >> -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
> >> +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a
> -lsepol
> >>
> >>  selinux_restorecon: LDLIBS += -lsepol
> >>
> >> diff --git a/libselinux/utils/sefcontext_compile.c
> b/libselinux/utils/sefcontext_compile.c
> >> index fd6fb78..8ff73f4 100644
> >> --- a/libselinux/utils/sefcontext_compile.c
> >> +++ b/libselinux/utils/sefcontext_compile.c
> >> @@ -1,6 +1,5 @@
> >>  #include <ctype.h>
> >>  #include <errno.h>
> >> -#include <pcre.h>
> >>  #include <stdint.h>
> >>  #include <stdio.h>
> >>  #include <string.h>
> >> @@ -13,6 +12,7 @@
> >>  #include <sepol/sepol.h>
> >>
> >>  #include "../src/label_file.h"
> >> +#include "../src/regex.h"
> >>
> >>  const char *policy_file;
> >>  static int ctx_err;
> >> @@ -119,12 +119,14 @@ static int write_binary_file(struct saved_data
> *data, int fd)
> >>       if (len != 1)
> >>               goto err;
> >>
> >> -     /* write the pcre version */
> >> -     section_len = strlen(pcre_version());
> >> +     /* write version of the regex back-end */
> >> +     if (!regex_version())
> >> +             goto err;
> >> +     section_len = strlen(regex_version());
> >>       len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
> >>       if (len != 1)
> >>               goto err;
> >> -     len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
> >> +     len = fwrite(regex_version(), sizeof(char), section_len,
> bin_file);
> >>       if (len != section_len)
> >>               goto err;
> >>
> >> @@ -162,10 +164,8 @@ static int write_binary_file(struct saved_data
> *data, int fd)
> >>               mode_t mode = specs[i].mode;
> >>               size_t prefix_len = specs[i].prefix_len;
> >>               int32_t stem_id = specs[i].stem_id;
> >> -             pcre *re = specs[i].regex;
> >> -             pcre_extra *sd = get_pcre_extra(&specs[i]);
> >> +             struct regex_data *re = specs[i].regex;
> >>               uint32_t to_write;
> >> -             size_t size;
> >>
> >>               /* length of the context string (including nul) */
> >>               to_write = strlen(context) + 1;
> >> @@ -212,42 +212,10 @@ static int write_binary_file(struct saved_data
> *data, int fd)
> >>               if (len != 1)
> >>                       goto err;
> >>
> >> -             /* determine the size of the pcre data in bytes */
> >> -             rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
> >> +             /* Write regex related data */
> >> +             rc = regex_writef(re, bin_file);
> >>               if (rc < 0)
> >>                       goto err;
> >> -
> >> -             /* write the number of bytes in the pcre data */
> >> -             to_write = size;
> >> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> >> -             if (len != 1)
> >> -                     goto err;
> >> -
> >> -             /* write the actual pcre data as a char array */
> >> -             len = fwrite(re, 1, to_write, bin_file);
> >> -             if (len != to_write)
> >> -                     goto err;
> >> -
> >> -             if (sd) {
> >> -                     /* determine the size of the pcre study info */
> >> -                     rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE,
> &size);
> >> -                     if (rc < 0)
> >> -                             goto err;
> >> -             } else
> >> -                     size = 0;
> >> -
> >> -             /* write the number of bytes in the pcre study data */
> >> -             to_write = size;
> >> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> >> -             if (len != 1)
> >> -                     goto err;
> >> -
> >> -             if (sd) {
> >> -                     /* write the actual pcre study data as a char
> array */
> >> -                     len = fwrite(sd->study_data, 1, to_write,
> bin_file);
> >> -                     if (len != to_write)
> >> -                             goto err;
> >> -             }
> >>       }
> >>
> >>       rc = 0;
> >> @@ -270,8 +238,7 @@ static void free_specs(struct saved_data *data)
> >>               free(specs[i].lr.ctx_trans);
> >>               free(specs[i].regex_str);
> >>               free(specs[i].type_str);
> >> -             pcre_free(specs[i].regex);
> >> -             pcre_free_study(specs[i].sd);
> >> +             regex_data_free(specs[i].regex);
> >>       }
> >>       free(specs);
> >>
> >>
> >
> > _______________________________________________
> > Selinux mailing list
> > Selinux@tycho.nsa.gov
> > To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
> > To get help, send an email containing "help" to
> Selinux-request@tycho.nsa.gov.
>
>
>
> --
> Respectfully,
>
> William C Roberts
>

[-- Attachment #2: Type: text/html, Size: 83348 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07 15:02 ` [PATCH 1/2] libselinux: add support for pcre2 Stephen Smalley
  2016-09-07 15:37   ` William Roberts
@ 2016-09-07 16:40   ` William Roberts
  2016-09-07 18:29     ` Jason Zaman
  2016-09-07 17:16   ` Janis Danisevskis
  2 siblings, 1 reply; 13+ messages in thread
From: William Roberts @ 2016-09-07 16:40 UTC (permalink / raw)
  To: Stephen Smalley
  Cc: Janis Danisevskis, selinux, seandroid-list, James Carter,
	Janis Danisevskis

On Wed, Sep 7, 2016 at 8:02 AM, Stephen Smalley <sds@tycho.nsa.gov> wrote:
> On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
>> From: Janis Danisevskis <jdanis@google.com>
>>
>> This patch moves all pcre1/2 dependencies into the new files regex.h
>> and regex.c implementing the common denominator of features needed
>> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
>> used implementations.
>>
>> As of this patch libselinux supports either pcre or pcre2 but not
>> both at the same time. The persistently stored file contexts
>> information differs. This means libselinux can only load file
>> context files generated by sefcontext_compile built with the
>> same pcre variant.
>
> Shouldn't the pcre variant be encoded in some manner in the
> file_contexts.bin file so that libselinux can tell immediately whether
> it is supported?

Don't we have that in pcre_version()?

>
>> Also, for pcre2 the persistent format is architecture dependent.
>> Stored precompiled regular expressions can only be used on the
>> same architecture they were generated on. If pcre2 is used and
>> sefcontext_compile shall generate portable output, it and libselinux
>> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
>> cost of having to recompile the regular expressions at load time.
>
> Is it worth supporting the -DNO_PERSISTENTLY_STORED_PATTERNS case?  The
> point of using file_contexts.bin was to move the cost of compiling the
> regexes to build time rather than load time; if we cannot do that, then
> how much do we gain from using file_contexts.bin instead of just falling
> back to file_contexts?
>
> The #ifdef maze makes it very hard to read and maintain this code; that
> needs to be refactored.

Perhaps set up some function pointers and make the regex structure opaque
to the rest of selinux. This makes me think: should we just dlopen the
correct version of libpcre based on pcre_version()? Not sure how you
feel about dlopen calls...

>
> valgrind is reporting numerous errors, including both use of
> uninitialised values and memory leaks with both patches applied.  Try:
> make DESTDIR=~/obj CFLAGS+=-g clean install
> LD_LIBRARY_PATH=~/obj/lib valgrind --leak-check=full
> ~/obj/usr/sbin/matchpathcon /etc
>
> On x86_64.
>
> Will provide review of the code itself later...
>
>>
>> Signed-off-by: Janis Danisevskis <jdanis@google.com>
>> ---
>>  libselinux/Makefile                   |  13 ++
>>  libselinux/src/Makefile               |   4 +-
>>  libselinux/src/label_file.c           |  91 ++------
>>  libselinux/src/label_file.h           |  54 ++---
>>  libselinux/src/regex.c                | 405 ++++++++++++++++++++++++++++++++++
>>  libselinux/src/regex.h                | 168 ++++++++++++++
>>  libselinux/utils/Makefile             |   4 +-
>>  libselinux/utils/sefcontext_compile.c |  53 +----
>>  8 files changed, 637 insertions(+), 155 deletions(-)
>>  create mode 100644 libselinux/src/regex.c
>>  create mode 100644 libselinux/src/regex.h
>>
>> diff --git a/libselinux/Makefile b/libselinux/Makefile
>> index 6142b60..15d051e 100644
>> --- a/libselinux/Makefile
>> +++ b/libselinux/Makefile
>> @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
>>  endif
>>  export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
>>
>> +USE_PCRE2 ?= n
>> +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
>> +ifeq ($(USE_PCRE2),y)
>> +     PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
>> +     ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
>> +             PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
>> +     endif
>> +     PCRE_LDFLAGS := -lpcre2-8
>> +else
>> +     PCRE_LDFLAGS := -lpcre
>> +endif
>> +export PCRE_CFLAGS PCRE_LDFLAGS
>> +
>>  all install relabel clean distclean indent:
>>       @for subdir in $(SUBDIRS); do \
>>               (cd $$subdir && $(MAKE) $@) || exit 1; \
>> diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
>> index 37d01af..66687e6 100644
>> --- a/libselinux/src/Makefile
>> +++ b/libselinux/src/Makefile
>> @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
>>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
>>            -Werror -Wno-aggregate-return -Wno-redundant-decls
>>
>> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
>> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
>>
>>  SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
>>               -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
>> @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
>>       $(RANLIB) $@
>>
>>  $(LIBSO): $(LOBJS)
>> -     $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
>> +     $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
>>       ln -sf $@ $(TARGET)
>>
>>  $(LIBPC): $(LIBPC).in ../VERSION
>> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
>> index c89bb35..6698624 100644
>> --- a/libselinux/src/label_file.c
>> +++ b/libselinux/src/label_file.c
>> @@ -15,7 +15,6 @@
>>  #include <errno.h>
>>  #include <limits.h>
>>  #include <stdint.h>
>> -#include <pcre.h>
>>  #include <unistd.h>
>>  #include <sys/mman.h>
>>  #include <sys/types.h>
>> @@ -176,7 +175,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>               return -1;
>>
>>       if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
>> -             len = strlen(pcre_version());
>> +             if (!regex_version()) {
>> +                     return -1;
>> +             }
>> +             len = strlen(regex_version());
>>
>>               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>>               if (rc < 0)
>> @@ -198,7 +200,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>               }
>>
>>               str_buf[entry_len] = '\0';
>> -             if ((strcmp(str_buf, pcre_version()) != 0)) {
>> +             if ((strcmp(str_buf, regex_version()) != 0)) {
>>                       free(str_buf);
>>                       return -1;
>>               }
>> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>
>>               spec = &data->spec_arr[data->nspec];
>>               spec->from_mmap = 1;
>> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
>> +             spec->regcomp = 0;
>> +#else
>>               spec->regcomp = 1;
>> +#endif
>>
>>               /* Process context */
>>               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> @@ -364,47 +370,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>                       spec->prefix_len = prefix_len;
>>               }
>>
>> -             /* Process regex and study_data entries */
>> -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> -             if (rc < 0 || !entry_len) {
>> -                     rc = -1;
>> -                     goto err;
>> -             }
>> -             spec->regex = (pcre *)mmap_area->next_addr;
>> -             rc = next_entry(NULL, mmap_area, entry_len);
>> +             rc = regex_load_mmap(mmap_area, &spec->regex);
>>               if (rc < 0)
>>                       goto err;
>>
>> -             /* Check that regex lengths match. pcre_fullinfo()
>> -              * also validates its magic number. */
>> -             rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
>> -             if (rc < 0 || len != entry_len) {
>> -                     rc = -1;
>> -                     goto err;
>> -             }
>> -
>> -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> -             if (rc < 0 || !entry_len) {
>> -                     rc = -1;
>> -                     goto err;
>> -             }
>> -
>> -             if (entry_len) {
>> -                     spec->lsd.study_data = (void *)mmap_area->next_addr;
>> -                     spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
>> -                     rc = next_entry(NULL, mmap_area, entry_len);
>> -                     if (rc < 0)
>> -                             goto err;
>> -
>> -                     /* Check that study data lengths match. */
>> -                     rc = pcre_fullinfo(spec->regex, &spec->lsd,
>> -                                        PCRE_INFO_STUDYSIZE, &len);
>> -                     if (rc < 0 || len != entry_len) {
>> -                             rc = -1;
>> -                             goto err;
>> -                     }
>> -             }
>> -
>>               data->nspec++;
>>       }
>>
>> @@ -609,10 +578,7 @@ static void closef(struct selabel_handle *rec)
>>                       continue;
>>               free(spec->regex_str);
>>               free(spec->type_str);
>> -             if (spec->regcomp) {
>> -                     pcre_free(spec->regex);
>> -                     pcre_free_study(spec->sd);
>> -             }
>> +             regex_data_free(spec->regex);
>>       }
>>
>>       for (i = 0; i < (unsigned int)data->num_stems; i++) {
>> @@ -644,13 +610,14 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>>  {
>>       struct saved_data *data = (struct saved_data *)rec->data;
>>       struct spec *spec_arr = data->spec_arr;
>> -     int i, rc, file_stem, pcre_options = 0;
>> +     int i, rc, file_stem;
>>       mode_t mode = (mode_t)type;
>>       const char *buf;
>>       struct spec *ret = NULL;
>>       char *clean_key = NULL;
>>       const char *prev_slash, *next_slash;
>>       unsigned int sofar = 0;
>> +     struct regex_error_data regex_error_data;
>>
>>       if (!data->nspec) {
>>               errno = ENOENT;
>> @@ -677,9 +644,6 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>>       file_stem = find_stem_from_file(data, &buf);
>>       mode &= S_IFMT;
>>
>> -     if (partial)
>> -             pcre_options |= PCRE_PARTIAL_SOFT;
>> -
>>       /*
>>        * Check for matching specifications in reverse order, so that
>>        * the last matching specification is used.
>> @@ -692,25 +656,19 @@ static struct spec *lookup_common(struct selabel_handle *rec,
>>                * a regex check        */
>>               if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
>>                   (!mode || !spec->mode || mode == spec->mode)) {
>> -                     if (compile_regex(data, spec, NULL) < 0)
>> +                     if (compile_regex(data, spec, &regex_error_data) < 0)
>>                               goto finish;
>>                       if (spec->stem_id == -1)
>> -                             rc = pcre_exec(spec->regex,
>> -                                                 get_pcre_extra(spec),
>> -                                                 key, strlen(key), 0,
>> -                                                 pcre_options, NULL, 0);
>> +                             rc = regex_match(spec->regex, key, partial);
>>                       else
>> -                             rc = pcre_exec(spec->regex,
>> -                                                 get_pcre_extra(spec),
>> -                                                 buf, strlen(buf), 0,
>> -                                                 pcre_options, NULL, 0);
>> -                     if (rc == 0) {
>> +                             rc = regex_match(spec->regex, buf, partial);
>> +                     if (rc == REGEX_MATCH) {
>>                               spec->matches++;
>>                               break;
>> -                     } else if (partial && rc == PCRE_ERROR_PARTIAL)
>> +                     } else if (partial && rc == REGEX_MATCH_PARTIAL)
>>                               break;
>>
>> -                     if (rc == PCRE_ERROR_NOMATCH)
>> +                     if (rc == REGEX_NO_MATCH)
>>                               continue;
>>
>>                       errno = ENOENT;
>> @@ -850,16 +808,9 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1,
>>               }
>>
>>               if (spec1->regcomp && spec2->regcomp) {
>> -                     size_t len1, len2;
>> -                     int rc;
>> -
>> -                     rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1);
>> -                     assert(rc == 0);
>> -                     rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2);
>> -                     assert(rc == 0);
>> -                     if (len1 != len2 ||
>> -                         memcmp(spec1->regex, spec2->regex, len1))
>> +                     if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){
>>                               return incomp(spec1, spec2, "regex", i, j);
>> +                     }
>>               } else {
>>                       if (strcmp(spec1->regex_str, spec2->regex_str))
>>                               return incomp(spec1, spec2, "regex_str", i, j);
>> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
>> index 6d1e890..a2e30e5 100644
>> --- a/libselinux/src/label_file.h
>> +++ b/libselinux/src/label_file.h
>> @@ -6,6 +6,14 @@
>>
>>  #include <sys/stat.h>
>>
>> +/*
>> + * regex.h/c were introduced to hold all dependencies on the regular
>> + * expression back-end when we started supporting PCRE2. regex.h defines a
>> + * minimal interface required by libselinux, so that the remaining code
>> + * can be agnostic about the underlying implementation.
>> + */
>> +#include "regex.h"
>> +
>>  #include "callbacks.h"
>>  #include "label_internal.h"
>>
>> @@ -19,21 +27,12 @@
>>
>>  #define SELINUX_COMPILED_FCONTEXT_MAX_VERS   SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
>>
>> -/* Prior to version 8.20, libpcre did not have pcre_free_study() */
>> -#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
>> -#define pcre_free_study  pcre_free
>> -#endif
>> -
>>  /* A file security context specification. */
>>  struct spec {
>>       struct selabel_lookup_rec lr;   /* holds contexts for lookup result */
>>       char *regex_str;        /* regular expession string for diagnostics */
>>       char *type_str;         /* type string for diagnostic messages */
>> -     pcre *regex;            /* compiled regular expression */
>> -     union {
>> -             pcre_extra *sd; /* pointer to extra compiled stuff */
>> -             pcre_extra lsd; /* used to hold the mmap'd version */
>> -     };
>> +     struct regex_data * regex; /* backend dependent regular expression data */
>>       mode_t mode;            /* mode format value */
>>       int matches;            /* number of matching pathnames */
>>       int stem_id;            /* indicates which stem-compression item */
>> @@ -78,17 +77,6 @@ struct saved_data {
>>       struct mmap_area *mmap_areas;
>>  };
>>
>> -static inline pcre_extra *get_pcre_extra(struct spec *spec)
>> -{
>> -     if (spec->from_mmap) {
>> -             if (spec->lsd.study_data)
>> -                     return &spec->lsd;
>> -             else
>> -                     return NULL;
>> -     } else
>> -             return spec->sd;
>> -}
>> -
>>  static inline mode_t string_to_mode(char *mode)
>>  {
>>       size_t len;
>> @@ -331,13 +319,12 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes)
>>  }
>>
>>  static inline int compile_regex(struct saved_data *data, struct spec *spec,
>> -                                         const char **errbuf)
>> +                                         struct regex_error_data * error_data)
>>  {
>> -     const char *tmperrbuf;
>>       char *reg_buf, *anchored_regex, *cp;
>>       struct stem *stem_arr = data->stem_arr;
>>       size_t len;
>> -     int erroff;
>> +     int rc;
>>
>>       if (spec->regcomp)
>>               return 0; /* already done */
>> @@ -361,19 +348,9 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
>>       *cp = '\0';
>>
>>       /* Compile the regular expression. */
>> -     spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
>> -                                                 &erroff, NULL);
>> +     rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
>>       free(anchored_regex);
>> -     if (!spec->regex) {
>> -             if (errbuf)
>> -                     *errbuf = tmperrbuf;
>> -             return -1;
>> -     }
>> -
>> -     spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
>> -     if (!spec->sd && tmperrbuf) {
>> -             if (errbuf)
>> -                     *errbuf = tmperrbuf;
>> +     if (rc < 0) {
>>               return -1;
>>       }
>>
>> @@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle *rec,
>>       struct saved_data *data = (struct saved_data *)rec->data;
>>       struct spec *spec_arr;
>>       unsigned int nspec = data->nspec;
>> -     const char *errbuf = NULL;
>> +     char const *errbuf;
>> +     struct regex_error_data error_data;
>>
>>       items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
>>       if (items < 0) {
>> @@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle *rec,
>>       data->nspec++;
>>
>>       if (rec->validating &&
>> -                         compile_regex(data, &spec_arr[nspec], &errbuf)) {
>> +                         compile_regex(data, &spec_arr[nspec], &error_data)) {
>>               COMPAT_LOG(SELINUX_ERROR,
>>                          "%s:  line %u has invalid regex %s:  %s\n",
>>                          path, lineno, regex,
>> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
>> new file mode 100644
>> index 0000000..6b92b04
>> --- /dev/null
>> +++ b/libselinux/src/regex.c
>> @@ -0,0 +1,405 @@
>> +#include <assert.h>
>> +#include <stdint.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +
>> +#include "regex.h"
>> +#include "label_file.h"
>> +
>> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
>> +                     struct regex_error_data * errordata) {
>> +     memset(errordata, 0, sizeof(struct regex_error_data));
>> +     *regex = regex_data_create();
>> +     if (!(*regex))
>> +             return -1;
>> +#ifdef USE_PCRE2
>> +     (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
>> +                     PCRE2_ZERO_TERMINATED,
>> +                     PCRE2_DOTALL,
>> +                     &errordata->error_code,
>> +                     &errordata->error_offset, NULL);
>> +#else
>> +     (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
>> +                                     &errordata->error_buffer,
>> +                                     &errordata->error_offset, NULL);
>> +#endif
>> +     if (!(*regex)->regex) {
>> +             goto err;
>> +     }
>> +
>> +#ifdef USE_PCRE2
>> +     (*regex)->match_data =
>> +             pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
>> +     if (!(*regex)->match_data) {
>> +             goto err;
>> +     }
>> +#else
>> +     (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
>> +     if (!(*regex)->sd && errordata->error_buffer) {
>> +             goto err;
>> +     }
>> +     (*regex)->extra_owned = !!(*regex)->sd;
>> +#endif
>> +     return 0;
>> +
>> +err: regex_data_free(*regex);
>> +     *regex = NULL;
>> +     return -1;
>> +}
>> +
>> +char const * regex_version(void) {
>> +#ifdef USE_PCRE2
>> +     static int initialized = 0;
>> +     static char * version_string = NULL;
>> +     size_t version_string_len;
>> +     if (!initialized) {
>> +             version_string_len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
>> +             version_string = (char*) malloc(version_string_len);
>> +             if (!version_string) {
>> +                     return NULL;
>> +             }
>> +             pcre2_config(PCRE2_CONFIG_VERSION, version_string);
>> +             initialized = 1;
>> +     }
>> +     return version_string;
>> +#else
>> +     return pcre_version();
>> +#endif
>> +}
>> +
>> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
>> +     int rc;
>> +     size_t entry_len;
>> +#ifndef USE_PCRE2
>> +     size_t info_len;
>> +#endif
>> +
>> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> +#ifdef USE_PCRE2
>> +     if (rc < 0)
>> +             return -1;
>> +
>> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
>> +     /* this should yield exactly one because we store one pattern at a time
>> +      */
>> +     rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
>> +     if (rc != 1)
>> +             return -1;
>> +
>> +     *regex = regex_data_create();
>> +     if (!*regex)
>> +             return -1;
>> +
>> +     rc = pcre2_serialize_decode(&(*regex)->regex, 1,
>> +                     (PCRE2_SPTR)mmap_area->next_addr, NULL);
>> +     if (rc != 1)
>> +             goto err;
>> +
>> +     (*regex)->match_data =
>> +             pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
>> +     if (!(*regex)->match_data)
>> +             goto err;
>> +
>> +#endif /* NO_PERSISTENTLY_STORED_PATTERNS */
>> +     /* and skip the decoded bit */
>> +     rc = next_entry(NULL, mmap_area, entry_len);
>> +     if (rc < 0)
>> +             goto err;
>> +
>> +     return 0;
>> +#else
>> +     if (rc < 0 || !entry_len) {
>> +             return -1;
>> +     }
>> +     *regex = regex_data_create();
>> +     if (!(*regex))
>> +             return -1;
>> +
>> +     (*regex)->extra_owned = 0;
>> +     (*regex)->regex = (pcre *) mmap_area->next_addr;
>> +     rc = next_entry(NULL, mmap_area, entry_len);
>> +     if (rc < 0)
>> +             goto err;
>> +
>> +     /* Check that regex lengths match. pcre_fullinfo()
>> +      * also validates its magic number. */
>> +     rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
>> +     if (rc < 0 || info_len != entry_len) {
>> +             goto err;
>> +     }
>> +
>> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
>> +     if (rc < 0 || !entry_len) {
>> +             goto err;
>> +     }
>> +
>> +     if (entry_len) {
>> +             (*regex)->lsd.study_data = (void *) mmap_area->next_addr;
>> +             (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
>> +             rc = next_entry(NULL, mmap_area, entry_len);
>> +             if (rc < 0)
>> +                     goto err;
>> +
>> +             /* Check that study data lengths match. */
>> +             rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
>> +                                PCRE_INFO_STUDYSIZE, &info_len);
>> +             if (rc < 0 || info_len != entry_len)
>> +                     goto err;
>> +     }
>> +     return 0;
>> +#endif
>> +err:
>> +     regex_data_free(*regex);
>> +     *regex = NULL;
>> +     return -1;
>> +}
>> +
>> +int regex_writef(struct regex_data * regex, FILE * fp) {
>> +#if !defined USE_PCRE2 || !defined NO_PERSISTENTLY_STORED_PATTERNS
>> +     int rc;
>> +#endif
>> +     size_t len;
>> +#ifdef USE_PCRE2
>> +     PCRE2_SIZE to_write;
>> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
>> +     PCRE2_UCHAR * bytes;
>> +
>> +     /* encode the pattern for serialization */
>> +     rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
>> +                                 &bytes, &to_write, NULL);
>> +     if (rc != 1)
>> +             return -1;
>> +
>> +#else
>> +     (void)regex; // silence unused parameter warning
>> +     to_write = 0;
>> +#endif
>> +     /* write serialized pattern's size */
>> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
>> +     if (len != 1) {
>> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
>> +             pcre2_serialize_free(bytes);
>> +#endif
>> +             return -1;
>> +     }
>> +
>> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
>> +     /* write serialized pattern */
>> +     len = fwrite(bytes, 1, to_write, fp);
>> +     if (len != to_write) {
>> +             pcre2_serialize_free(bytes);
>> +             return -1;
>> +     }
>> +     pcre2_serialize_free(bytes);
>> +#endif
>> +#else
>> +     uint32_t to_write;
>> +     size_t size;
>> +     pcre_extra * sd = regex->extra_owned ? regex->sd :
>> +                     (regex->lsd.study_data ? &regex->lsd : NULL);
>> +
>> +     /* determine the size of the pcre data in bytes */
>> +     rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
>> +     if (rc < 0)
>> +             return -1;
>> +
>> +     /* write the number of bytes in the pcre data */
>> +     to_write = size;
>> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
>> +     if (len != 1)
>> +             return -1;
>> +
>> +     /* write the actual pcre data as a char array */
>> +     len = fwrite(regex->regex, 1, to_write, fp);
>> +     if (len != to_write)
>> +             return -1;
>> +
>> +     if (sd) {
>> +             /* determine the size of the pcre study info */
>> +             rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
>> +                             &size);
>> +             if (rc < 0)
>> +                     return -1;
>> +     } else
>> +             size = 0;
>> +
>> +     /* write the number of bytes in the pcre study data */
>> +     to_write = size;
>> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
>> +     if (len != 1)
>> +             return -1;
>> +
>> +     if (sd) {
>> +             /* write the actual pcre study data as a char array */
>> +             len = fwrite(sd->study_data, 1, to_write, fp);
>> +             if (len != to_write)
>> +                     return -1;
>> +     }
>> +#endif
>> +     return 0;
>> +}
>> +
>> +struct regex_data * regex_data_create(void) {
>> +     struct regex_data * dummy = (struct regex_data*) malloc(
>> +                     sizeof(struct regex_data));
>> +     if (dummy) {
>> +             memset(dummy, 0, sizeof(struct regex_data));
>> +     }
>> +     return dummy;
>> +}
>> +
>> +void regex_data_free(struct regex_data * regex) {
>> +     if (regex) {
>> +#ifdef USE_PCRE2
>> +             if (regex->regex) {
>> +                     pcre2_code_free(regex->regex);
>> +             }
>> +             if (regex->match_data) {
>> +                     pcre2_match_data_free(regex->match_data);
>> +             }
>> +#else
>> +             if (regex->regex)
>> +                     pcre_free(regex->regex);
>> +             if (regex->extra_owned && regex->sd) {
>> +                     pcre_free_study(regex->sd);
>> +             }
>> +#endif
>> +             free(regex);
>> +     }
>> +}
>> +
>> +int regex_match(struct regex_data * regex, char const * subject, int partial) {
>> +     int rc;
>> +#ifdef USE_PCRE2
>> +     rc = pcre2_match(regex->regex,
>> +                     (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
>> +                     partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data,
>> +                     NULL);
>> +     if (rc > 0)
>> +     return REGEX_MATCH;
>> +     switch (rc) {
>> +             case PCRE2_ERROR_PARTIAL:
>> +                     return REGEX_MATCH_PARTIAL;
>> +             case PCRE2_ERROR_NOMATCH:
>> +                     return REGEX_NO_MATCH;
>> +             default:
>> +                     return REGEX_ERROR;
>> +     }
>> +#else
>> +     rc = pcre_exec(regex->regex,
>> +                     regex->extra_owned ? regex->sd : &regex->lsd, subject,
>> +                     strlen(subject), 0, partial ? PCRE_PARTIAL_SOFT : 0,
>> +                     NULL,
>> +                     0);
>> +     switch (rc) {
>> +             case 0:
>> +                     return REGEX_MATCH;
>> +             case PCRE_ERROR_PARTIAL:
>> +                     return REGEX_MATCH_PARTIAL;
>> +             case PCRE_ERROR_NOMATCH:
>> +                     return REGEX_NO_MATCH;
>> +             default:
>> +                     return REGEX_ERROR;
>> +     }
>> +#endif
>> +}
>> +
>> +/* TODO Replace this compare function with something that actually compares the
>> + * regular expressions.
>> + * This compare function basically just compares the binary representations of
>> + * the automatons, and because this representation contains pointers and
>> + * metadata, it can only return a match if regex1 == regex2.
>> + * Preferably, this function would be replaced with an algorithm that computes
>> + * the equivalence of the automatons systematically.
>> + */
>> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
>> +     int rc;
>> +     size_t len1, len2;
>> +#ifdef USE_PCRE2
>> +     rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
>> +     assert(rc == 0);
>> +     rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
>> +     assert(rc == 0);
>> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
>> +             return SELABEL_INCOMPARABLE;
>> +#else
>> +     rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
>> +     assert(rc == 0);
>> +     rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
>> +     assert(rc == 0);
>> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
>> +             return SELABEL_INCOMPARABLE;
>> +#endif
>> +     return SELABEL_EQUAL;
>> +}
>> +
>> +void regex_format_error(struct regex_error_data const * error_data,
>> +                     char * buffer, size_t buf_size) {
>> +     unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
>> +     char * ptr = &buffer[buf_size - the_end_length];
>> +     int rc = 0;
>> +     size_t pos = 0;
>> +     if (!buffer || !buf_size)
>> +             return;
>> +     rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
>> +     if (rc < 0)
>> +             /* If snprintf fails it constitutes a logical error that needs
>> +              * fixing.
>> +              */
>> +             abort();
>> +
>> +     pos += rc;
>> +     if (pos >= buf_size)
>> +             goto truncated;
>> +
>> +     if (error_data->error_offset > 0) {
>> +#ifdef USE_PCRE2
>> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
>> +                             error_data->error_offset);
>> +#else
>> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
>> +                             error_data->error_offset);
>> +#endif
>> +             if (rc < 0)
>> +                     abort();
>> +
>> +     }
>> +     pos += rc;
>> +     if (pos >= buf_size)
>> +             goto truncated;
>> +
>> +#ifdef USE_PCRE2
>> +     rc = pcre2_get_error_message(error_data->error_code,
>> +                     (PCRE2_UCHAR*)(buffer + pos),
>> +                     buf_size - pos);
>> +     if (rc == PCRE2_ERROR_NOMEMORY)
>> +             goto truncated;
>> +#else
>> +     rc = snprintf(buffer + pos, buf_size - pos, "%s",
>> +                     error_data->error_buffer);
>> +     if (rc < 0)
>> +             abort();
>> +
>> +     if ((size_t)rc < strlen(error_data->error_buffer))
>> +             goto truncated;
>> +#endif
>> +
>> +     return;
>> +
>> +truncated:
>> +     /* replace end of string with "..." to indicate that it was truncated */
>> +     switch (the_end_length) {
>> +             /* no break statements, fall-through is intended */
>> +             case 4:
>> +                     *ptr++ = '.';
>> +             case 3:
>> +                     *ptr++ = '.';
>> +             case 2:
>> +                     *ptr++ = '.';
>> +             case 1:
>> +                     *ptr++ = '\0';
>> +             default:
>> +                     break;
>> +     }
>> +     return;
>> +}
>> diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
>> new file mode 100644
>> index 0000000..bdc10c0
>> --- /dev/null
>> +++ b/libselinux/src/regex.h
>> @@ -0,0 +1,168 @@
>> +#ifndef SRC_REGEX_H_
>> +#define SRC_REGEX_H_
>> +
>> +#include <stdio.h>
>> +
>> +#ifdef USE_PCRE2
>> +#include <pcre2.h>
>> +#else
>> +#include <pcre.h>
>> +#endif
>> +
>> +enum {
>> +     REGEX_MATCH,
>> +     REGEX_MATCH_PARTIAL,
>> +     REGEX_NO_MATCH,
>> +     REGEX_ERROR = -1,
>> +};
>> +
>> +#ifdef USE_PCRE2
>> +struct regex_data {
>> +     pcre2_code * regex; /* compiled regular expression */
>> +     pcre2_match_data * match_data; /* match data block required for the compiled
>> +      pattern in regex */
>> +};
>> +
>> +struct regex_error_data {
>> +     int error_code;
>> +     PCRE2_SIZE error_offset;
>> +};
>> +
>> +/* ^^^^^^ USE_PCRE2  ^^^^^^ */
>> +#else
>> +/* vvvvvv USE_PCRE vvvvvv */
>> +
>> +/* Prior to version 8.20, libpcre did not have pcre_free_study() */
>> +#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
>> +#define pcre_free_study  pcre_free
>> +#endif
>> +
>> +struct regex_data {
>> +     pcre *regex; /* compiled regular expression */
>> +     int extra_owned; /* non zero if pcre_extra is owned by this structure
>> +                       * and thus must be freed on destruction.
>> +                       */
>> +     union {
>> +             pcre_extra *sd; /* pointer to extra compiled stuff */
>> +             pcre_extra lsd; /* used to hold the mmap'd version */
>> +     };
>> +};
>> +
>> +struct regex_error_data {
>> +     char const * error_buffer;
>> +     int error_offset;
>> +};
>> +
>> +#endif /* USE_PCRE2 */
>> +
>> +struct mmap_area;
>> +
>> +/**
>> + * regex_version returns the version string of the underlying regular
>> + * expressions library. In the case of PCRE it just returns the
>> + * result of pcre_version(). In the case of PCRE2, the very first time this
>> + * function is called it allocates a buffer large enough to hold the version
>> + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
>> + * The allocated buffer will linger in memory until the calling process is being
>> + * reaped.
>> + *
>> + * It may return NULL on error.
>> + */
>> +char const * regex_version(void);
>> +/**
>> + * This constructor function allocates a buffer for a regex_data structure.
>> + * The buffer is being initialized with zeroes.
>> + */
>> +struct regex_data * regex_data_create(void);
>> +/**
>> + * This complementary destructor function frees a given regex_data buffer.
>> + * It also frees any non-NULL member pointers with the appropriate pcreX_X_free
>> + * function. For PCRE this function respects the extra_owned field and frees
>> + * the pcre_extra data conditionally. Calling this function on a NULL pointer is
>> + * safe.
>> + */
>> +void regex_data_free(struct regex_data * regex);
>> +/**
>> + * This function compiles the regular expression. Additionally, it prepares
>> + * data structures required by the different underlying engines. For PCRE
>> + * it calls pcre_study to generate optional data required for optimized
>> + * execution of the compiled pattern. In the case of PCRE2, it allocates
>> + * a pcre2_match_data structure of appropriate size to hold all possible
>> + * matches created by the pattern.
>> + *
>> + * @arg regex If successful, the structure returned through *regex was allocated
>> + *            with regex_data_create and must be freed with regex_data_free.
>> + * @arg pattern_string The pattern string that is to be compiled.
>> + * @arg errordata A pointer to a regex_error_data structure must be passed
>> + *                to this function. This structure depends on the underlying
>> + *                implementation. It can be passed to regex_format_error
>> + *                to generate a human readable error message.
>> + * @retval 0 on success
>> + * @retval -1 on error
>> + */
>> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
>> +                     struct regex_error_data * errordata);
>> +/**
>> + * This function loads a serialized precompiled pattern from a contiguous
>> + * data region given by map_area.
>> + *
>> + * @arg map_area Description of the memory region holding a serialized
>> + *               representation of the precompiled pattern.
>> + * @arg regex If successful, the structure returned through *regex was allocated
>> + *            with regex_data_create and must be freed with regex_data_free.
>> + *
>> + * @retval 0 on success
>> + * @retval -1 on error
>> + */
>> +int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** regex);
>> +/**
>> + * This function stores a precompiled regular expression to a file.
>> + * In the case of PCRE, it just dumps the binary representation of the
>> + * precompiled pattern into a file. In the case of PCRE2, it uses the
>> + * serialization function provided by the library.
>> + *
>> + * @arg regex The precompiled regular expression data.
>> + * @arg fp A file stream specifying the output file.
>> + */
>> +int regex_writef(struct regex_data * regex, FILE * fp);
>> +/**
>> + * This function applies a precompiled pattern to a subject string and
>> + * returns whether or not a match was found.
>> + *
>> + * @arg regex The precompiled pattern.
>> + * @arg subject The subject string.
>> + * @arg partial Boolean indicating if partial matches are wanted. A nonzero
>> + *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
>> + *              option to pcre_exec or pcre2_match.
>> + * @retval REGEX_MATCH if a match was found
>> + * @retval REGEX_MATCH_PARTIAL if a partial match was found
>> + * @retval REGEX_NO_MATCH if no match was found
>> + * @retval REGEX_ERROR if an error was encountered during the execution of the
>> + *                     regular expression
>> + */
>> +int regex_match(struct regex_data * regex, char const * subject, int partial);
>> +/**
>> + * This function compares two compiled regular expressions (regex1 and regex2).
>> + * It compares the binary representations of the compiled patterns. It is a very
>> + * crude approximation because the binary representation holds data like
>> + * reference counters, that has nothing to do with the actual state machine.
>> + *
>> + * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
>> + *                       the same
>> + * @retval SELABEL_INCOMPARABLE otherwise
>> + */
>> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
>> +/**
>> + * This function takes the error data returned by regex_prepare_data and turns
>> + * it into a human-readable error message.
>> + * If the buffer given to hold the error message is too small, it truncates the
>> + * message and indicates the truncation with an ellipsis ("...") at the end of
>> + * the buffer.
>> + *
>> + * @arg error_data Error data as returned by regex_prepare_data.
>> + * @arg buffer String buffer to hold the formatted error string.
>> + * @arg buf_size Total size of the given buffer in bytes.
>> + */
>> +void regex_format_error(struct regex_error_data const * error_data,
>> +                     char * buffer, size_t buf_size);
>> +#endif  /* SRC_REGEX_H_ */
>> diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
>> index 8497cb4..1e7a048 100644
>> --- a/libselinux/utils/Makefile
>> +++ b/libselinux/utils/Makefile
>> @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
>>            -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
>>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
>>            -Werror -Wno-aggregate-return -Wno-redundant-decls
>> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
>> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
>>  LDLIBS += -L../src -lselinux -L$(LIBDIR)
>>
>>  TARGETS=$(patsubst %.c,%,$(wildcard *.c))
>>
>> -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
>> +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol
>>
>>  selinux_restorecon: LDLIBS += -lsepol
>>
>> diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
>> index fd6fb78..8ff73f4 100644
>> --- a/libselinux/utils/sefcontext_compile.c
>> +++ b/libselinux/utils/sefcontext_compile.c
>> @@ -1,6 +1,5 @@
>>  #include <ctype.h>
>>  #include <errno.h>
>> -#include <pcre.h>
>>  #include <stdint.h>
>>  #include <stdio.h>
>>  #include <string.h>
>> @@ -13,6 +12,7 @@
>>  #include <sepol/sepol.h>
>>
>>  #include "../src/label_file.h"
>> +#include "../src/regex.h"
>>
>>  const char *policy_file;
>>  static int ctx_err;
>> @@ -119,12 +119,14 @@ static int write_binary_file(struct saved_data *data, int fd)
>>       if (len != 1)
>>               goto err;
>>
>> -     /* write the pcre version */
>> -     section_len = strlen(pcre_version());
>> +     /* write version of the regex back-end */
>> +     if (!regex_version())
>> +             goto err;
>> +     section_len = strlen(regex_version());
>>       len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
>>       if (len != 1)
>>               goto err;
>> -     len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
>> +     len = fwrite(regex_version(), sizeof(char), section_len, bin_file);
>>       if (len != section_len)
>>               goto err;
>>
>> @@ -162,10 +164,8 @@ static int write_binary_file(struct saved_data *data, int fd)
>>               mode_t mode = specs[i].mode;
>>               size_t prefix_len = specs[i].prefix_len;
>>               int32_t stem_id = specs[i].stem_id;
>> -             pcre *re = specs[i].regex;
>> -             pcre_extra *sd = get_pcre_extra(&specs[i]);
>> +             struct regex_data *re = specs[i].regex;
>>               uint32_t to_write;
>> -             size_t size;
>>
>>               /* length of the context string (including nul) */
>>               to_write = strlen(context) + 1;
>> @@ -212,42 +212,10 @@ static int write_binary_file(struct saved_data *data, int fd)
>>               if (len != 1)
>>                       goto err;
>>
>> -             /* determine the size of the pcre data in bytes */
>> -             rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
>> +             /* Write regex related data */
>> +             rc = regex_writef(re, bin_file);
>>               if (rc < 0)
>>                       goto err;
>> -
>> -             /* write the number of bytes in the pcre data */
>> -             to_write = size;
>> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
>> -             if (len != 1)
>> -                     goto err;
>> -
>> -             /* write the actual pcre data as a char array */
>> -             len = fwrite(re, 1, to_write, bin_file);
>> -             if (len != to_write)
>> -                     goto err;
>> -
>> -             if (sd) {
>> -                     /* determine the size of the pcre study info */
>> -                     rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
>> -                     if (rc < 0)
>> -                             goto err;
>> -             } else
>> -                     size = 0;
>> -
>> -             /* write the number of bytes in the pcre study data */
>> -             to_write = size;
>> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
>> -             if (len != 1)
>> -                     goto err;
>> -
>> -             if (sd) {
>> -                     /* write the actual pcre study data as a char array */
>> -                     len = fwrite(sd->study_data, 1, to_write, bin_file);
>> -                     if (len != to_write)
>> -                             goto err;
>> -             }
>>       }
>>
>>       rc = 0;
>> @@ -270,8 +238,7 @@ static void free_specs(struct saved_data *data)
>>               free(specs[i].lr.ctx_trans);
>>               free(specs[i].regex_str);
>>               free(specs[i].type_str);
>> -             pcre_free(specs[i].regex);
>> -             pcre_free_study(specs[i].sd);
>> +             regex_data_free(specs[i].regex);
>>       }
>>       free(specs);
>>
>>
>
> _______________________________________________
> Selinux mailing list
> Selinux@tycho.nsa.gov
> To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
> To get help, send an email containing "help" to Selinux-request@tycho.nsa.gov.



-- 
Respectfully,

William C Roberts

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07 15:02 ` [PATCH 1/2] libselinux: add support for pcre2 Stephen Smalley
  2016-09-07 15:37   ` William Roberts
  2016-09-07 16:40   ` William Roberts
@ 2016-09-07 17:16   ` Janis Danisevskis
  2 siblings, 0 replies; 13+ messages in thread
From: Janis Danisevskis @ 2016-09-07 17:16 UTC (permalink / raw)
  To: Stephen Smalley, Janis Danisevskis, selinux, seandroid-list, jwcart2

[-- Attachment #1: Type: text/plain, Size: 45800 bytes --]

On Wed, Sep 7, 2016 at 4:00 PM Stephen Smalley <sds@tycho.nsa.gov> wrote:

> On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
> > From: Janis Danisevskis <jdanis@google.com>
> >
> > This patch moves all pcre1/2 dependencies into the new files regex.h
> > and regex.c implementing the common denominator of features needed
> > by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> > used implementations.
> >
> > As of this patch libselinux supports either pcre or pcre2 but not
> > both at the same time. The persistently stored file contexts
> > information differs. This means libselinux can only load file
> > context files generated by sefcontext_compile build with the
> > same pcre variant.
>
> Shouldn't the pcre variant be encoded in some manner in the
> file_contexts.bin file so that libselinux can tell immediately whether
> it is supported?
>

As the version of pcre is 8.x and pcre2 deliberately starts at 10.x, the
version string should be sufficient.


>
> > Also, for pcre2 the persistent format is architecture dependant.
> > Stored precompiled regular expressions can only be used on the
> > same architecture they were generated on. If pcre2 is used and
> > sefcontext_compile shall generate portable output, it and libselinux
> > must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
> > cost of having to recompile the regular expressions at load time.
>
> Is it worth supporting the -DNO_PERSISTENTLY_STORED_PATTERNS case?  The
> point of using file_contexts.bin was to move the cost of compiling the
> regexes to build time rather than load time; if we cannot do that, then
> how much do we gain from using file_contexts.bin instead of just falling
> back to file_contexts?
>

We were thinking along the lines of tuning the build process to generate
the correct output for the corresponding target. But until this is done
this seemed a viable trade-off.
I could imagine the following alternative solutions:
- Add an option to sefcontext_compile to omit the prepared pattern
  and make libselinux aware that it may be missing.
- Add the target architecture and make libselinux fall back to
  run-time generation in case of a mismatch.
- Keep the NO_PERSISTENTLY_STORED_PATTERNS flag only
  in the android fork until we don't need it any more.

>
> The #ifdef maze makes it very hard to read and maintain this code; that
> needs to be refactored.
>

Ack. On it.
[update]
I published the refactored version here, but I have not yet checked it
with valgrind.
https://github.com/werwurm/selinux/commit/23838ba22fcbc658cad205a7d9924892a56f1ddf


>
> valgrind is reporting numerous errors, including both use of
> uninitialised values and memory leaks with both patches applied.  Try:
> make DESTDIR=~/obj CFLAGS+=-g clean install
> LD_LIBRARY_PATH=~/obj/lib valgrind --leak-check=full
> ~/obj/usr/sbin/matchpathcon /etc
>
> On x86_64.
>
> Will provide review of the code itself later...
>
> >
> > Signed-off-by: Janis Danisevskis <jdanis@google.com>
> > ---
> >  libselinux/Makefile                   |  13 ++
> >  libselinux/src/Makefile               |   4 +-
> >  libselinux/src/label_file.c           |  91 ++------
> >  libselinux/src/label_file.h           |  54 ++---
> >  libselinux/src/regex.c                | 405
> ++++++++++++++++++++++++++++++++++
> >  libselinux/src/regex.h                | 168 ++++++++++++++
> >  libselinux/utils/Makefile             |   4 +-
> >  libselinux/utils/sefcontext_compile.c |  53 +----
> >  8 files changed, 637 insertions(+), 155 deletions(-)
> >  create mode 100644 libselinux/src/regex.c
> >  create mode 100644 libselinux/src/regex.h
> >
> > diff --git a/libselinux/Makefile b/libselinux/Makefile
> > index 6142b60..15d051e 100644
> > --- a/libselinux/Makefile
> > +++ b/libselinux/Makefile
> > @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
> >  endif
> >  export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
> >
> > +USE_PCRE2 ?= n
> > +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
> > +ifeq ($(USE_PCRE2),y)
> > +     PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
> > +     ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
> > +             PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
> > +     endif
> > +     PCRE_LDFLAGS := -lpcre2-8
> > +else
> > +     PCRE_LDFLAGS := -lpcre
> > +endif
> > +export PCRE_CFLAGS PCRE_LDFLAGS
> > +
> >  all install relabel clean distclean indent:
> >       @for subdir in $(SUBDIRS); do \
> >               (cd $$subdir && $(MAKE) $@) || exit 1; \
> > diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
> > index 37d01af..66687e6 100644
> > --- a/libselinux/src/Makefile
> > +++ b/libselinux/src/Makefile
> > @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k
> -Wformat-security -Winit-self -Wmissi
> >            -fipa-pure-const -Wno-suggest-attribute=pure
> -Wno-suggest-attribute=const \
> >            -Werror -Wno-aggregate-return -Wno-redundant-decls
> >
> > -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> > +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
> $(EMFLAGS) $(PCRE_CFLAGS)
> >
> >  SWIG_CFLAGS += -Wno-error -Wno-unused-variable
> -Wno-unused-but-set-variable -Wno-unused-parameter \
> >               -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes
> -Wno-missing-declarations
> > @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
> >       $(RANLIB) $@
> >
> >  $(LIBSO): $(LOBJS)
> > -     $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS)
> -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> > +     $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS)
> -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> >       ln -sf $@ $(TARGET)
> >
> >  $(LIBPC): $(LIBPC).in ../VERSION
> > diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
> > index c89bb35..6698624 100644
> > --- a/libselinux/src/label_file.c
> > +++ b/libselinux/src/label_file.c
> > @@ -15,7 +15,6 @@
> >  #include <errno.h>
> >  #include <limits.h>
> >  #include <stdint.h>
> > -#include <pcre.h>
> >  #include <unistd.h>
> >  #include <sys/mman.h>
> >  #include <sys/types.h>
> > @@ -176,7 +175,10 @@ static int load_mmap(struct selabel_handle *rec,
> const char *path,
> >               return -1;
> >
> >       if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
> > -             len = strlen(pcre_version());
> > +             if (!regex_version()) {
> > +                     return -1;
> > +             }
> > +             len = strlen(regex_version());
> >
> >               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >               if (rc < 0)
> > @@ -198,7 +200,7 @@ static int load_mmap(struct selabel_handle *rec,
> const char *path,
> >               }
> >
> >               str_buf[entry_len] = '\0';
> > -             if ((strcmp(str_buf, pcre_version()) != 0)) {
> > +             if ((strcmp(str_buf, regex_version()) != 0)) {
> >                       free(str_buf);
> >                       return -1;
> >               }
> > @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec,
> const char *path,
> >
> >               spec = &data->spec_arr[data->nspec];
> >               spec->from_mmap = 1;
> > +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
> > +             spec->regcomp = 0;
> > +#else
> >               spec->regcomp = 1;
> > +#endif
> >
> >               /* Process context */
> >               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> > @@ -364,47 +370,10 @@ static int load_mmap(struct selabel_handle *rec,
> const char *path,
> >                       spec->prefix_len = prefix_len;
> >               }
> >
> > -             /* Process regex and study_data entries */
> > -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> > -             if (rc < 0 || !entry_len) {
> > -                     rc = -1;
> > -                     goto err;
> > -             }
> > -             spec->regex = (pcre *)mmap_area->next_addr;
> > -             rc = next_entry(NULL, mmap_area, entry_len);
> > +             rc = regex_load_mmap(mmap_area, &spec->regex);
> >               if (rc < 0)
> >                       goto err;
> >
> > -             /* Check that regex lengths match. pcre_fullinfo()
> > -              * also validates its magic number. */
> > -             rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE,
> &len);
> > -             if (rc < 0 || len != entry_len) {
> > -                     rc = -1;
> > -                     goto err;
> > -             }
> > -
> > -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> > -             if (rc < 0 || !entry_len) {
> > -                     rc = -1;
> > -                     goto err;
> > -             }
> > -
> > -             if (entry_len) {
> > -                     spec->lsd.study_data = (void
> *)mmap_area->next_addr;
> > -                     spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> > -                     rc = next_entry(NULL, mmap_area, entry_len);
> > -                     if (rc < 0)
> > -                             goto err;
> > -
> > -                     /* Check that study data lengths match. */
> > -                     rc = pcre_fullinfo(spec->regex, &spec->lsd,
> > -                                        PCRE_INFO_STUDYSIZE, &len);
> > -                     if (rc < 0 || len != entry_len) {
> > -                             rc = -1;
> > -                             goto err;
> > -                     }
> > -             }
> > -
> >               data->nspec++;
> >       }
> >
> > @@ -609,10 +578,7 @@ static void closef(struct selabel_handle *rec)
> >                       continue;
> >               free(spec->regex_str);
> >               free(spec->type_str);
> > -             if (spec->regcomp) {
> > -                     pcre_free(spec->regex);
> > -                     pcre_free_study(spec->sd);
> > -             }
> > +             regex_data_free(spec->regex);
> >       }
> >
> >       for (i = 0; i < (unsigned int)data->num_stems; i++) {
> > @@ -644,13 +610,14 @@ static struct spec *lookup_common(struct
> selabel_handle *rec,
> >  {
> >       struct saved_data *data = (struct saved_data *)rec->data;
> >       struct spec *spec_arr = data->spec_arr;
> > -     int i, rc, file_stem, pcre_options = 0;
> > +     int i, rc, file_stem;
> >       mode_t mode = (mode_t)type;
> >       const char *buf;
> >       struct spec *ret = NULL;
> >       char *clean_key = NULL;
> >       const char *prev_slash, *next_slash;
> >       unsigned int sofar = 0;
> > +     struct regex_error_data regex_error_data;
> >
> >       if (!data->nspec) {
> >               errno = ENOENT;
> > @@ -677,9 +644,6 @@ static struct spec *lookup_common(struct
> selabel_handle *rec,
> >       file_stem = find_stem_from_file(data, &buf);
> >       mode &= S_IFMT;
> >
> > -     if (partial)
> > -             pcre_options |= PCRE_PARTIAL_SOFT;
> > -
> >       /*
> >        * Check for matching specifications in reverse order, so that
> >        * the last matching specification is used.
> > @@ -692,25 +656,19 @@ static struct spec *lookup_common(struct
> selabel_handle *rec,
> >                * a regex check        */
> >               if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
> >                   (!mode || !spec->mode || mode == spec->mode)) {
> > -                     if (compile_regex(data, spec, NULL) < 0)
> > +                     if (compile_regex(data, spec, &regex_error_data) <
> 0)
> >                               goto finish;
> >                       if (spec->stem_id == -1)
> > -                             rc = pcre_exec(spec->regex,
> > -                                                 get_pcre_extra(spec),
> > -                                                 key, strlen(key), 0,
> > -                                                 pcre_options, NULL, 0);
> > +                             rc = regex_match(spec->regex, key,
> partial);
> >                       else
> > -                             rc = pcre_exec(spec->regex,
> > -                                                 get_pcre_extra(spec),
> > -                                                 buf, strlen(buf), 0,
> > -                                                 pcre_options, NULL, 0);
> > -                     if (rc == 0) {
> > +                             rc = regex_match(spec->regex, buf,
> partial);
> > +                     if (rc == REGEX_MATCH) {
> >                               spec->matches++;
> >                               break;
> > -                     } else if (partial && rc == PCRE_ERROR_PARTIAL)
> > +                     } else if (partial && rc == REGEX_MATCH_PARTIAL)
> >                               break;
> >
> > -                     if (rc == PCRE_ERROR_NOMATCH)
> > +                     if (rc == REGEX_NO_MATCH)
> >                               continue;
> >
> >                       errno = ENOENT;
> > @@ -850,16 +808,9 @@ static enum selabel_cmp_result cmp(struct
> selabel_handle *h1,
> >               }
> >
> >               if (spec1->regcomp && spec2->regcomp) {
> > -                     size_t len1, len2;
> > -                     int rc;
> > -
> > -                     rc = pcre_fullinfo(spec1->regex, NULL,
> PCRE_INFO_SIZE, &len1);
> > -                     assert(rc == 0);
> > -                     rc = pcre_fullinfo(spec2->regex, NULL,
> PCRE_INFO_SIZE, &len2);
> > -                     assert(rc == 0);
> > -                     if (len1 != len2 ||
> > -                         memcmp(spec1->regex, spec2->regex, len1))
> > +                     if (regex_cmp(spec1->regex, spec2->regex) ==
> SELABEL_INCOMPARABLE){
> >                               return incomp(spec1, spec2, "regex", i, j);
> > +                     }
> >               } else {
> >                       if (strcmp(spec1->regex_str, spec2->regex_str))
> >                               return incomp(spec1, spec2, "regex_str",
> i, j);
> > diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
> > index 6d1e890..a2e30e5 100644
> > --- a/libselinux/src/label_file.h
> > +++ b/libselinux/src/label_file.h
> > @@ -6,6 +6,14 @@
> >
> >  #include <sys/stat.h>
> >
> > +/*
> > + * regex.h/c were introduced to hold all dependencies on the regular
> > + * expression back-end when we started supporting PCRE2. regex.h
> defines a
> > + * minimal interface required by libselinux, so that the remaining code
> > + * can be agnostic about the underlying implementation.
> > + */
> > +#include "regex.h"
> > +
> >  #include "callbacks.h"
> >  #include "label_internal.h"
> >
> > @@ -19,21 +27,12 @@
> >
> >  #define SELINUX_COMPILED_FCONTEXT_MAX_VERS
>  SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
> >
> > -/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> > -#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> > -#define pcre_free_study  pcre_free
> > -#endif
> > -
> >  /* A file security context specification. */
> >  struct spec {
> >       struct selabel_lookup_rec lr;   /* holds contexts for lookup
> result */
> >       char *regex_str;        /* regular expession string for
> diagnostics */
> >       char *type_str;         /* type string for diagnostic messages */
> > -     pcre *regex;            /* compiled regular expression */
> > -     union {
> > -             pcre_extra *sd; /* pointer to extra compiled stuff */
> > -             pcre_extra lsd; /* used to hold the mmap'd version */
> > -     };
> > +     struct regex_data * regex; /* backend dependent regular expression
> data */
> >       mode_t mode;            /* mode format value */
> >       int matches;            /* number of matching pathnames */
> >       int stem_id;            /* indicates which stem-compression item */
> > @@ -78,17 +77,6 @@ struct saved_data {
> >       struct mmap_area *mmap_areas;
> >  };
> >
> > -static inline pcre_extra *get_pcre_extra(struct spec *spec)
> > -{
> > -     if (spec->from_mmap) {
> > -             if (spec->lsd.study_data)
> > -                     return &spec->lsd;
> > -             else
> > -                     return NULL;
> > -     } else
> > -             return spec->sd;
> > -}
> > -
> >  static inline mode_t string_to_mode(char *mode)
> >  {
> >       size_t len;
> > @@ -331,13 +319,12 @@ static inline int next_entry(void *buf, struct
> mmap_area *fp, size_t bytes)
> >  }
> >
> >  static inline int compile_regex(struct saved_data *data, struct spec
> *spec,
> > -                                         const char **errbuf)
> > +                                         struct regex_error_data *
> error_data)
> >  {
> > -     const char *tmperrbuf;
> >       char *reg_buf, *anchored_regex, *cp;
> >       struct stem *stem_arr = data->stem_arr;
> >       size_t len;
> > -     int erroff;
> > +     int rc;
> >
> >       if (spec->regcomp)
> >               return 0; /* already done */
> > @@ -361,19 +348,9 @@ static inline int compile_regex(struct saved_data
> *data, struct spec *spec,
> >       *cp = '\0';
> >
> >       /* Compile the regular expression. */
> > -     spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
> > -                                                 &erroff, NULL);
> > +     rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
> >       free(anchored_regex);
> > -     if (!spec->regex) {
> > -             if (errbuf)
> > -                     *errbuf = tmperrbuf;
> > -             return -1;
> > -     }
> > -
> > -     spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
> > -     if (!spec->sd && tmperrbuf) {
> > -             if (errbuf)
> > -                     *errbuf = tmperrbuf;
> > +     if (rc < 0) {
> >               return -1;
> >       }
> >
> > @@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle
> *rec,
> >       struct saved_data *data = (struct saved_data *)rec->data;
> >       struct spec *spec_arr;
> >       unsigned int nspec = data->nspec;
> > -     const char *errbuf = NULL;
> > +     char const *errbuf;
> > +     struct regex_error_data error_data;
> >
> >       items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type,
> &context);
> >       if (items < 0) {
> > @@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle
> *rec,
> >       data->nspec++;
> >
> >       if (rec->validating &&
> > -                         compile_regex(data, &spec_arr[nspec],
> &errbuf)) {
> > +                         compile_regex(data, &spec_arr[nspec],
> &error_data)) {
> >               COMPAT_LOG(SELINUX_ERROR,
> >                          "%s:  line %u has invalid regex %s:  %s\n",
> >                          path, lineno, regex,
> > diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> > new file mode 100644
> > index 0000000..6b92b04
> > --- /dev/null
> > +++ b/libselinux/src/regex.c
> > @@ -0,0 +1,405 @@
> > +#include <assert.h>
> > +#include <stdint.h>
> > +#include <stdio.h>
> > +#include <string.h>
> > +
> > +#include "regex.h"
> > +#include "label_file.h"
> > +
> > +int regex_prepare_data(struct regex_data ** regex, char const *
> pattern_string,
> > +                     struct regex_error_data * errordata) {
> > +     memset(errordata, 0, sizeof(struct regex_error_data));
> > +     *regex = regex_data_create();
> > +     if (!(*regex))
> > +             return -1;
> > +#ifdef USE_PCRE2
> > +     (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
> > +                     PCRE2_ZERO_TERMINATED,
> > +                     PCRE2_DOTALL,
> > +                     &errordata->error_code,
> > +                     &errordata->error_offset, NULL);
> > +#else
> > +     (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
> > +                                     &errordata->error_buffer,
> > +                                     &errordata->error_offset, NULL);
> > +#endif
> > +     if (!(*regex)->regex) {
> > +             goto err;
> > +     }
> > +
> > +#ifdef USE_PCRE2
> > +     (*regex)->match_data =
> > +             pcre2_match_data_create_from_pattern((*regex)->regex,
> NULL);
> > +     if (!(*regex)->match_data) {
> > +             goto err;
> > +     }
> > +#else
> > +     (*regex)->sd = pcre_study((*regex)->regex, 0,
> &errordata->error_buffer);
> > +     if (!(*regex)->sd && errordata->error_buffer) {
> > +             goto err;
> > +     }
> > +     (*regex)->extra_owned = !!(*regex)->sd;
> > +#endif
> > +     return 0;
> > +
> > +err: regex_data_free(*regex);
> > +     *regex = NULL;
> > +     return -1;
> > +}
> > +
> > +char const * regex_version(void) {
> > +#ifdef USE_PCRE2
> > +     static int initialized = 0;
> > +     static char * version_string = NULL;
> > +     size_t version_string_len;
> > +     if (!initialized) {
> > +             version_string_len = pcre2_config(PCRE2_CONFIG_VERSION,
> NULL);
> > +             version_string = (char*) malloc(version_string_len);
> > +             if (!version_string) {
> > +                     return NULL;
> > +             }
> > +             pcre2_config(PCRE2_CONFIG_VERSION, version_string);
> > +             initialized = 1;
> > +     }
> > +     return version_string;
> > +#else
> > +     return pcre_version();
> > +#endif
> > +}
> > +
> > +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data **
> regex) {
> > +     int rc;
> > +     size_t entry_len;
> > +#ifndef USE_PCRE2
> > +     size_t info_len;
> > +#endif
> > +
> > +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> > +#ifdef USE_PCRE2
> > +     if (rc < 0)
> > +             return -1;
> > +
> > +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> > +     /* this should yield exactly one because we store one pattern at a
> time
> > +      */
> > +     rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
> > +     if (rc != 1)
> > +             return -1;
> > +
> > +     *regex = regex_data_create();
> > +     if (!*regex)
> > +             return -1;
> > +
> > +     rc = pcre2_serialize_decode(&(*regex)->regex, 1,
> > +                     (PCRE2_SPTR)mmap_area->next_addr, NULL);
> > +     if (rc != 1)
> > +             goto err;
> > +
> > +     (*regex)->match_data =
> > +             pcre2_match_data_create_from_pattern((*regex)->regex,
> NULL);
> > +     if (!(*regex)->match_data)
> > +             goto err;
> > +
> > +#endif /* NO_PERSISTENTLY_STORED_PATTERNS */
> > +     /* and skip the decoded bit */
> > +     rc = next_entry(NULL, mmap_area, entry_len);
> > +     if (rc < 0)
> > +             goto err;
> > +
> > +     return 0;
> > +#else
> > +     if (rc < 0 || !entry_len) {
> > +             return -1;
> > +     }
> > +     *regex = regex_data_create();
> > +     if (!(*regex))
> > +             return -1;
> > +
> > +     (*regex)->extra_owned = 0;
> > +     (*regex)->regex = (pcre *) mmap_area->next_addr;
> > +     rc = next_entry(NULL, mmap_area, entry_len);
> > +     if (rc < 0)
> > +             goto err;
> > +
> > +     /* Check that regex lengths match. pcre_fullinfo()
> > +      * also validates its magic number. */
> > +     rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE,
> &info_len);
> > +     if (rc < 0 || info_len != entry_len) {
> > +             goto err;
> > +     }
> > +
> > +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> > +     if (rc < 0 || !entry_len) {
> > +             goto err;
> > +     }
> > +
> > +     if (entry_len) {
> > +             (*regex)->lsd.study_data = (void *) mmap_area->next_addr;
> > +             (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> > +             rc = next_entry(NULL, mmap_area, entry_len);
> > +             if (rc < 0)
> > +                     goto err;
> > +
> > +             /* Check that study data lengths match. */
> > +             rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
> > +                                PCRE_INFO_STUDYSIZE, &info_len);
> > +             if (rc < 0 || info_len != entry_len)
> > +                     goto err;
> > +     }
> > +     return 0;
> > +#endif
> > +err:
> > +     regex_data_free(*regex);
> > +     *regex = NULL;
> > +     return -1;
> > +}
> > +
> > +int regex_writef(struct regex_data * regex, FILE * fp) {
> > +#if !defined USE_PCRE2 || !defined NO_PERSISTENTLY_STORED_PATTERNS
> > +     int rc;
> > +#endif
> > +     size_t len;
> > +#ifdef USE_PCRE2
> > +     PCRE2_SIZE to_write;
> > +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> > +     PCRE2_UCHAR * bytes;
> > +
> > +     /* encode the patter for serialization */
> > +     rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
> > +                                 &bytes, &to_write, NULL);
> > +     if (rc != 1)
> > +             return -1;
> > +
> > +#else
> > +     (void)regex; // silence unused parameter warning
> > +     to_write = 0;
> > +#endif
> > +     /* write serialized pattern's size */
> > +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> > +     if (len != 1) {
> > +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> > +             pcre2_serialize_free(bytes);
> > +#endif
> > +             return -1;
> > +     }
> > +
> > +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> > +     /* write serialized pattern */
> > +     len = fwrite(bytes, 1, to_write, fp);
> > +     if (len != to_write) {
> > +             pcre2_serialize_free(bytes);
> > +             return -1;
> > +     }
> > +     pcre2_serialize_free(bytes);
> > +#endif
> > +#else
> > +     uint32_t to_write;
> > +     size_t size;
> > +     pcre_extra * sd = regex->extra_owned ? regex->sd :
> > +                     (regex->lsd.study_data ? &regex->lsd : NULL);
> > +
> > +     /* determine the size of the pcre data in bytes */
> > +     rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
> > +     if (rc < 0)
> > +             return -1;
> > +
> > +     /* write the number of bytes in the pcre data */
> > +     to_write = size;
> > +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> > +     if (len != 1)
> > +             return -1;
> > +
> > +     /* write the actual pcre data as a char array */
> > +     len = fwrite(regex->regex, 1, to_write, fp);
> > +     if (len != to_write)
> > +             return -1;
> > +
> > +     if (sd) {
> > +             /* determine the size of the pcre study info */
> > +             rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
> > +                             &size);
> > +             if (rc < 0)
> > +                     return -1;
> > +     } else
> > +             size = 0;
> > +
> > +     /* write the number of bytes in the pcre study data */
> > +     to_write = size;
> > +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> > +     if (len != 1)
> > +             return -1;
> > +
> > +     if (sd) {
> > +             /* write the actual pcre study data as a char array */
> > +             len = fwrite(sd->study_data, 1, to_write, fp);
> > +             if (len != to_write)
> > +                     return -1;
> > +     }
> > +#endif
> > +     return 0;
> > +}
> > +
> > +struct regex_data * regex_data_create(void) {
> > +     struct regex_data * dummy = (struct regex_data*) malloc(
> > +                     sizeof(struct regex_data));
> > +     if (dummy) {
> > +             memset(dummy, 0, sizeof(struct regex_data));
> > +     }
> > +     return dummy;
> > +}
> > +
> > +void regex_data_free(struct regex_data * regex) {
> > +     if (regex) {
> > +#ifdef USE_PCRE2
> > +             if (regex->regex) {
> > +                     pcre2_code_free(regex->regex);
> > +             }
> > +             if (regex->match_data) {
> > +                     pcre2_match_data_free(regex->match_data);
> > +             }
> > +#else
> > +             if (regex->regex)
> > +                     pcre_free(regex->regex);
> > +             if (regex->extra_owned && regex->sd) {
> > +                     pcre_free_study(regex->sd);
> > +             }
> > +#endif
> > +             free(regex);
> > +     }
> > +}
> > +
> > +int regex_match(struct regex_data * regex, char const * subject, int
> partial) {
> > +     int rc;
> > +#ifdef USE_PCRE2
> > +     rc = pcre2_match(regex->regex,
> > +                     (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> > +                     partial ? PCRE2_PARTIAL_SOFT : 0,
> regex->match_data,
> > +                     NULL);
> > +     if (rc > 0)
> > +     return REGEX_MATCH;
> > +     switch (rc) {
> > +             case PCRE2_ERROR_PARTIAL:
> > +                     return REGEX_MATCH_PARTIAL;
> > +             case PCRE2_ERROR_NOMATCH:
> > +                     return REGEX_NO_MATCH;
> > +             default:
> > +                     return REGEX_ERROR;
> > +     }
> > +#else
> > +     rc = pcre_exec(regex->regex,
> > +                     regex->extra_owned ? regex->sd : &regex->lsd,
> subject,
> > +                     strlen(subject), 0, partial ? PCRE_PARTIAL_SOFT :
> 0,
> > +                     NULL,
> > +                     0);
> > +     switch (rc) {
> > +             case 0:
> > +                     return REGEX_MATCH;
> > +             case PCRE_ERROR_PARTIAL:
> > +                     return REGEX_MATCH_PARTIAL;
> > +             case PCRE_ERROR_NOMATCH:
> > +                     return REGEX_NO_MATCH;
> > +             default:
> > +                     return REGEX_ERROR;
> > +     }
> > +#endif
> > +}
> > +
> > +/* TODO Replace this compare function with something that actually
> compares the
> > + * regular expressions.
> > + * This compare function basically just compares the binary
> representations of
> > + * the automatons, and because this representation contains pointers and
> > + * metadata, it can only return a match if regex1 == regex2.
> > + * Preferably, this function would be replaced with an algorithm that
> computes
> > + * the equivalence of the automatons systematically.
> > + */
> > +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
> > +     int rc;
> > +     size_t len1, len2;
> > +#ifdef USE_PCRE2
> > +     rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
> > +     assert(rc == 0);
> > +     rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
> > +     assert(rc == 0);
> > +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> > +             return SELABEL_INCOMPARABLE;
> > +#else
> > +     rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
> > +     assert(rc == 0);
> > +     rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
> > +     assert(rc == 0);
> > +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> > +             return SELABEL_INCOMPARABLE;
> > +#endif
> > +     return SELABEL_EQUAL;
> > +}
> > +
> > +void regex_format_error(struct regex_error_data const * error_data,
> > +                     char * buffer, size_t buf_size) {
> > +     unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
> > +     char * ptr = &buffer[buf_size - the_end_length];
> > +     int rc = 0;
> > +     size_t pos = 0;
> > +     if (!buffer || !buf_size)
> > +             return;
> > +     rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
> > +     if (rc < 0)
> > +             /* If snprintf fails it constitutes a logical error that
> needs
> > +              * fixing.
> > +              */
> > +             abort();
> > +
> > +     pos += rc;
> > +     if (pos >= buf_size)
> > +             goto truncated;
> > +
> > +     if (error_data->error_offset > 0) {
> > +#ifdef USE_PCRE2
> > +             rc = snprintf(buffer + pos, buf_size - pos, "At offset
> %zu: ",
> > +                             error_data->error_offset);
> > +#else
> > +             rc = snprintf(buffer + pos, buf_size - pos, "At offset %d:
> ",
> > +                             error_data->error_offset);
> > +#endif
> > +             if (rc < 0)
> > +                     abort();
> > +
> > +     }
> > +     pos += rc;
> > +     if (pos >= buf_size)
> > +             goto truncated;
> > +
> > +#ifdef USE_PCRE2
> > +     rc = pcre2_get_error_message(error_data->error_code,
> > +                     (PCRE2_UCHAR*)(buffer + pos),
> > +                     buf_size - pos);
> > +     if (rc == PCRE2_ERROR_NOMEMORY)
> > +             goto truncated;
> > +#else
> > +     rc = snprintf(buffer + pos, buf_size - pos, "%s",
> > +                     error_data->error_buffer);
> > +     if (rc < 0)
> > +             abort();
> > +
> > +     if ((size_t)rc < strlen(error_data->error_buffer))
> > +             goto truncated;
> > +#endif
> > +
> > +     return;
> > +
> > +truncated:
> > +     /* replace end of string with "..." to indicate that it was
> truncated */
> > +     switch (the_end_length) {
> > +             /* no break statements, fall-through is intended */
> > +             case 4:
> > +                     *ptr++ = '.';
> > +             case 3:
> > +                     *ptr++ = '.';
> > +             case 2:
> > +                     *ptr++ = '.';
> > +             case 1:
> > +                     *ptr++ = '\0';
> > +             default:
> > +                     break;
> > +     }
> > +     return;
> > +}
> > diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
> > new file mode 100644
> > index 0000000..bdc10c0
> > --- /dev/null
> > +++ b/libselinux/src/regex.h
> > @@ -0,0 +1,168 @@
> > +#ifndef SRC_REGEX_H_
> > +#define SRC_REGEX_H_
> > +
> > +#include <stdio.h>
> > +
> > +#ifdef USE_PCRE2
> > +#include <pcre2.h>
> > +#else
> > +#include <pcre.h>
> > +#endif
> > +
> > +enum {
> > +     REGEX_MATCH,
> > +     REGEX_MATCH_PARTIAL,
> > +     REGEX_NO_MATCH,
> > +     REGEX_ERROR = -1,
> > +};
> > +
> > +#ifdef USE_PCRE2
> > +struct regex_data {
> > +     pcre2_code * regex; /* compiled regular expression */
> > +     pcre2_match_data * match_data; /* match data block required for
> the compiled
> > +      pattern in regex2 */
> > +};
> > +
> > +struct regex_error_data {
> > +     int error_code;
> > +     PCRE2_SIZE error_offset;
> > +};
> > +
> > +/* ^^^^^^ USE_PCRE2  ^^^^^^ */
> > +#else
> > +/* vvvvvv USE_PCRE vvvvvv */
> > +
> > +/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> > +#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> > +#define pcre_free_study  pcre_free
> > +#endif
> > +
> > +struct regex_data {
> > +     pcre *regex; /* compiled regular expression */
> > +     int extra_owned; /* non zero if pcre_extra is owned by this
> structure
> > +                       * and thus must be freed on destruction.
> > +                       */
> > +     union {
> > +             pcre_extra *sd; /* pointer to extra compiled stuff */
> > +             pcre_extra lsd; /* used to hold the mmap'd version */
> > +     };
> > +};
> > +
> > +struct regex_error_data {
> > +     char const * error_buffer;
> > +     int error_offset;
> > +};
> > +
> > +#endif /* USE_PCRE2 */
> > +
> > +struct mmap_area;
> > +
> > +/**
> > + * regex_version returns the version string of the underlying
> > + * regular expressions library. In the case of PCRE it just returns the
> > + * result of pcre_version(). In the case of PCRE2, the very first time
> this
> > + * function is called it allocates a buffer large enough to hold the
> version
> > + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
> > + * The allocated buffer will linger in memory until the calling process
> is being
> > + * reaped.
> > + *
> > + * It may return NULL on error.
> > + */
> > +char const * regex_version(void);
> > +/**
> > + * This constructor function allocates a buffer for a regex_data
> structure.
> > + * The buffer is being initialized with zeroes.
> > + */
> > +struct regex_data * regex_data_create(void);
> > +/**
> > + * This complementary destructor function frees the a given regex_data
> buffer.
> > + * It also frees any non NULL member pointers with the appropriate
> pcreX_X_free
> > + * function. For PCRE this function respects the extra_owned field and
> frees
> > + * the pcre_extra data conditionally. Calling this function on a NULL
> pointer is
> > + * safe.
> > + */
> > +void regex_data_free(struct regex_data * regex);
> > +/**
> > + * This function compiles the regular expression. Additionally, it
> prepares
> > + * data structures required by the different underlying engines. For
> PCRE
> > + * it calls pcre_study to generate optional data required for optimized
> > + * execution of the compiled pattern. In the case of PCRE2, it allocates
> > + * a pcre2_match_data structure of appropriate size to hold all possible
> > + * matches created by the pattern.
> > + *
> > + * @arg regex If successful, the structure returned through *regex was
> allocated
> > + *            with regex_data_create and must be freed with
> regex_data_free.
> > + * @arg pattern_string The pattern string that is to be compiled.
> > + * @arg errordata A pointer to a regex_error_data structure must be
> passed
> > + *                to this function. This structure depends on the
> underlying
> > + *                implementation. It can be passed to regex_format_error
> > + *                to generate a human readable error message.
> > + * @retval 0 on success
> > + * @retval -1 on error
> > + */
> > +int regex_prepare_data(struct regex_data ** regex, char const *
> pattern_string,
> > +                     struct regex_error_data * errordata);
> > +/**
> > + * This function loads a serialized precompiled pattern from a
> contiguous
> > + * data region given by map_area.
> > + *
> > + * @arg map_area Description of the memory region holding a serialized
> > + *               representation of the precompiled pattern.
> > + * @arg regex If successful, the structure returned through *regex was
> allocated
> > + *            with regex_data_create and must be freed with
> regex_data_free.
> > + *
> > + * @retval 0 on success
> > + * @retval -1 on error
> > + */
> > +int regex_load_mmap(struct mmap_area * map_area, struct regex_data **
> regex);
> > +/**
> > + * This function stores a precompiled regular expression to a file.
> > + * In the case of PCRE, it just dumps the binary representation of the
> > + * precompiled pattern into a file. In the case of PCRE2, it uses the
> > + * serialization function provided by the library.
> > + *
> > + * @arg regex The precompiled regular expression data.
> > + * @arg fp A file stream specifying the output file.
> > + */
> > +int regex_writef(struct regex_data * regex, FILE * fp);
> > +/**
> > + * This function applies a precompiled pattern to a subject string and
> > + * returns whether or not a match was found.
> > + *
> > + * @arg regex The precompiled pattern.
> > + * @arg subject The subject string.
> > + * @arg partial Boolean indicating if partial matches are wanted. A
> nonzero
> > + *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT
> as
> > + *              option to pcre_exec of pcre2_match.
> > + * @retval REGEX_MATCH if a match was found
> > + * @retval REGEX_MATCH_PARTIAL if a partial match was found
> > + * @retval REGEX_NO_MATCH if no match was found
> > + * @retval REGEX_ERROR if an error was encountered during the execution
> of the
> > + *                     regular expression
> > + */
> > +int regex_match(struct regex_data * regex, char const * subject, int
> partial);
> > +/**
> > + * This function compares two compiled regular expressions (regex1 and
> regex2).
> > + * It compares the binary representations of the compiled patterns. It
> is a very
> > + * crude approximation because the binary representation holds data like
> > + * reference counters, that has nothing to do with the actual state
> machine.
> > + *
> > + * @retval SELABEL_EQUAL if the pattern's binary representations are
> exactly
> > + *                       the same
> > + * @retval SELABEL_INCOMPARABLE otherwise
> > + */
> > +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
> > +/**
> > + * This function takes the error data returned by regex_prepare_data
> and turns
> > + * it in to a human readable error message.
> > + * If the buffer given to hold the error message is too small it
> truncates the
> > + * message and indicates the truncation with an ellipsis ("...") at the
> end of
> > + * the buffer.
> > + *
> > + * @arg error_data Error data as returned by regex_prepare_data.
> > + * @arg buffer String buffer to hold the formatted error string.
> > + * @arg buf_size Total size of the given buffer in bytes.
> > + */
> > +void regex_format_error(struct regex_error_data const * error_data,
> > +                     char * buffer, size_t buf_size);
> > +#endif  /* SRC_REGEX_H_ */
> > diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
> > index 8497cb4..1e7a048 100644
> > --- a/libselinux/utils/Makefile
> > +++ b/libselinux/utils/Makefile
> > @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k
> -Wformat-security -Winit-self -Wmissi
> >            -fasynchronous-unwind-tables -fdiagnostics-show-option
> -funit-at-a-time \
> >            -fipa-pure-const -Wno-suggest-attribute=pure
> -Wno-suggest-attribute=const \
> >            -Werror -Wno-aggregate-return -Wno-redundant-decls
> > -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> > +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
> $(EMFLAGS) $(PCRE_CFLAGS)
> >  LDLIBS += -L../src -lselinux -L$(LIBDIR)
> >
> >  TARGETS=$(patsubst %.c,%,$(wildcard *.c))
> >
> > -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
> > +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a
> -lsepol
> >
> >  selinux_restorecon: LDLIBS += -lsepol
> >
> > diff --git a/libselinux/utils/sefcontext_compile.c
> b/libselinux/utils/sefcontext_compile.c
> > index fd6fb78..8ff73f4 100644
> > --- a/libselinux/utils/sefcontext_compile.c
> > +++ b/libselinux/utils/sefcontext_compile.c
> > @@ -1,6 +1,5 @@
> >  #include <ctype.h>
> >  #include <errno.h>
> > -#include <pcre.h>
> >  #include <stdint.h>
> >  #include <stdio.h>
> >  #include <string.h>
> > @@ -13,6 +12,7 @@
> >  #include <sepol/sepol.h>
> >
> >  #include "../src/label_file.h"
> > +#include "../src/regex.h"
> >
> >  const char *policy_file;
> >  static int ctx_err;
> > @@ -119,12 +119,14 @@ static int write_binary_file(struct saved_data
> *data, int fd)
> >       if (len != 1)
> >               goto err;
> >
> > -     /* write the pcre version */
> > -     section_len = strlen(pcre_version());
> > +     /* write version of the regex back-end */
> > +     if (!regex_version())
> > +             goto err;
> > +     section_len = strlen(regex_version());
> >       len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
> >       if (len != 1)
> >               goto err;
> > -     len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
> > +     len = fwrite(regex_version(), sizeof(char), section_len, bin_file);
> >       if (len != section_len)
> >               goto err;
> >
> > @@ -162,10 +164,8 @@ static int write_binary_file(struct saved_data
> *data, int fd)
> >               mode_t mode = specs[i].mode;
> >               size_t prefix_len = specs[i].prefix_len;
> >               int32_t stem_id = specs[i].stem_id;
> > -             pcre *re = specs[i].regex;
> > -             pcre_extra *sd = get_pcre_extra(&specs[i]);
> > +             struct regex_data *re = specs[i].regex;
> >               uint32_t to_write;
> > -             size_t size;
> >
> >               /* length of the context string (including nul) */
> >               to_write = strlen(context) + 1;
> > @@ -212,42 +212,10 @@ static int write_binary_file(struct saved_data
> *data, int fd)
> >               if (len != 1)
> >                       goto err;
> >
> > -             /* determine the size of the pcre data in bytes */
> > -             rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
> > +             /* Write regex related data */
> > +             rc = regex_writef(re, bin_file);
> >               if (rc < 0)
> >                       goto err;
> > -
> > -             /* write the number of bytes in the pcre data */
> > -             to_write = size;
> > -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> > -             if (len != 1)
> > -                     goto err;
> > -
> > -             /* write the actual pcre data as a char array */
> > -             len = fwrite(re, 1, to_write, bin_file);
> > -             if (len != to_write)
> > -                     goto err;
> > -
> > -             if (sd) {
> > -                     /* determine the size of the pcre study info */
> > -                     rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE,
> &size);
> > -                     if (rc < 0)
> > -                             goto err;
> > -             } else
> > -                     size = 0;
> > -
> > -             /* write the number of bytes in the pcre study data */
> > -             to_write = size;
> > -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> > -             if (len != 1)
> > -                     goto err;
> > -
> > -             if (sd) {
> > -                     /* write the actual pcre study data as a char
> array */
> > -                     len = fwrite(sd->study_data, 1, to_write,
> bin_file);
> > -                     if (len != to_write)
> > -                             goto err;
> > -             }
> >       }
> >
> >       rc = 0;
> > @@ -270,8 +238,7 @@ static void free_specs(struct saved_data *data)
> >               free(specs[i].lr.ctx_trans);
> >               free(specs[i].regex_str);
> >               free(specs[i].type_str);
> > -             pcre_free(specs[i].regex);
> > -             pcre_free_study(specs[i].sd);
> > +             regex_data_free(specs[i].regex);
> >       }
> >       free(specs);
> >
> >
>
>

[-- Attachment #2: Type: text/html, Size: 78895 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07  8:08 [PATCH 1/2] libselinux: add support for pcre2 Janis Danisevskis
                   ` (2 preceding siblings ...)
  2016-09-07 15:19 ` William Roberts
@ 2016-09-07 18:25 ` Stephen Smalley
  2016-09-07 18:38   ` Stephen Smalley
  2016-09-07 19:29   ` Stephen Smalley
  3 siblings, 2 replies; 13+ messages in thread
From: Stephen Smalley @ 2016-09-07 18:25 UTC (permalink / raw)
  To: Janis Danisevskis, selinux, seandroid-list, jwcart2; +Cc: Janis Danisevskis

On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
> From: Janis Danisevskis <jdanis@google.com>
> 
> This patch moves all pcre1/2 dependencies into the new files regex.h
> and regex.c implementing the common denominator of features needed
> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> used implementations.
> 
> As of this patch libselinux supports either pcre or pcre2 but not
> both at the same time. The persistently stored file contexts
> information differs. This means libselinux can only load file
> context files generated by sefcontext_compile build with the
> same pcre variant.
> 
> Also, for pcre2 the persistent format is architecture dependant.
> Stored precompiled regular expressions can only be used on the
> same architecture they were generated on. If pcre2 is used and
> sefcontext_compile shall generate portable output, it and libselinux
> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
> cost of having to recompile the regular expressions at load time.
> 
> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> ---
>  libselinux/Makefile                   |  13 ++
>  libselinux/src/Makefile               |   4 +-
>  libselinux/src/label_file.c           |  91 ++------
>  libselinux/src/label_file.h           |  54 ++---
>  libselinux/src/regex.c                | 405 ++++++++++++++++++++++++++++++++++
>  libselinux/src/regex.h                | 168 ++++++++++++++
>  libselinux/utils/Makefile             |   4 +-
>  libselinux/utils/sefcontext_compile.c |  53 +----
>  8 files changed, 637 insertions(+), 155 deletions(-)
>  create mode 100644 libselinux/src/regex.c
>  create mode 100644 libselinux/src/regex.h
> 

> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
> index c89bb35..6698624 100644
> --- a/libselinux/src/label_file.c
> +++ b/libselinux/src/label_file.c
> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>  
>  		spec = &data->spec_arr[data->nspec];
>  		spec->from_mmap = 1;
> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
> +		spec->regcomp = 0;
> +#else
>  		spec->regcomp = 1;
> +#endif

If we still need this, maybe regex_load_mmap() should take
&spec->regcomp as an argument and set it internally so that we don't
need to litter this file with #ifdefs?

> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
> index 6d1e890..a2e30e5 100644
> --- a/libselinux/src/label_file.h
> +++ b/libselinux/src/label_file.h
> @@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle *rec,
>  	struct saved_data *data = (struct saved_data *)rec->data;
>  	struct spec *spec_arr;
>  	unsigned int nspec = data->nspec;
> -	const char *errbuf = NULL;
> +	char const *errbuf;
> +	struct regex_error_data error_data;
>  
>  	items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
>  	if (items < 0) {
> @@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle *rec,
>  	data->nspec++;
>  
>  	if (rec->validating &&
> -			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
> +			    compile_regex(data, &spec_arr[nspec], &error_data)) {
>  		COMPAT_LOG(SELINUX_ERROR,
>  			   "%s:  line %u has invalid regex %s:  %s\n",
>  			   path, lineno, regex,

On the next line (omitted from the diff) we pass errbuf if set as the
error string.  But your error is hidden in error_data.  Looks like we
need to use regex_format_error() here?

> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> new file mode 100644
> index 0000000..6b92b04
> --- /dev/null
> +++ b/libselinux/src/regex.c
> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
> +	int rc;
> +	size_t entry_len;
> +#ifndef USE_PCRE2
> +	size_t info_len;
> +#endif
> +
> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));

This and similar statements are the cause of your uninitialised variable
use warnings.  entry_len needs to be a uint32_t here.  size_t is 64 bits
on 64-bit architectures.  Same for info_len.

> +struct regex_data * regex_data_create(void) {
> +	struct regex_data * dummy = (struct regex_data*) malloc(
> +			sizeof(struct regex_data));
> +	if (dummy) {
> +		memset(dummy, 0, sizeof(struct regex_data));
> +	}
> +	return dummy;
> +}
> +
> +void regex_data_free(struct regex_data * regex) {
> +	if (regex) {
> +#ifdef USE_PCRE2
> +		if (regex->regex) {
> +			pcre2_code_free(regex->regex);
> +		}
> +		if (regex->match_data) {
> +			pcre2_match_data_free(regex->match_data);
> +		}
> +#else
> +		if (regex->regex)
> +			pcre_free(regex->regex);
> +		if (regex->extra_owned && regex->sd) {
> +			pcre_free_study(regex->sd);
> +		}
> +#endif
> +		free(regex);
> +	}
> +}

The reason you are leaking memory is that regex_data_free() is only ever
called if !spec->from_mmap.  The old code in closef() to free the
compiled regexes was only necessary when the regexes were compiled at
runtime, but you have introduced a memory allocation for regex_data even
for the mmap'd file that needs to be freed.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07 16:40   ` William Roberts
@ 2016-09-07 18:29     ` Jason Zaman
       [not found]       ` <CAFftDdqSUHPVVn7megAAHyjn_14XvVqn+8ukywr8nCgwMH4X3g@mail.gmail.com>
  0 siblings, 1 reply; 13+ messages in thread
From: Jason Zaman @ 2016-09-07 18:29 UTC (permalink / raw)
  To: William Roberts
  Cc: Stephen Smalley, Janis Danisevskis, seandroid-list,
	Janis Danisevskis, selinux

On Wed, Sep 07, 2016 at 09:40:43AM -0700, William Roberts wrote:
> On Wed, Sep 7, 2016 at 8:02 AM, Stephen Smalley <sds@tycho.nsa.gov> wrote:
> > On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
> >> From: Janis Danisevskis <jdanis@google.com>
> >>
> >> This patch moves all pcre1/2 dependencies into the new files regex.h
> >> and regex.c implementing the common denominator of features needed
> >> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> >> used implementations.
> >>
> >> As of this patch libselinux supports either pcre or pcre2 but not
> >> both at the same time. The persistently stored file contexts
> >> information differs. This means libselinux can only load file
> >> context files generated by sefcontext_compile build with the
> >> same pcre variant.
> >
> > Shouldn't the pcre variant be encoded in some manner in the
> > file_contexts.bin file so that libselinux can tell immediately whether
> > it is supported?
> 
> Don't we have that in pcre_version()?
> 
> >
> >> Also, for pcre2 the persistent format is architecture dependant.
> >> Stored precompiled regular expressions can only be used on the
> >> same architecture they were generated on. If pcre2 is used and
> >> sefcontext_compile shall generate portable output, it and libselinux
> >> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
> >> cost of having to recompile the regular expressions at load time.

I'd rather the arch were just added to the compiled files, as is done
for the pcre version. It may not be a bad idea to add the arch even for
pcre1, since those also seemed quite fragile in the past.

> > Is it worth supporting the -DNO_PERSISTENTLY_STORED_PATTERNS case?  The
> > point of using file_contexts.bin was to move the cost of compiling the
> > regexes to build time rather than load time; if we cannot do that, then
> > how much do we gain from using file_contexts.bin instead of just falling
> > back to file_contexts?
> >
> > The #ifdef maze makes it very hard to read and maintain this code; that
> > needs to be refactored.
> 
> Perhaps set up some function pointers and hide the regex structure to be opaque
> to the rest of selinux. This makes me think, should we just dlopen the
> correct version of libpcre based on pcre_version()? Not sure how you
> feel about dlopen calls....

Please no :(. dlopen makes things harder to track in a distro. There are
a ton of tools in Gentoo that figure things out with ldd and don't work
at all with dlopen (like keeping old libs around if other packages link
to old .so versions and whatnot).

-- Jason

> > valgrind is reporting numerous errors, including both use of
> > uninitialised values and memory leaks with both patches applied.  Try:
> > make DESTDIR=~/obj CFLAGS+=-g clean install
> > LD_LIBRARY_PATH=~/obj/lib valgrind --leak-check=full
> > ~/obj/usr/sbin/matchpathcon /etc
> >
> > On x86_64.
> >
> > Will provide review of the code itself later...
> >
> >>
> >> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> >> ---
> >>  libselinux/Makefile                   |  13 ++
> >>  libselinux/src/Makefile               |   4 +-
> >>  libselinux/src/label_file.c           |  91 ++------
> >>  libselinux/src/label_file.h           |  54 ++---
> >>  libselinux/src/regex.c                | 405 ++++++++++++++++++++++++++++++++++
> >>  libselinux/src/regex.h                | 168 ++++++++++++++
> >>  libselinux/utils/Makefile             |   4 +-
> >>  libselinux/utils/sefcontext_compile.c |  53 +----
> >>  8 files changed, 637 insertions(+), 155 deletions(-)
> >>  create mode 100644 libselinux/src/regex.c
> >>  create mode 100644 libselinux/src/regex.h
> >>
> >> diff --git a/libselinux/Makefile b/libselinux/Makefile
> >> index 6142b60..15d051e 100644
> >> --- a/libselinux/Makefile
> >> +++ b/libselinux/Makefile
> >> @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
> >>  endif
> >>  export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
> >>
> >> +USE_PCRE2 ?= n
> >> +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
> >> +ifeq ($(USE_PCRE2),y)
> >> +     PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
> >> +     ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
> >> +             PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
> >> +     endif
> >> +     PCRE_LDFLAGS := -lpcre2-8
> >> +else
> >> +     PCRE_LDFLAGS := -lpcre
> >> +endif
> >> +export PCRE_CFLAGS PCRE_LDFLAGS
> >> +
> >>  all install relabel clean distclean indent:
> >>       @for subdir in $(SUBDIRS); do \
> >>               (cd $$subdir && $(MAKE) $@) || exit 1; \
> >> diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
> >> index 37d01af..66687e6 100644
> >> --- a/libselinux/src/Makefile
> >> +++ b/libselinux/src/Makefile
> >> @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
> >>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
> >>            -Werror -Wno-aggregate-return -Wno-redundant-decls
> >>
> >> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> >> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
> >>
> >>  SWIG_CFLAGS += -Wno-error -Wno-unused-variable -Wno-unused-but-set-variable -Wno-unused-parameter \
> >>               -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes -Wno-missing-declarations
> >> @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
> >>       $(RANLIB) $@
> >>
> >>  $(LIBSO): $(LOBJS)
> >> -     $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> >> +     $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl $(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> >>       ln -sf $@ $(TARGET)
> >>
> >>  $(LIBPC): $(LIBPC).in ../VERSION
> >> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
> >> index c89bb35..6698624 100644
> >> --- a/libselinux/src/label_file.c
> >> +++ b/libselinux/src/label_file.c
> >> @@ -15,7 +15,6 @@
> >>  #include <errno.h>
> >>  #include <limits.h>
> >>  #include <stdint.h>
> >> -#include <pcre.h>
> >>  #include <unistd.h>
> >>  #include <sys/mman.h>
> >>  #include <sys/types.h>
> >> @@ -176,7 +175,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
> >>               return -1;
> >>
> >>       if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
> >> -             len = strlen(pcre_version());
> >> +             if (!regex_version()) {
> >> +                     return -1;
> >> +             }
> >> +             len = strlen(regex_version());
> >>
> >>               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >>               if (rc < 0)
> >> @@ -198,7 +200,7 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
> >>               }
> >>
> >>               str_buf[entry_len] = '\0';
> >> -             if ((strcmp(str_buf, pcre_version()) != 0)) {
> >> +             if ((strcmp(str_buf, regex_version()) != 0)) {
> >>                       free(str_buf);
> >>                       return -1;
> >>               }
> >> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
> >>
> >>               spec = &data->spec_arr[data->nspec];
> >>               spec->from_mmap = 1;
> >> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
> >> +             spec->regcomp = 0;
> >> +#else
> >>               spec->regcomp = 1;
> >> +#endif
> >>
> >>               /* Process context */
> >>               rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> @@ -364,47 +370,10 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
> >>                       spec->prefix_len = prefix_len;
> >>               }
> >>
> >> -             /* Process regex and study_data entries */
> >> -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> -             if (rc < 0 || !entry_len) {
> >> -                     rc = -1;
> >> -                     goto err;
> >> -             }
> >> -             spec->regex = (pcre *)mmap_area->next_addr;
> >> -             rc = next_entry(NULL, mmap_area, entry_len);
> >> +             rc = regex_load_mmap(mmap_area, &spec->regex);
> >>               if (rc < 0)
> >>                       goto err;
> >>
> >> -             /* Check that regex lengths match. pcre_fullinfo()
> >> -              * also validates its magic number. */
> >> -             rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE, &len);
> >> -             if (rc < 0 || len != entry_len) {
> >> -                     rc = -1;
> >> -                     goto err;
> >> -             }
> >> -
> >> -             rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> -             if (rc < 0 || !entry_len) {
> >> -                     rc = -1;
> >> -                     goto err;
> >> -             }
> >> -
> >> -             if (entry_len) {
> >> -                     spec->lsd.study_data = (void *)mmap_area->next_addr;
> >> -                     spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> >> -                     rc = next_entry(NULL, mmap_area, entry_len);
> >> -                     if (rc < 0)
> >> -                             goto err;
> >> -
> >> -                     /* Check that study data lengths match. */
> >> -                     rc = pcre_fullinfo(spec->regex, &spec->lsd,
> >> -                                        PCRE_INFO_STUDYSIZE, &len);
> >> -                     if (rc < 0 || len != entry_len) {
> >> -                             rc = -1;
> >> -                             goto err;
> >> -                     }
> >> -             }
> >> -
> >>               data->nspec++;
> >>       }
> >>
> >> @@ -609,10 +578,7 @@ static void closef(struct selabel_handle *rec)
> >>                       continue;
> >>               free(spec->regex_str);
> >>               free(spec->type_str);
> >> -             if (spec->regcomp) {
> >> -                     pcre_free(spec->regex);
> >> -                     pcre_free_study(spec->sd);
> >> -             }
> >> +             regex_data_free(spec->regex);
> >>       }
> >>
> >>       for (i = 0; i < (unsigned int)data->num_stems; i++) {
> >> @@ -644,13 +610,14 @@ static struct spec *lookup_common(struct selabel_handle *rec,
> >>  {
> >>       struct saved_data *data = (struct saved_data *)rec->data;
> >>       struct spec *spec_arr = data->spec_arr;
> >> -     int i, rc, file_stem, pcre_options = 0;
> >> +     int i, rc, file_stem;
> >>       mode_t mode = (mode_t)type;
> >>       const char *buf;
> >>       struct spec *ret = NULL;
> >>       char *clean_key = NULL;
> >>       const char *prev_slash, *next_slash;
> >>       unsigned int sofar = 0;
> >> +     struct regex_error_data regex_error_data;
> >>
> >>       if (!data->nspec) {
> >>               errno = ENOENT;
> >> @@ -677,9 +644,6 @@ static struct spec *lookup_common(struct selabel_handle *rec,
> >>       file_stem = find_stem_from_file(data, &buf);
> >>       mode &= S_IFMT;
> >>
> >> -     if (partial)
> >> -             pcre_options |= PCRE_PARTIAL_SOFT;
> >> -
> >>       /*
> >>        * Check for matching specifications in reverse order, so that
> >>        * the last matching specification is used.
> >> @@ -692,25 +656,19 @@ static struct spec *lookup_common(struct selabel_handle *rec,
> >>                * a regex check        */
> >>               if ((spec->stem_id == -1 || spec->stem_id == file_stem) &&
> >>                   (!mode || !spec->mode || mode == spec->mode)) {
> >> -                     if (compile_regex(data, spec, NULL) < 0)
> >> +                     if (compile_regex(data, spec, &regex_error_data) < 0)
> >>                               goto finish;
> >>                       if (spec->stem_id == -1)
> >> -                             rc = pcre_exec(spec->regex,
> >> -                                                 get_pcre_extra(spec),
> >> -                                                 key, strlen(key), 0,
> >> -                                                 pcre_options, NULL, 0);
> >> +                             rc = regex_match(spec->regex, key, partial);
> >>                       else
> >> -                             rc = pcre_exec(spec->regex,
> >> -                                                 get_pcre_extra(spec),
> >> -                                                 buf, strlen(buf), 0,
> >> -                                                 pcre_options, NULL, 0);
> >> -                     if (rc == 0) {
> >> +                             rc = regex_match(spec->regex, buf, partial);
> >> +                     if (rc == REGEX_MATCH) {
> >>                               spec->matches++;
> >>                               break;
> >> -                     } else if (partial && rc == PCRE_ERROR_PARTIAL)
> >> +                     } else if (partial && rc == REGEX_MATCH_PARTIAL)
> >>                               break;
> >>
> >> -                     if (rc == PCRE_ERROR_NOMATCH)
> >> +                     if (rc == REGEX_NO_MATCH)
> >>                               continue;
> >>
> >>                       errno = ENOENT;
> >> @@ -850,16 +808,9 @@ static enum selabel_cmp_result cmp(struct selabel_handle *h1,
> >>               }
> >>
> >>               if (spec1->regcomp && spec2->regcomp) {
> >> -                     size_t len1, len2;
> >> -                     int rc;
> >> -
> >> -                     rc = pcre_fullinfo(spec1->regex, NULL, PCRE_INFO_SIZE, &len1);
> >> -                     assert(rc == 0);
> >> -                     rc = pcre_fullinfo(spec2->regex, NULL, PCRE_INFO_SIZE, &len2);
> >> -                     assert(rc == 0);
> >> -                     if (len1 != len2 ||
> >> -                         memcmp(spec1->regex, spec2->regex, len1))
> >> +                     if (regex_cmp(spec1->regex, spec2->regex) == SELABEL_INCOMPARABLE){
> >>                               return incomp(spec1, spec2, "regex", i, j);
> >> +                     }
> >>               } else {
> >>                       if (strcmp(spec1->regex_str, spec2->regex_str))
> >>                               return incomp(spec1, spec2, "regex_str", i, j);
> >> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
> >> index 6d1e890..a2e30e5 100644
> >> --- a/libselinux/src/label_file.h
> >> +++ b/libselinux/src/label_file.h
> >> @@ -6,6 +6,14 @@
> >>
> >>  #include <sys/stat.h>
> >>
> >> +/*
> >> + * regex.h/c were introduced to hold all dependencies on the regular
> >> + * expression back-end when we started supporting PCRE2. regex.h defines a
> >> + * minimal interface required by libselinux, so that the remaining code
> >> + * can be agnostic about the underlying implementation.
> >> + */
> >> +#include "regex.h"
> >> +
> >>  #include "callbacks.h"
> >>  #include "label_internal.h"
> >>
> >> @@ -19,21 +27,12 @@
> >>
> >>  #define SELINUX_COMPILED_FCONTEXT_MAX_VERS   SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
> >>
> >> -/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> >> -#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> >> -#define pcre_free_study  pcre_free
> >> -#endif
> >> -
> >>  /* A file security context specification. */
> >>  struct spec {
> >>       struct selabel_lookup_rec lr;   /* holds contexts for lookup result */
> >>       char *regex_str;        /* regular expession string for diagnostics */
> >>       char *type_str;         /* type string for diagnostic messages */
> >> -     pcre *regex;            /* compiled regular expression */
> >> -     union {
> >> -             pcre_extra *sd; /* pointer to extra compiled stuff */
> >> -             pcre_extra lsd; /* used to hold the mmap'd version */
> >> -     };
> >> +     struct regex_data * regex; /* backend dependent regular expression data */
> >>       mode_t mode;            /* mode format value */
> >>       int matches;            /* number of matching pathnames */
> >>       int stem_id;            /* indicates which stem-compression item */
> >> @@ -78,17 +77,6 @@ struct saved_data {
> >>       struct mmap_area *mmap_areas;
> >>  };
> >>
> >> -static inline pcre_extra *get_pcre_extra(struct spec *spec)
> >> -{
> >> -     if (spec->from_mmap) {
> >> -             if (spec->lsd.study_data)
> >> -                     return &spec->lsd;
> >> -             else
> >> -                     return NULL;
> >> -     } else
> >> -             return spec->sd;
> >> -}
> >> -
> >>  static inline mode_t string_to_mode(char *mode)
> >>  {
> >>       size_t len;
> >> @@ -331,13 +319,12 @@ static inline int next_entry(void *buf, struct mmap_area *fp, size_t bytes)
> >>  }
> >>
> >>  static inline int compile_regex(struct saved_data *data, struct spec *spec,
> >> -                                         const char **errbuf)
> >> +                                         struct regex_error_data * error_data)
> >>  {
> >> -     const char *tmperrbuf;
> >>       char *reg_buf, *anchored_regex, *cp;
> >>       struct stem *stem_arr = data->stem_arr;
> >>       size_t len;
> >> -     int erroff;
> >> +     int rc;
> >>
> >>       if (spec->regcomp)
> >>               return 0; /* already done */
> >> @@ -361,19 +348,9 @@ static inline int compile_regex(struct saved_data *data, struct spec *spec,
> >>       *cp = '\0';
> >>
> >>       /* Compile the regular expression. */
> >> -     spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL, &tmperrbuf,
> >> -                                                 &erroff, NULL);
> >> +     rc = regex_prepare_data(&spec->regex, anchored_regex, error_data);
> >>       free(anchored_regex);
> >> -     if (!spec->regex) {
> >> -             if (errbuf)
> >> -                     *errbuf = tmperrbuf;
> >> -             return -1;
> >> -     }
> >> -
> >> -     spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
> >> -     if (!spec->sd && tmperrbuf) {
> >> -             if (errbuf)
> >> -                     *errbuf = tmperrbuf;
> >> +     if (rc < 0) {
> >>               return -1;
> >>       }
> >>
> >> @@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle *rec,
> >>       struct saved_data *data = (struct saved_data *)rec->data;
> >>       struct spec *spec_arr;
> >>       unsigned int nspec = data->nspec;
> >> -     const char *errbuf = NULL;
> >> +     char const *errbuf;
> >> +     struct regex_error_data error_data;
> >>
> >>       items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
> >>       if (items < 0) {
> >> @@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle *rec,
> >>       data->nspec++;
> >>
> >>       if (rec->validating &&
> >> -                         compile_regex(data, &spec_arr[nspec], &errbuf)) {
> >> +                         compile_regex(data, &spec_arr[nspec], &error_data)) {
> >>               COMPAT_LOG(SELINUX_ERROR,
> >>                          "%s:  line %u has invalid regex %s:  %s\n",
> >>                          path, lineno, regex,
> >> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> >> new file mode 100644
> >> index 0000000..6b92b04
> >> --- /dev/null
> >> +++ b/libselinux/src/regex.c
> >> @@ -0,0 +1,405 @@
> >> +#include <assert.h>
> >> +#include <stdint.h>
> >> +#include <stdio.h>
> >> +#include <string.h>
> >> +
> >> +#include "regex.h"
> >> +#include "label_file.h"
> >> +
> >> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
> >> +                     struct regex_error_data * errordata) {
> >> +     memset(errordata, 0, sizeof(struct regex_error_data));
> >> +     *regex = regex_data_create();
> >> +     if (!(*regex))
> >> +             return -1;
> >> +#ifdef USE_PCRE2
> >> +     (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
> >> +                     PCRE2_ZERO_TERMINATED,
> >> +                     PCRE2_DOTALL,
> >> +                     &errordata->error_code,
> >> +                     &errordata->error_offset, NULL);
> >> +#else
> >> +     (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
> >> +                                     &errordata->error_buffer,
> >> +                                     &errordata->error_offset, NULL);
> >> +#endif
> >> +     if (!(*regex)->regex) {
> >> +             goto err;
> >> +     }
> >> +
> >> +#ifdef USE_PCRE2
> >> +     (*regex)->match_data =
> >> +             pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
> >> +     if (!(*regex)->match_data) {
> >> +             goto err;
> >> +     }
> >> +#else
> >> +     (*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
> >> +     if (!(*regex)->sd && errordata->error_buffer) {
> >> +             goto err;
> >> +     }
> >> +     (*regex)->extra_owned = !!(*regex)->sd;
> >> +#endif
> >> +     return 0;
> >> +
> >> +err: regex_data_free(*regex);
> >> +     *regex = NULL;
> >> +     return -1;
> >> +}
> >> +
> >> +char const * regex_version(void) {
> >> +#ifdef USE_PCRE2
> >> +     static int initialized = 0;
> >> +     static char * version_string = NULL;
> >> +     size_t version_string_len;
> >> +     if (!initialized) {
> >> +             version_string_len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
> >> +             version_string = (char*) malloc(version_string_len);
> >> +             if (!version_string) {
> >> +                     return NULL;
> >> +             }
> >> +             pcre2_config(PCRE2_CONFIG_VERSION, version_string);
> >> +             initialized = 1;
> >> +     }
> >> +     return version_string;
> >> +#else
> >> +     return pcre_version();
> >> +#endif
> >> +}
> >> +
> >> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
> >> +     int rc;
> >> +     size_t entry_len;
> >> +#ifndef USE_PCRE2
> >> +     size_t info_len;
> >> +#endif
> >> +
> >> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> +#ifdef USE_PCRE2
> >> +     if (rc < 0)
> >> +             return -1;
> >> +
> >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> >> +     /* this should yield exactly one because we store one pattern at a time
> >> +      */
> >> +     rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
> >> +     if (rc != 1)
> >> +             return -1;
> >> +
> >> +     *regex = regex_data_create();
> >> +     if (!*regex)
> >> +             return -1;
> >> +
> >> +     rc = pcre2_serialize_decode(&(*regex)->regex, 1,
> >> +                     (PCRE2_SPTR)mmap_area->next_addr, NULL);
> >> +     if (rc != 1)
> >> +             goto err;
> >> +
> >> +     (*regex)->match_data =
> >> +             pcre2_match_data_create_from_pattern((*regex)->regex, NULL);
> >> +     if (!(*regex)->match_data)
> >> +             goto err;
> >> +
> >> +#endif /* NO_PERSISTENTLY_STORED_PATTERNS */
> >> +     /* and skip the decoded bit */
> >> +     rc = next_entry(NULL, mmap_area, entry_len);
> >> +     if (rc < 0)
> >> +             goto err;
> >> +
> >> +     return 0;
> >> +#else
> >> +     if (rc < 0 || !entry_len) {
> >> +             return -1;
> >> +     }
> >> +     *regex = regex_data_create();
> >> +     if (!(*regex))
> >> +             return -1;
> >> +
> >> +     (*regex)->extra_owned = 0;
> >> +     (*regex)->regex = (pcre *) mmap_area->next_addr;
> >> +     rc = next_entry(NULL, mmap_area, entry_len);
> >> +     if (rc < 0)
> >> +             goto err;
> >> +
> >> +     /* Check that regex lengths match. pcre_fullinfo()
> >> +      * also validates its magic number. */
> >> +     rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
> >> +     if (rc < 0 || info_len != entry_len) {
> >> +             goto err;
> >> +     }
> >> +
> >> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> >> +     if (rc < 0 || !entry_len) {
> >> +             goto err;
> >> +     }
> >> +
> >> +     if (entry_len) {
> >> +             (*regex)->lsd.study_data = (void *) mmap_area->next_addr;
> >> +             (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> >> +             rc = next_entry(NULL, mmap_area, entry_len);
> >> +             if (rc < 0)
> >> +                     goto err;
> >> +
> >> +             /* Check that study data lengths match. */
> >> +             rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
> >> +                                PCRE_INFO_STUDYSIZE, &info_len);
> >> +             if (rc < 0 || info_len != entry_len)
> >> +                     goto err;
> >> +     }
> >> +     return 0;
> >> +#endif
> >> +err:
> >> +     regex_data_free(*regex);
> >> +     *regex = NULL;
> >> +     return -1;
> >> +}
> >> +
> >> +int regex_writef(struct regex_data * regex, FILE * fp) {
> >> +#if !defined USE_PCRE2 || !defined NO_PERSISTENTLY_STORED_PATTERNS
> >> +     int rc;
> >> +#endif
> >> +     size_t len;
> >> +#ifdef USE_PCRE2
> >> +     PCRE2_SIZE to_write;
> >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> >> +     PCRE2_UCHAR * bytes;
> >> +
> >> +     /* encode the patter for serialization */
> >> +     rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex, 1,
> >> +                                 &bytes, &to_write, NULL);
> >> +     if (rc != 1)
> >> +             return -1;
> >> +
> >> +#else
> >> +     (void)regex; // silence unused parameter warning
> >> +     to_write = 0;
> >> +#endif
> >> +     /* write serialized pattern's size */
> >> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> >> +     if (len != 1) {
> >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> >> +             pcre2_serialize_free(bytes);
> >> +#endif
> >> +             return -1;
> >> +     }
> >> +
> >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> >> +     /* write serialized pattern */
> >> +     len = fwrite(bytes, 1, to_write, fp);
> >> +     if (len != to_write) {
> >> +             pcre2_serialize_free(bytes);
> >> +             return -1;
> >> +     }
> >> +     pcre2_serialize_free(bytes);
> >> +#endif
> >> +#else
> >> +     uint32_t to_write;
> >> +     size_t size;
> >> +     pcre_extra * sd = regex->extra_owned ? regex->sd :
> >> +                     (regex->lsd.study_data ? &regex->lsd : NULL);
> >> +
> >> +     /* determine the size of the pcre data in bytes */
> >> +     rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
> >> +     if (rc < 0)
> >> +             return -1;
> >> +
> >> +     /* write the number of bytes in the pcre data */
> >> +     to_write = size;
> >> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> >> +     if (len != 1)
> >> +             return -1;
> >> +
> >> +     /* write the actual pcre data as a char array */
> >> +     len = fwrite(regex->regex, 1, to_write, fp);
> >> +     if (len != to_write)
> >> +             return -1;
> >> +
> >> +     if (sd) {
> >> +             /* determine the size of the pcre study info */
> >> +             rc = pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE,
> >> +                             &size);
> >> +             if (rc < 0)
> >> +                     return -1;
> >> +     } else
> >> +             size = 0;
> >> +
> >> +     /* write the number of bytes in the pcre study data */
> >> +     to_write = size;
> >> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> >> +     if (len != 1)
> >> +             return -1;
> >> +
> >> +     if (sd) {
> >> +             /* write the actual pcre study data as a char array */
> >> +             len = fwrite(sd->study_data, 1, to_write, fp);
> >> +             if (len != to_write)
> >> +                     return -1;
> >> +     }
> >> +#endif
> >> +     return 0;
> >> +}
> >> +
> >> +struct regex_data * regex_data_create(void) {
> >> +     struct regex_data * dummy = (struct regex_data*) malloc(
> >> +                     sizeof(struct regex_data));
> >> +     if (dummy) {
> >> +             memset(dummy, 0, sizeof(struct regex_data));
> >> +     }
> >> +     return dummy;
> >> +}
> >> +
> >> +void regex_data_free(struct regex_data * regex) {
> >> +     if (regex) {
> >> +#ifdef USE_PCRE2
> >> +             if (regex->regex) {
> >> +                     pcre2_code_free(regex->regex);
> >> +             }
> >> +             if (regex->match_data) {
> >> +                     pcre2_match_data_free(regex->match_data);
> >> +             }
> >> +#else
> >> +             if (regex->regex)
> >> +                     pcre_free(regex->regex);
> >> +             if (regex->extra_owned && regex->sd) {
> >> +                     pcre_free_study(regex->sd);
> >> +             }
> >> +#endif
> >> +             free(regex);
> >> +     }
> >> +}
> >> +
> >> +int regex_match(struct regex_data * regex, char const * subject, int partial) {
> >> +     int rc;
> >> +#ifdef USE_PCRE2
> >> +     rc = pcre2_match(regex->regex,
> >> +                     (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> >> +                     partial ? PCRE2_PARTIAL_SOFT : 0, regex->match_data,
> >> +                     NULL);
> >> +     if (rc > 0)
> >> +     return REGEX_MATCH;
> >> +     switch (rc) {
> >> +             case PCRE2_ERROR_PARTIAL:
> >> +                     return REGEX_MATCH_PARTIAL;
> >> +             case PCRE2_ERROR_NOMATCH:
> >> +                     return REGEX_NO_MATCH;
> >> +             default:
> >> +                     return REGEX_ERROR;
> >> +     }
> >> +#else
> >> +     rc = pcre_exec(regex->regex,
> >> +                     regex->extra_owned ? regex->sd : &regex->lsd, subject,
> >> +                     strlen(subject), 0, partial ? PCRE_PARTIAL_SOFT : 0,
> >> +                     NULL,
> >> +                     0);
> >> +     switch (rc) {
> >> +             case 0:
> >> +                     return REGEX_MATCH;
> >> +             case PCRE_ERROR_PARTIAL:
> >> +                     return REGEX_MATCH_PARTIAL;
> >> +             case PCRE_ERROR_NOMATCH:
> >> +                     return REGEX_NO_MATCH;
> >> +             default:
> >> +                     return REGEX_ERROR;
> >> +     }
> >> +#endif
> >> +}
> >> +
> >> +/* TODO Replace this compare function with something that actually compares the
> >> + * regular expressions.
> >> + * This compare function basically just compares the binary representations of
> >> + * the automatons, and because this representation contains pointers and
> >> + * metadata, it can only return a match if regex1 == regex2.
> >> + * Preferably, this function would be replaced with an algorithm that computes
> >> + * the equivalence of the automatons systematically.
> >> + */
> >> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2) {
> >> +     int rc;
> >> +     size_t len1, len2;
> >> +#ifdef USE_PCRE2
> >> +     rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
> >> +     assert(rc == 0);
> >> +     rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
> >> +     assert(rc == 0);
> >> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> >> +             return SELABEL_INCOMPARABLE;
> >> +#else
> >> +     rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
> >> +     assert(rc == 0);
> >> +     rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
> >> +     assert(rc == 0);
> >> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> >> +             return SELABEL_INCOMPARABLE;
> >> +#endif
> >> +     return SELABEL_EQUAL;
> >> +}
> >> +
> >> +void regex_format_error(struct regex_error_data const * error_data,
> >> +                     char * buffer, size_t buf_size) {
> >> +     unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
> >> +     char * ptr = &buffer[buf_size - the_end_length];
> >> +     int rc = 0;
> >> +     size_t pos = 0;
> >> +     if (!buffer || !buf_size)
> >> +             return;
> >> +     rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
> >> +     if (rc < 0)
> >> +             /* If snprintf fails it constitutes a logical error that needs
> >> +              * fixing.
> >> +              */
> >> +             abort();
> >> +
> >> +     pos += rc;
> >> +     if (pos >= buf_size)
> >> +             goto truncated;
> >> +
> >> +     if (error_data->error_offset > 0) {
> >> +#ifdef USE_PCRE2
> >> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
> >> +                             error_data->error_offset);
> >> +#else
> >> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
> >> +                             error_data->error_offset);
> >> +#endif
> >> +             if (rc < 0)
> >> +                     abort();
> >> +
> >> +     }
> >> +     pos += rc;
> >> +     if (pos >= buf_size)
> >> +             goto truncated;
> >> +
> >> +#ifdef USE_PCRE2
> >> +     rc = pcre2_get_error_message(error_data->error_code,
> >> +                     (PCRE2_UCHAR*)(buffer + pos),
> >> +                     buf_size - pos);
> >> +     if (rc == PCRE2_ERROR_NOMEMORY)
> >> +             goto truncated;
> >> +#else
> >> +     rc = snprintf(buffer + pos, buf_size - pos, "%s",
> >> +                     error_data->error_buffer);
> >> +     if (rc < 0)
> >> +             abort();
> >> +
> >> +     if ((size_t)rc < strlen(error_data->error_buffer))
> >> +             goto truncated;
> >> +#endif
> >> +
> >> +     return;
> >> +
> >> +truncated:
> >> +     /* replace end of string with "..." to indicate that it was truncated */
> >> +     switch (the_end_length) {
> >> +             /* no break statements, fall-through is intended */
> >> +             case 4:
> >> +                     *ptr++ = '.';
> >> +             case 3:
> >> +                     *ptr++ = '.';
> >> +             case 2:
> >> +                     *ptr++ = '.';
> >> +             case 1:
> >> +                     *ptr++ = '\0';
> >> +             default:
> >> +                     break;
> >> +     }
> >> +     return;
> >> +}
> >> diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
> >> new file mode 100644
> >> index 0000000..bdc10c0
> >> --- /dev/null
> >> +++ b/libselinux/src/regex.h
> >> @@ -0,0 +1,168 @@
> >> +#ifndef SRC_REGEX_H_
> >> +#define SRC_REGEX_H_
> >> +
> >> +#include <stdio.h>
> >> +
> >> +#ifdef USE_PCRE2
> >> +#include <pcre2.h>
> >> +#else
> >> +#include <pcre.h>
> >> +#endif
> >> +
> >> +enum {
> >> +     REGEX_MATCH,
> >> +     REGEX_MATCH_PARTIAL,
> >> +     REGEX_NO_MATCH,
> >> +     REGEX_ERROR = -1,
> >> +};
> >> +
> >> +#ifdef USE_PCRE2
> >> +struct regex_data {
> >> +     pcre2_code * regex; /* compiled regular expression */
> >> +     pcre2_match_data * match_data; /* match data block required for the compiled
> >> +      pattern in regex2 */
> >> +};
> >> +
> >> +struct regex_error_data {
> >> +     int error_code;
> >> +     PCRE2_SIZE error_offset;
> >> +};
> >> +
> >> +/* ^^^^^^ USE_PCRE2  ^^^^^^ */
> >> +#else
> >> +/* vvvvvv USE_PCRE vvvvvv */
> >> +
> >> +/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> >> +#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> >> +#define pcre_free_study  pcre_free
> >> +#endif
> >> +
> >> +struct regex_data {
> >> +     pcre *regex; /* compiled regular expression */
> >> +     int extra_owned; /* non zero if pcre_extra is owned by this structure
> >> +                       * and thus must be freed on destruction.
> >> +                       */
> >> +     union {
> >> +             pcre_extra *sd; /* pointer to extra compiled stuff */
> >> +             pcre_extra lsd; /* used to hold the mmap'd version */
> >> +     };
> >> +};
> >> +
> >> +struct regex_error_data {
> >> +     char const * error_buffer;
> >> +     int error_offset;
> >> +};
> >> +
> >> +#endif /* USE_PCRE2 */
> >> +
> >> +struct mmap_area;
> >> +
> >> +/**
> >> + * regex_version returns the version string of the underlying regular
> >> + * expressions library. In the case of PCRE it just returns the
> >> + * result of pcre_version(). In the case of PCRE2, the very first time this
> >> + * function is called it allocates a buffer large enough to hold the version
> >> + * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
> >> + * The allocated buffer will linger in memory until the calling process is being
> >> + * reaped.
> >> + *
> >> + * It may return NULL on error.
> >> + */
> >> +char const * regex_version(void);
> >> +/**
> >> + * This constructor function allocates a buffer for a regex_data structure.
> >> + * The buffer is being initialized with zeroes.
> >> + */
> >> +struct regex_data * regex_data_create(void);
> >> +/**
> >> + * This complementary destructor function frees a given regex_data buffer.
> >> + * It also frees any non NULL member pointers with the appropriate pcreX_X_free
> >> + * function. For PCRE this function respects the extra_owned field and frees
> >> + * the pcre_extra data conditionally. Calling this function on a NULL pointer is
> >> + * safe.
> >> + */
> >> +void regex_data_free(struct regex_data * regex);
> >> +/**
> >> + * This function compiles the regular expression. Additionally, it prepares
> >> + * data structures required by the different underlying engines. For PCRE
> >> + * it calls pcre_study to generate optional data required for optimized
> >> + * execution of the compiled pattern. In the case of PCRE2, it allocates
> >> + * a pcre2_match_data structure of appropriate size to hold all possible
> >> + * matches created by the pattern.
> >> + *
> >> + * @arg regex If successful, the structure returned through *regex was allocated
> >> + *            with regex_data_create and must be freed with regex_data_free.
> >> + * @arg pattern_string The pattern string that is to be compiled.
> >> + * @arg errordata A pointer to a regex_error_data structure must be passed
> >> + *                to this function. This structure depends on the underlying
> >> + *                implementation. It can be passed to regex_format_error
> >> + *                to generate a human readable error message.
> >> + * @retval 0 on success
> >> + * @retval -1 on error
> >> + */
> >> +int regex_prepare_data(struct regex_data ** regex, char const * pattern_string,
> >> +                     struct regex_error_data * errordata);
> >> +/**
> >> + * This function loads a serialized precompiled pattern from a contiguous
> >> + * data region given by map_area.
> >> + *
> >> + * @arg map_area Description of the memory region holding a serialized
> >> + *               representation of the precompiled pattern.
> >> + * @arg regex If successful, the structure returned through *regex was allocated
> >> + *            with regex_data_create and must be freed with regex_data_free.
> >> + *
> >> + * @retval 0 on success
> >> + * @retval -1 on error
> >> + */
> >> +int regex_load_mmap(struct mmap_area * map_area, struct regex_data ** regex);
> >> +/**
> >> + * This function stores a precompiled regular expression to a file.
> >> + * In the case of PCRE, it just dumps the binary representation of the
> >> + * precompiled pattern into a file. In the case of PCRE2, it uses the
> >> + * serialization function provided by the library.
> >> + *
> >> + * @arg regex The precompiled regular expression data.
> >> + * @arg fp A file stream specifying the output file.
> >> + */
> >> +int regex_writef(struct regex_data * regex, FILE * fp);
> >> +/**
> >> + * This function applies a precompiled pattern to a subject string and
> >> + * returns whether or not a match was found.
> >> + *
> >> + * @arg regex The precompiled pattern.
> >> + * @arg subject The subject string.
> >> + * @arg partial Boolean indicating if partial matches are wanted. A nonzero
> >> + *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
> >> + *              option to pcre_exec or pcre2_match.
> >> + * @retval REGEX_MATCH if a match was found
> >> + * @retval REGEX_MATCH_PARTIAL if a partial match was found
> >> + * @retval REGEX_NO_MATCH if no match was found
> >> + * @retval REGEX_ERROR if an error was encountered during the execution of the
> >> + *                     regular expression
> >> + */
> >> +int regex_match(struct regex_data * regex, char const * subject, int partial);
> >> +/**
> >> + * This function compares two compiled regular expressions (regex1 and regex2).
> >> + * It compares the binary representations of the compiled patterns. It is a very
> >> + * crude approximation because the binary representation holds data like
> >> + * reference counters, that has nothing to do with the actual state machine.
> >> + *
> >> + * @retval SELABEL_EQUAL if the pattern's binary representations are exactly
> >> + *                       the same
> >> + * @retval SELABEL_INCOMPARABLE otherwise
> >> + */
> >> +int regex_cmp(struct regex_data * regex1, struct regex_data * regex2);
> >> +/**
> >> + * This function takes the error data returned by regex_prepare_data and turns
> >> + * it into a human readable error message.
> >> + * If the buffer given to hold the error message is too small it truncates the
> >> + * message and indicates the truncation with an ellipsis ("...") at the end of
> >> + * the buffer.
> >> + *
> >> + * @arg error_data Error data as returned by regex_prepare_data.
> >> + * @arg buffer String buffer to hold the formatted error string.
> >> + * @arg buf_size Total size of the given buffer in bytes.
> >> + */
> >> +void regex_format_error(struct regex_error_data const * error_data,
> >> +                     char * buffer, size_t buf_size);
> >> +#endif  /* SRC_REGEX_H_ */
> >> diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
> >> index 8497cb4..1e7a048 100644
> >> --- a/libselinux/utils/Makefile
> >> +++ b/libselinux/utils/Makefile
> >> @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k -Wformat-security -Winit-self -Wmissi
> >>            -fasynchronous-unwind-tables -fdiagnostics-show-option -funit-at-a-time \
> >>            -fipa-pure-const -Wno-suggest-attribute=pure -Wno-suggest-attribute=const \
> >>            -Werror -Wno-aggregate-return -Wno-redundant-decls
> >> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS)
> >> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE $(EMFLAGS) $(PCRE_CFLAGS)
> >>  LDLIBS += -L../src -lselinux -L$(LIBDIR)
> >>
> >>  TARGETS=$(patsubst %.c,%,$(wildcard *.c))
> >>
> >> -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
> >> +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a -lsepol
> >>
> >>  selinux_restorecon: LDLIBS += -lsepol
> >>
> >> diff --git a/libselinux/utils/sefcontext_compile.c b/libselinux/utils/sefcontext_compile.c
> >> index fd6fb78..8ff73f4 100644
> >> --- a/libselinux/utils/sefcontext_compile.c
> >> +++ b/libselinux/utils/sefcontext_compile.c
> >> @@ -1,6 +1,5 @@
> >>  #include <ctype.h>
> >>  #include <errno.h>
> >> -#include <pcre.h>
> >>  #include <stdint.h>
> >>  #include <stdio.h>
> >>  #include <string.h>
> >> @@ -13,6 +12,7 @@
> >>  #include <sepol/sepol.h>
> >>
> >>  #include "../src/label_file.h"
> >> +#include "../src/regex.h"
> >>
> >>  const char *policy_file;
> >>  static int ctx_err;
> >> @@ -119,12 +119,14 @@ static int write_binary_file(struct saved_data *data, int fd)
> >>       if (len != 1)
> >>               goto err;
> >>
> >> -     /* write the pcre version */
> >> -     section_len = strlen(pcre_version());
> >> +     /* write version of the regex back-end */
> >> +     if (!regex_version())
> >> +             goto err;
> >> +     section_len = strlen(regex_version());
> >>       len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
> >>       if (len != 1)
> >>               goto err;
> >> -     len = fwrite(pcre_version(), sizeof(char), section_len, bin_file);
> >> +     len = fwrite(regex_version(), sizeof(char), section_len, bin_file);
> >>       if (len != section_len)
> >>               goto err;
> >>
> >> @@ -162,10 +164,8 @@ static int write_binary_file(struct saved_data *data, int fd)
> >>               mode_t mode = specs[i].mode;
> >>               size_t prefix_len = specs[i].prefix_len;
> >>               int32_t stem_id = specs[i].stem_id;
> >> -             pcre *re = specs[i].regex;
> >> -             pcre_extra *sd = get_pcre_extra(&specs[i]);
> >> +             struct regex_data *re = specs[i].regex;
> >>               uint32_t to_write;
> >> -             size_t size;
> >>
> >>               /* length of the context string (including nul) */
> >>               to_write = strlen(context) + 1;
> >> @@ -212,42 +212,10 @@ static int write_binary_file(struct saved_data *data, int fd)
> >>               if (len != 1)
> >>                       goto err;
> >>
> >> -             /* determine the size of the pcre data in bytes */
> >> -             rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
> >> +             /* Write regex related data */
> >> +             rc = regex_writef(re, bin_file);
> >>               if (rc < 0)
> >>                       goto err;
> >> -
> >> -             /* write the number of bytes in the pcre data */
> >> -             to_write = size;
> >> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> >> -             if (len != 1)
> >> -                     goto err;
> >> -
> >> -             /* write the actual pcre data as a char array */
> >> -             len = fwrite(re, 1, to_write, bin_file);
> >> -             if (len != to_write)
> >> -                     goto err;
> >> -
> >> -             if (sd) {
> >> -                     /* determine the size of the pcre study info */
> >> -                     rc = pcre_fullinfo(re, sd, PCRE_INFO_STUDYSIZE, &size);
> >> -                     if (rc < 0)
> >> -                             goto err;
> >> -             } else
> >> -                     size = 0;
> >> -
> >> -             /* write the number of bytes in the pcre study data */
> >> -             to_write = size;
> >> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> >> -             if (len != 1)
> >> -                     goto err;
> >> -
> >> -             if (sd) {
> >> -                     /* write the actual pcre study data as a char array */
> >> -                     len = fwrite(sd->study_data, 1, to_write, bin_file);
> >> -                     if (len != to_write)
> >> -                             goto err;
> >> -             }
> >>       }
> >>
> >>       rc = 0;
> >> @@ -270,8 +238,7 @@ static void free_specs(struct saved_data *data)
> >>               free(specs[i].lr.ctx_trans);
> >>               free(specs[i].regex_str);
> >>               free(specs[i].type_str);
> >> -             pcre_free(specs[i].regex);
> >> -             pcre_free_study(specs[i].sd);
> >> +             regex_data_free(specs[i].regex);
> >>       }
> >>       free(specs);
> >>
> >>
> >
> > _______________________________________________
> > Selinux mailing list
> > Selinux@tycho.nsa.gov
> > To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
> > To get help, send an email containing "help" to Selinux-request@tycho.nsa.gov.
> 
> 
> 
> -- 
> Respectfully,
> 
> William C Roberts
> _______________________________________________
> Selinux mailing list
> Selinux@tycho.nsa.gov
> To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
> To get help, send an email containing "help" to Selinux-request@tycho.nsa.gov.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
       [not found]       ` <CAFftDdqSUHPVVn7megAAHyjn_14XvVqn+8ukywr8nCgwMH4X3g@mail.gmail.com>
@ 2016-09-07 18:36         ` William Roberts
  0 siblings, 0 replies; 13+ messages in thread
From: William Roberts @ 2016-09-07 18:36 UTC (permalink / raw)
  To: Jason Zaman
  Cc: seandroid-list, selinux, Janis Danisevskis, Janis Danisevskis,
	Stephen Smalley

[-- Attachment #1: Type: text/plain, Size: 49934 bytes --]

On Sep 7, 2016 11:29, "Jason Zaman" <jason@perfinion.com> wrote:
>
> On Wed, Sep 07, 2016 at 09:40:43AM -0700, William Roberts wrote:
> > On Wed, Sep 7, 2016 at 8:02 AM, Stephen Smalley <sds@tycho.nsa.gov>
wrote:
> > > On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
> > >> From: Janis Danisevskis <jdanis@google.com>
> > >>
> > >> This patch moves all pcre1/2 dependencies into the new files regex.h
> > >> and regex.c implementing the common denominator of features needed
> > >> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
> > >> used implementations.
> > >>
> > >> As of this patch libselinux supports either pcre or pcre2 but not
> > >> both at the same time. The persistently stored file contexts
> > >> information differs. This means libselinux can only load file
> > >> context files generated by sefcontext_compile build with the
> > >> same pcre variant.
> > >
> > > Shouldn't the pcre variant be encoded in some manner in the
> > > file_contexts.bin file so that libselinux can tell immediately whether
> > > it is supported?
> >
> > Don't we have that in pcre_version()?
> >
> > >
> > >> Also, for pcre2 the persistent format is architecture dependant.
> > >> Stored precompiled regular expressions can only be used on the
> > >> same architecture they were generated on. If pcre2 is used and
> > >> sefcontext_compile shall generate portable output, it and libselinux
> > >> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
> > >> cost of having to recompile the regular expressions at load time.
>
> I'd rather the arch was just added to the compiled files like is done
> for pcre version. It may not be a bad idea to add arch even for pcre1
> since those also seemed quite fragile in the past.
>
> > > Is it worth supporting the -DNO_PERSISTENTLY_STORED_PATTERNS case? The
> > > point of using file_contexts.bin was to move the cost of compiling the
> > > regexes to build time rather than load time; if we cannot do that, then
> > > how much do we gain from using file_contexts.bin instead of just falling
> > > back to file_contexts?

I have some work that speeds up file_contexts load times by 25%, and if we
mmap file_contexts we get a slight bump. I'm confident I can speed it up
even more. The binary format is 3 times the size. Gprof shows most of the
slowness in nodups on validation; I don't think regex compilation is
the bottleneck. I was hoping I could get the speeds close and then propose
ditching bin file support on Android.

> > >
> > > The #ifdef maze makes it very hard to read and maintain this code; that
> > > needs to be refactored.
> >
> > Perhaps set up some function pointers and hide the regex structure to be
> > opaque to the rest of selinux. This makes me think, should we just dlopen
> > the correct version of libpcre based on pcre_version()? Not sure how you
> > feel about dlopen calls....
>
> Please no :(. dlopen makes things harder to track in a distro. There are
> a ton of tools in Gentoo that figure things out with ldd and don't work
> at all with dlopen (like keeping old libs around if other packages link
> to old .so versions and whatnot).

I didn't think dlopen would be fun.
>
> -- Jason
>
> > > valgrind is reporting numerous errors, including both use of
> > > uninitialised values and memory leaks with both patches applied.  Try:
> > > make DESTDIR=~/obj CFLAGS+=-g clean install
> > > LD_LIBRARY_PATH=~/obj/lib valgrind --leak-check=full
> > > ~/obj/usr/sbin/matchpathcon /etc
> > >
> > > On x86_64.
> > >
> > > Will provide review of the code itself later...
> > >
> > >>
> > >> Signed-off-by: Janis Danisevskis <jdanis@google.com>
> > >> ---
> > >>  libselinux/Makefile                   |  13 ++
> > >>  libselinux/src/Makefile               |   4 +-
> > >>  libselinux/src/label_file.c           |  91 ++------
> > >>  libselinux/src/label_file.h           |  54 ++---
> > >>  libselinux/src/regex.c                | 405
++++++++++++++++++++++++++++++++++
> > >>  libselinux/src/regex.h                | 168 ++++++++++++++
> > >>  libselinux/utils/Makefile             |   4 +-
> > >>  libselinux/utils/sefcontext_compile.c |  53 +----
> > >>  8 files changed, 637 insertions(+), 155 deletions(-)
> > >>  create mode 100644 libselinux/src/regex.c
> > >>  create mode 100644 libselinux/src/regex.h
> > >>
> > >> diff --git a/libselinux/Makefile b/libselinux/Makefile
> > >> index 6142b60..15d051e 100644
> > >> --- a/libselinux/Makefile
> > >> +++ b/libselinux/Makefile
> > >> @@ -24,6 +24,19 @@ ifeq ($(DISABLE_SETRANS),y)
> > >>  endif
> > >>  export DISABLE_AVC DISABLE_SETRANS DISABLE_RPM DISABLE_BOOL EMFLAGS
> > >>
> > >> +USE_PCRE2 ?= n
> > >> +DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS ?= n
> > >> +ifeq ($(USE_PCRE2),y)
> > >> +     PCRE_CFLAGS := -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=8
> > >> +     ifeq ($(DISABLE_PERSISTENTLY_STORED_REGEX_PATTERNS), y)
> > >> +             PCRE_CFLAGS += -DNO_PERSISTENTLY_STORED_PATTERNS
> > >> +     endif
> > >> +     PCRE_LDFLAGS := -lpcre2-8
> > >> +else
> > >> +     PCRE_LDFLAGS := -lpcre
> > >> +endif
> > >> +export PCRE_CFLAGS PCRE_LDFLAGS
> > >> +
> > >>  all install relabel clean distclean indent:
> > >>       @for subdir in $(SUBDIRS); do \
> > >>               (cd $$subdir && $(MAKE) $@) || exit 1; \
> > >> diff --git a/libselinux/src/Makefile b/libselinux/src/Makefile
> > >> index 37d01af..66687e6 100644
> > >> --- a/libselinux/src/Makefile
> > >> +++ b/libselinux/src/Makefile
> > >> @@ -74,7 +74,7 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k
-Wformat-security -Winit-self -Wmissi
> > >>            -fipa-pure-const -Wno-suggest-attribute=pure
-Wno-suggest-attribute=const \
> > >>            -Werror -Wno-aggregate-return -Wno-redundant-decls
> > >>
> > >> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
$(EMFLAGS)
> > >> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
$(EMFLAGS) $(PCRE_CFLAGS)
> > >>
> > >>  SWIG_CFLAGS += -Wno-error -Wno-unused-variable
-Wno-unused-but-set-variable -Wno-unused-parameter \
> > >>               -Wno-shadow -Wno-uninitialized -Wno-missing-prototypes
-Wno-missing-declarations
> > >> @@ -113,7 +113,7 @@ $(LIBA): $(OBJS)
> > >>       $(RANLIB) $@
> > >>
> > >>  $(LIBSO): $(LOBJS)
> > >> -     $(CC) $(CFLAGS) -shared -o $@ $^ -lpcre -ldl $(LDFLAGS)
-L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> > >> +     $(CC) $(CFLAGS) -shared -o $@ $^ $(PCRE_LDFLAGS) -ldl
$(LDFLAGS) -L$(LIBDIR) -Wl,-soname,$(LIBSO),-z,defs,-z,relro
> > >>       ln -sf $@ $(TARGET)
> > >>
> > >>  $(LIBPC): $(LIBPC).in ../VERSION
> > >> diff --git a/libselinux/src/label_file.c
b/libselinux/src/label_file.c
> > >> index c89bb35..6698624 100644
> > >> --- a/libselinux/src/label_file.c
> > >> +++ b/libselinux/src/label_file.c
> > >> @@ -15,7 +15,6 @@
> > >>  #include <errno.h>
> > >>  #include <limits.h>
> > >>  #include <stdint.h>
> > >> -#include <pcre.h>
> > >>  #include <unistd.h>
> > >>  #include <sys/mman.h>
> > >>  #include <sys/types.h>
> > >> @@ -176,7 +175,10 @@ static int load_mmap(struct selabel_handle
*rec, const char *path,
> > >>               return -1;
> > >>
> > >>       if (version >= SELINUX_COMPILED_FCONTEXT_PCRE_VERS) {
> > >> -             len = strlen(pcre_version());
> > >> +             if (!regex_version()) {
> > >> +                     return -1;
> > >> +             }
> > >> +             len = strlen(regex_version());
> > >>
> > >>               rc = next_entry(&entry_len, mmap_area,
sizeof(uint32_t));
> > >>               if (rc < 0)
> > >> @@ -198,7 +200,7 @@ static int load_mmap(struct selabel_handle *rec,
const char *path,
> > >>               }
> > >>
> > >>               str_buf[entry_len] = '\0';
> > >> -             if ((strcmp(str_buf, pcre_version()) != 0)) {
> > >> +             if ((strcmp(str_buf, regex_version()) != 0)) {
> > >>                       free(str_buf);
> > >>                       return -1;
> > >>               }
> > >> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle
*rec, const char *path,
> > >>
> > >>               spec = &data->spec_arr[data->nspec];
> > >>               spec->from_mmap = 1;
> > >> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
> > >> +             spec->regcomp = 0;
> > >> +#else
> > >>               spec->regcomp = 1;
> > >> +#endif
> > >>
> > >>               /* Process context */
> > >>               rc = next_entry(&entry_len, mmap_area,
sizeof(uint32_t));
> > >> @@ -364,47 +370,10 @@ static int load_mmap(struct selabel_handle
*rec, const char *path,
> > >>                       spec->prefix_len = prefix_len;
> > >>               }
> > >>
> > >> -             /* Process regex and study_data entries */
> > >> -             rc = next_entry(&entry_len, mmap_area,
sizeof(uint32_t));
> > >> -             if (rc < 0 || !entry_len) {
> > >> -                     rc = -1;
> > >> -                     goto err;
> > >> -             }
> > >> -             spec->regex = (pcre *)mmap_area->next_addr;
> > >> -             rc = next_entry(NULL, mmap_area, entry_len);
> > >> +             rc = regex_load_mmap(mmap_area, &spec->regex);
> > >>               if (rc < 0)
> > >>                       goto err;
> > >>
> > >> -             /* Check that regex lengths match. pcre_fullinfo()
> > >> -              * also validates its magic number. */
> > >> -             rc = pcre_fullinfo(spec->regex, NULL, PCRE_INFO_SIZE,
&len);
> > >> -             if (rc < 0 || len != entry_len) {
> > >> -                     rc = -1;
> > >> -                     goto err;
> > >> -             }
> > >> -
> > >> -             rc = next_entry(&entry_len, mmap_area,
sizeof(uint32_t));
> > >> -             if (rc < 0 || !entry_len) {
> > >> -                     rc = -1;
> > >> -                     goto err;
> > >> -             }
> > >> -
> > >> -             if (entry_len) {
> > >> -                     spec->lsd.study_data = (void
*)mmap_area->next_addr;
> > >> -                     spec->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> > >> -                     rc = next_entry(NULL, mmap_area, entry_len);
> > >> -                     if (rc < 0)
> > >> -                             goto err;
> > >> -
> > >> -                     /* Check that study data lengths match. */
> > >> -                     rc = pcre_fullinfo(spec->regex, &spec->lsd,
> > >> -                                        PCRE_INFO_STUDYSIZE, &len);
> > >> -                     if (rc < 0 || len != entry_len) {
> > >> -                             rc = -1;
> > >> -                             goto err;
> > >> -                     }
> > >> -             }
> > >> -
> > >>               data->nspec++;
> > >>       }
> > >>
> > >> @@ -609,10 +578,7 @@ static void closef(struct selabel_handle *rec)
> > >>                       continue;
> > >>               free(spec->regex_str);
> > >>               free(spec->type_str);
> > >> -             if (spec->regcomp) {
> > >> -                     pcre_free(spec->regex);
> > >> -                     pcre_free_study(spec->sd);
> > >> -             }
> > >> +             regex_data_free(spec->regex);
> > >>       }
> > >>
> > >>       for (i = 0; i < (unsigned int)data->num_stems; i++) {
> > >> @@ -644,13 +610,14 @@ static struct spec *lookup_common(struct
selabel_handle *rec,
> > >>  {
> > >>       struct saved_data *data = (struct saved_data *)rec->data;
> > >>       struct spec *spec_arr = data->spec_arr;
> > >> -     int i, rc, file_stem, pcre_options = 0;
> > >> +     int i, rc, file_stem;
> > >>       mode_t mode = (mode_t)type;
> > >>       const char *buf;
> > >>       struct spec *ret = NULL;
> > >>       char *clean_key = NULL;
> > >>       const char *prev_slash, *next_slash;
> > >>       unsigned int sofar = 0;
> > >> +     struct regex_error_data regex_error_data;
> > >>
> > >>       if (!data->nspec) {
> > >>               errno = ENOENT;
> > >> @@ -677,9 +644,6 @@ static struct spec *lookup_common(struct
selabel_handle *rec,
> > >>       file_stem = find_stem_from_file(data, &buf);
> > >>       mode &= S_IFMT;
> > >>
> > >> -     if (partial)
> > >> -             pcre_options |= PCRE_PARTIAL_SOFT;
> > >> -
> > >>       /*
> > >>        * Check for matching specifications in reverse order, so that
> > >>        * the last matching specification is used.
> > >> @@ -692,25 +656,19 @@ static struct spec *lookup_common(struct
selabel_handle *rec,
> > >>                * a regex check        */
> > >>               if ((spec->stem_id == -1 || spec->stem_id ==
file_stem) &&
> > >>                   (!mode || !spec->mode || mode == spec->mode)) {
> > >> -                     if (compile_regex(data, spec, NULL) < 0)
> > >> +                     if (compile_regex(data, spec,
&regex_error_data) < 0)
> > >>                               goto finish;
> > >>                       if (spec->stem_id == -1)
> > >> -                             rc = pcre_exec(spec->regex,
> > >> -
 get_pcre_extra(spec),
> > >> -                                                 key, strlen(key),
0,
> > >> -                                                 pcre_options,
NULL, 0);
> > >> +                             rc = regex_match(spec->regex, key,
partial);
> > >>                       else
> > >> -                             rc = pcre_exec(spec->regex,
> > >> -
 get_pcre_extra(spec),
> > >> -                                                 buf, strlen(buf),
0,
> > >> -                                                 pcre_options,
NULL, 0);
> > >> -                     if (rc == 0) {
> > >> +                             rc = regex_match(spec->regex, buf,
partial);
> > >> +                     if (rc == REGEX_MATCH) {
> > >>                               spec->matches++;
> > >>                               break;
> > >> -                     } else if (partial && rc == PCRE_ERROR_PARTIAL)
> > >> +                     } else if (partial && rc ==
REGEX_MATCH_PARTIAL)
> > >>                               break;
> > >>
> > >> -                     if (rc == PCRE_ERROR_NOMATCH)
> > >> +                     if (rc == REGEX_NO_MATCH)
> > >>                               continue;
> > >>
> > >>                       errno = ENOENT;
> > >> @@ -850,16 +808,9 @@ static enum selabel_cmp_result cmp(struct
selabel_handle *h1,
> > >>               }
> > >>
> > >>               if (spec1->regcomp && spec2->regcomp) {
> > >> -                     size_t len1, len2;
> > >> -                     int rc;
> > >> -
> > >> -                     rc = pcre_fullinfo(spec1->regex, NULL,
PCRE_INFO_SIZE, &len1);
> > >> -                     assert(rc == 0);
> > >> -                     rc = pcre_fullinfo(spec2->regex, NULL,
PCRE_INFO_SIZE, &len2);
> > >> -                     assert(rc == 0);
> > >> -                     if (len1 != len2 ||
> > >> -                         memcmp(spec1->regex, spec2->regex, len1))
> > >> +                     if (regex_cmp(spec1->regex, spec2->regex) ==
SELABEL_INCOMPARABLE){
> > >>                               return incomp(spec1, spec2, "regex",
i, j);
> > >> +                     }
> > >>               } else {
> > >>                       if (strcmp(spec1->regex_str, spec2->regex_str))
> > >>                               return incomp(spec1, spec2,
"regex_str", i, j);
> > >> diff --git a/libselinux/src/label_file.h
b/libselinux/src/label_file.h
> > >> index 6d1e890..a2e30e5 100644
> > >> --- a/libselinux/src/label_file.h
> > >> +++ b/libselinux/src/label_file.h
> > >> @@ -6,6 +6,14 @@
> > >>
> > >>  #include <sys/stat.h>
> > >>
> > >> +/*
> > >> + * regex.h/c were introduced to hold all dependencies on the regular
> > >> + * expression back-end when we started supporting PCRE2. regex.h
defines a
> > >> + * minimal interface required by libselinux, so that the remaining
code
> > >> + * can be agnostic about the underlying implementation.
> > >> + */
> > >> +#include "regex.h"
> > >> +
> > >>  #include "callbacks.h"
> > >>  #include "label_internal.h"
> > >>
> > >> @@ -19,21 +27,12 @@
> > >>
> > >>  #define SELINUX_COMPILED_FCONTEXT_MAX_VERS
 SELINUX_COMPILED_FCONTEXT_PREFIX_LEN
> > >>
> > >> -/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> > >> -#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> > >> -#define pcre_free_study  pcre_free
> > >> -#endif
> > >> -
> > >>  /* A file security context specification. */
> > >>  struct spec {
> > >>       struct selabel_lookup_rec lr;   /* holds contexts for lookup
result */
> > >>       char *regex_str;        /* regular expession string for
diagnostics */
> > >>       char *type_str;         /* type string for diagnostic messages
*/
> > >> -     pcre *regex;            /* compiled regular expression */
> > >> -     union {
> > >> -             pcre_extra *sd; /* pointer to extra compiled stuff */
> > >> -             pcre_extra lsd; /* used to hold the mmap'd version */
> > >> -     };
> > >> +     struct regex_data * regex; /* backend dependent regular
expression data */
> > >>       mode_t mode;            /* mode format value */
> > >>       int matches;            /* number of matching pathnames */
> > >>       int stem_id;            /* indicates which stem-compression
item */
> > >> @@ -78,17 +77,6 @@ struct saved_data {
> > >>       struct mmap_area *mmap_areas;
> > >>  };
> > >>
> > >> -static inline pcre_extra *get_pcre_extra(struct spec *spec)
> > >> -{
> > >> -     if (spec->from_mmap) {
> > >> -             if (spec->lsd.study_data)
> > >> -                     return &spec->lsd;
> > >> -             else
> > >> -                     return NULL;
> > >> -     } else
> > >> -             return spec->sd;
> > >> -}
> > >> -
> > >>  static inline mode_t string_to_mode(char *mode)
> > >>  {
> > >>       size_t len;
> > >> @@ -331,13 +319,12 @@ static inline int next_entry(void *buf, struct
mmap_area *fp, size_t bytes)
> > >>  }
> > >>
> > >>  static inline int compile_regex(struct saved_data *data, struct
spec *spec,
> > >> -                                         const char **errbuf)
> > >> +                                         struct regex_error_data *
error_data)
> > >>  {
> > >> -     const char *tmperrbuf;
> > >>       char *reg_buf, *anchored_regex, *cp;
> > >>       struct stem *stem_arr = data->stem_arr;
> > >>       size_t len;
> > >> -     int erroff;
> > >> +     int rc;
> > >>
> > >>       if (spec->regcomp)
> > >>               return 0; /* already done */
> > >> @@ -361,19 +348,9 @@ static inline int compile_regex(struct
saved_data *data, struct spec *spec,
> > >>       *cp = '\0';
> > >>
> > >>       /* Compile the regular expression. */
> > >> -     spec->regex = pcre_compile(anchored_regex, PCRE_DOTALL,
&tmperrbuf,
> > >> -                                                 &erroff, NULL);
> > >> +     rc = regex_prepare_data(&spec->regex, anchored_regex,
error_data);
> > >>       free(anchored_regex);
> > >> -     if (!spec->regex) {
> > >> -             if (errbuf)
> > >> -                     *errbuf = tmperrbuf;
> > >> -             return -1;
> > >> -     }
> > >> -
> > >> -     spec->sd = pcre_study(spec->regex, 0, &tmperrbuf);
> > >> -     if (!spec->sd && tmperrbuf) {
> > >> -             if (errbuf)
> > >> -                     *errbuf = tmperrbuf;
> > >> +     if (rc < 0) {
> > >>               return -1;
> > >>       }
> > >>
> > >> @@ -394,7 +371,8 @@ static inline int process_line(struct
selabel_handle *rec,
> > >>       struct saved_data *data = (struct saved_data *)rec->data;
> > >>       struct spec *spec_arr;
> > >>       unsigned int nspec = data->nspec;
> > >> -     const char *errbuf = NULL;
> > >> +     char const *errbuf;
> > >> +     struct regex_error_data error_data;
> > >>
> > >>       items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type,
&context);
> > >>       if (items < 0) {
> > >> @@ -454,7 +432,7 @@ static inline int process_line(struct
selabel_handle *rec,
> > >>       data->nspec++;
> > >>
> > >>       if (rec->validating &&
> > >> -                         compile_regex(data, &spec_arr[nspec],
&errbuf)) {
> > >> +                         compile_regex(data, &spec_arr[nspec],
&error_data)) {
> > >>               COMPAT_LOG(SELINUX_ERROR,
> > >>                          "%s:  line %u has invalid regex %s:  %s\n",
> > >>                          path, lineno, regex,
> > >> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
> > >> new file mode 100644
> > >> index 0000000..6b92b04
> > >> --- /dev/null
> > >> +++ b/libselinux/src/regex.c
> > >> @@ -0,0 +1,405 @@
> > >> +#include <assert.h>
> > >> +#include <stdint.h>
> > >> +#include <stdio.h>
> > >> +#include <string.h>
> > >> +
> > >> +#include "regex.h"
> > >> +#include "label_file.h"
> > >> +
> > >> +int regex_prepare_data(struct regex_data ** regex, char const *
pattern_string,
> > >> +                     struct regex_error_data * errordata) {
> > >> +     memset(errordata, 0, sizeof(struct regex_error_data));
> > >> +     *regex = regex_data_create();
> > >> +     if (!(*regex))
> > >> +             return -1;
> > >> +#ifdef USE_PCRE2
> > >> +     (*regex)->regex = pcre2_compile((PCRE2_SPTR)pattern_string,
> > >> +                     PCRE2_ZERO_TERMINATED,
> > >> +                     PCRE2_DOTALL,
> > >> +                     &errordata->error_code,
> > >> +                     &errordata->error_offset, NULL);
> > >> +#else
> > >> +     (*regex)->regex = pcre_compile(pattern_string, PCRE_DOTALL,
> > >> +                                     &errordata->error_buffer,
> > >> +                                     &errordata->error_offset,
NULL);
> > >> +#endif
> > >> +     if (!(*regex)->regex) {
> > >> +             goto err;
> > >> +     }
> > >> +
> > >> +#ifdef USE_PCRE2
> > >> +     (*regex)->match_data =
> > >> +             pcre2_match_data_create_from_pattern((*regex)->regex,
NULL);
> > >> +     if (!(*regex)->match_data) {
> > >> +             goto err;
> > >> +     }
> > >> +#else
> > >> +     (*regex)->sd = pcre_study((*regex)->regex, 0,
&errordata->error_buffer);
> > >> +     if (!(*regex)->sd && errordata->error_buffer) {
> > >> +             goto err;
> > >> +     }
> > >> +     (*regex)->extra_owned = !!(*regex)->sd;
> > >> +#endif
> > >> +     return 0;
> > >> +
> > >> +err: regex_data_free(*regex);
> > >> +     *regex = NULL;
> > >> +     return -1;
> > >> +}
> > >> +
> > >> +char const * regex_version(void) {
> > >> +#ifdef USE_PCRE2
> > >> +     static int initialized = 0;
> > >> +     static char * version_string = NULL;
> > >> +     size_t version_string_len;
> > >> +     if (!initialized) {
> > >> +             version_string_len =
pcre2_config(PCRE2_CONFIG_VERSION, NULL);
> > >> +             version_string = (char*) malloc(version_string_len);
> > >> +             if (!version_string) {
> > >> +                     return NULL;
> > >> +             }
> > >> +             pcre2_config(PCRE2_CONFIG_VERSION, version_string);
> > >> +             initialized = 1;
> > >> +     }
> > >> +     return version_string;
> > >> +#else
> > >> +     return pcre_version();
> > >> +#endif
> > >> +}
> > >> +
> > >> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data
** regex) {
> > >> +     int rc;
> > >> +     size_t entry_len;
> > >> +#ifndef USE_PCRE2
> > >> +     size_t info_len;
> > >> +#endif
> > >> +
> > >> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> > >> +#ifdef USE_PCRE2
> > >> +     if (rc < 0)
> > >> +             return -1;
> > >> +
> > >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> > >> +     /* this should yield exactly one because we store one pattern
at a time
> > >> +      */
> > >> +     rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
> > >> +     if (rc != 1)
> > >> +             return -1;
> > >> +
> > >> +     *regex = regex_data_create();
> > >> +     if (!*regex)
> > >> +             return -1;
> > >> +
> > >> +     rc = pcre2_serialize_decode(&(*regex)->regex, 1,
> > >> +                     (PCRE2_SPTR)mmap_area->next_addr, NULL);
> > >> +     if (rc != 1)
> > >> +             goto err;
> > >> +
> > >> +     (*regex)->match_data =
> > >> +             pcre2_match_data_create_from_pattern((*regex)->regex,
NULL);
> > >> +     if (!(*regex)->match_data)
> > >> +             goto err;
> > >> +
> > >> +#endif /* NO_PERSISTENTLY_STORED_PATTERNS */
> > >> +     /* and skip the decoded bit */
> > >> +     rc = next_entry(NULL, mmap_area, entry_len);
> > >> +     if (rc < 0)
> > >> +             goto err;
> > >> +
> > >> +     return 0;
> > >> +#else
> > >> +     if (rc < 0 || !entry_len) {
> > >> +             return -1;
> > >> +     }
> > >> +     *regex = regex_data_create();
> > >> +     if (!(*regex))
> > >> +             return -1;
> > >> +
> > >> +     (*regex)->extra_owned = 0;
> > >> +     (*regex)->regex = (pcre *) mmap_area->next_addr;
> > >> +     rc = next_entry(NULL, mmap_area, entry_len);
> > >> +     if (rc < 0)
> > >> +             goto err;
> > >> +
> > >> +     /* Check that regex lengths match. pcre_fullinfo()
> > >> +      * also validates its magic number. */
> > >> +     rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE,
&info_len);
> > >> +     if (rc < 0 || info_len != entry_len) {
> > >> +             goto err;
> > >> +     }
> > >> +
> > >> +     rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> > >> +     if (rc < 0 || !entry_len) {
> > >> +             goto err;
> > >> +     }
> > >> +
> > >> +     if (entry_len) {
> > >> +             (*regex)->lsd.study_data = (void *)
mmap_area->next_addr;
> > >> +             (*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
> > >> +             rc = next_entry(NULL, mmap_area, entry_len);
> > >> +             if (rc < 0)
> > >> +                     goto err;
> > >> +
> > >> +             /* Check that study data lengths match. */
> > >> +             rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
> > >> +                                PCRE_INFO_STUDYSIZE, &info_len);
> > >> +             if (rc < 0 || info_len != entry_len)
> > >> +                     goto err;
> > >> +     }
> > >> +     return 0;
> > >> +#endif
> > >> +err:
> > >> +     regex_data_free(*regex);
> > >> +     *regex = NULL;
> > >> +     return -1;
> > >> +}
> > >> +
> > >> +int regex_writef(struct regex_data * regex, FILE * fp) {
> > >> +#if !defined USE_PCRE2 || !defined NO_PERSISTENTLY_STORED_PATTERNS
> > >> +     int rc;
> > >> +#endif
> > >> +     size_t len;
> > >> +#ifdef USE_PCRE2
> > >> +     PCRE2_SIZE to_write;
> > >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> > >> +     PCRE2_UCHAR * bytes;
> > >> +
> > >> +     /* encode the patter for serialization */
> > >> +     rc = pcre2_serialize_encode((const pcre2_code
**)&regex->regex, 1,
> > >> +                                 &bytes, &to_write, NULL);
> > >> +     if (rc != 1)
> > >> +             return -1;
> > >> +
> > >> +#else
> > >> +     (void)regex; // silence unused parameter warning
> > >> +     to_write = 0;
> > >> +#endif
> > >> +     /* write serialized pattern's size */
> > >> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> > >> +     if (len != 1) {
> > >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> > >> +             pcre2_serialize_free(bytes);
> > >> +#endif
> > >> +             return -1;
> > >> +     }
> > >> +
> > >> +#ifndef NO_PERSISTENTLY_STORED_PATTERNS
> > >> +     /* write serialized pattern */
> > >> +     len = fwrite(bytes, 1, to_write, fp);
> > >> +     if (len != to_write) {
> > >> +             pcre2_serialize_free(bytes);
> > >> +             return -1;
> > >> +     }
> > >> +     pcre2_serialize_free(bytes);
> > >> +#endif
> > >> +#else
> > >> +     uint32_t to_write;
> > >> +     size_t size;
> > >> +     pcre_extra * sd = regex->extra_owned ? regex->sd :
> > >> +                     (regex->lsd.study_data ? &regex->lsd : NULL);
> > >> +
> > >> +     /* determine the size of the pcre data in bytes */
> > >> +     rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
> > >> +     if (rc < 0)
> > >> +             return -1;
> > >> +
> > >> +     /* write the number of bytes in the pcre data */
> > >> +     to_write = size;
> > >> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> > >> +     if (len != 1)
> > >> +             return -1;
> > >> +
> > >> +     /* write the actual pcre data as a char array */
> > >> +     len = fwrite(regex->regex, 1, to_write, fp);
> > >> +     if (len != to_write)
> > >> +             return -1;
> > >> +
> > >> +     if (sd) {
> > >> +             /* determine the size of the pcre study info */
> > >> +             rc = pcre_fullinfo(regex->regex, sd,
PCRE_INFO_STUDYSIZE,
> > >> +                             &size);
> > >> +             if (rc < 0)
> > >> +                     return -1;
> > >> +     } else
> > >> +             size = 0;
> > >> +
> > >> +     /* write the number of bytes in the pcre study data */
> > >> +     to_write = size;
> > >> +     len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
> > >> +     if (len != 1)
> > >> +             return -1;
> > >> +
> > >> +     if (sd) {
> > >> +             /* write the actual pcre study data as a char array */
> > >> +             len = fwrite(sd->study_data, 1, to_write, fp);
> > >> +             if (len != to_write)
> > >> +                     return -1;
> > >> +     }
> > >> +#endif
> > >> +     return 0;
> > >> +}
> > >> +
> > >> +struct regex_data * regex_data_create(void) {
> > >> +     struct regex_data * dummy = (struct regex_data*) malloc(
> > >> +                     sizeof(struct regex_data));
> > >> +     if (dummy) {
> > >> +             memset(dummy, 0, sizeof(struct regex_data));
> > >> +     }
> > >> +     return dummy;
> > >> +}
> > >> +
> > >> +void regex_data_free(struct regex_data * regex) {
> > >> +     if (regex) {
> > >> +#ifdef USE_PCRE2
> > >> +             if (regex->regex) {
> > >> +                     pcre2_code_free(regex->regex);
> > >> +             }
> > >> +             if (regex->match_data) {
> > >> +                     pcre2_match_data_free(regex->match_data);
> > >> +             }
> > >> +#else
> > >> +             if (regex->regex)
> > >> +                     pcre_free(regex->regex);
> > >> +             if (regex->extra_owned && regex->sd) {
> > >> +                     pcre_free_study(regex->sd);
> > >> +             }
> > >> +#endif
> > >> +             free(regex);
> > >> +     }
> > >> +}
> > >> +
> > >> +int regex_match(struct regex_data * regex, char const * subject,
int partial) {
> > >> +     int rc;
> > >> +#ifdef USE_PCRE2
> > >> +     rc = pcre2_match(regex->regex,
> > >> +                     (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
> > >> +                     partial ? PCRE2_PARTIAL_SOFT : 0,
regex->match_data,
> > >> +                     NULL);
> > >> +     if (rc > 0)
> > >> +     return REGEX_MATCH;
> > >> +     switch (rc) {
> > >> +             case PCRE2_ERROR_PARTIAL:
> > >> +                     return REGEX_MATCH_PARTIAL;
> > >> +             case PCRE2_ERROR_NOMATCH:
> > >> +                     return REGEX_NO_MATCH;
> > >> +             default:
> > >> +                     return REGEX_ERROR;
> > >> +     }
> > >> +#else
> > >> +     rc = pcre_exec(regex->regex,
> > >> +                     regex->extra_owned ? regex->sd : &regex->lsd,
subject,
> > >> +                     strlen(subject), 0, partial ?
PCRE_PARTIAL_SOFT : 0,
> > >> +                     NULL,
> > >> +                     0);
> > >> +     switch (rc) {
> > >> +             case 0:
> > >> +                     return REGEX_MATCH;
> > >> +             case PCRE_ERROR_PARTIAL:
> > >> +                     return REGEX_MATCH_PARTIAL;
> > >> +             case PCRE_ERROR_NOMATCH:
> > >> +                     return REGEX_NO_MATCH;
> > >> +             default:
> > >> +                     return REGEX_ERROR;
> > >> +     }
> > >> +#endif
> > >> +}
> > >> +
> > >> +/* TODO Replace this compare function with something that actually
compares the
> > >> + * regular expressions.
> > >> + * This compare function basically just compares the binary
representations of
> > >> + * the automatons, and because this representation contains
pointers and
> > >> + * metadata, it can only return a match if regex1 == regex2.
> > >> + * Preferably, this function would be replaced with an algorithm
that computes
> > >> + * the equivalence of the automatons systematically.
> > >> + */
> > >> +int regex_cmp(struct regex_data * regex1, struct regex_data *
regex2) {
> > >> +     int rc;
> > >> +     size_t len1, len2;
> > >> +#ifdef USE_PCRE2
> > >> +     rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
> > >> +     assert(rc == 0);
> > >> +     rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
> > >> +     assert(rc == 0);
> > >> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> > >> +             return SELABEL_INCOMPARABLE;
> > >> +#else
> > >> +     rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
> > >> +     assert(rc == 0);
> > >> +     rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
> > >> +     assert(rc == 0);
> > >> +     if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
> > >> +             return SELABEL_INCOMPARABLE;
> > >> +#endif
> > >> +     return SELABEL_EQUAL;
> > >> +}
> > >> +
> > >> +void regex_format_error(struct regex_error_data const * error_data,
> > >> +                     char * buffer, size_t buf_size) {
> > >> +     unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
> > >> +     char * ptr = &buffer[buf_size - the_end_length];
> > >> +     int rc = 0;
> > >> +     size_t pos = 0;
> > >> +     if (!buffer || !buf_size)
> > >> +             return;
> > >> +     rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
> > >> +     if (rc < 0)
> > >> +             /* If snprintf fails it constitutes a logical error
that needs
> > >> +              * fixing.
> > >> +              */
> > >> +             abort();
> > >> +
> > >> +     pos += rc;
> > >> +     if (pos >= buf_size)
> > >> +             goto truncated;
> > >> +
> > >> +     if (error_data->error_offset > 0) {
> > >> +#ifdef USE_PCRE2
> > >> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset
%zu: ",
> > >> +                             error_data->error_offset);
> > >> +#else
> > >> +             rc = snprintf(buffer + pos, buf_size - pos, "At offset
%d: ",
> > >> +                             error_data->error_offset);
> > >> +#endif
> > >> +             if (rc < 0)
> > >> +                     abort();
> > >> +
> > >> +     }
> > >> +     pos += rc;
> > >> +     if (pos >= buf_size)
> > >> +             goto truncated;
> > >> +
> > >> +#ifdef USE_PCRE2
> > >> +     rc = pcre2_get_error_message(error_data->error_code,
> > >> +                     (PCRE2_UCHAR*)(buffer + pos),
> > >> +                     buf_size - pos);
> > >> +     if (rc == PCRE2_ERROR_NOMEMORY)
> > >> +             goto truncated;
> > >> +#else
> > >> +     rc = snprintf(buffer + pos, buf_size - pos, "%s",
> > >> +                     error_data->error_buffer);
> > >> +     if (rc < 0)
> > >> +             abort();
> > >> +
> > >> +     if ((size_t)rc < strlen(error_data->error_buffer))
> > >> +             goto truncated;
> > >> +#endif
> > >> +
> > >> +     return;
> > >> +
> > >> +truncated:
> > >> +     /* replace end of string with "..." to indicate that it was
truncated */
> > >> +     switch (the_end_length) {
> > >> +             /* no break statements, fall-through is intended */
> > >> +             case 4:
> > >> +                     *ptr++ = '.';
> > >> +             case 3:
> > >> +                     *ptr++ = '.';
> > >> +             case 2:
> > >> +                     *ptr++ = '.';
> > >> +             case 1:
> > >> +                     *ptr++ = '\0';
> > >> +             default:
> > >> +                     break;
> > >> +     }
> > >> +     return;
> > >> +}
> > >> diff --git a/libselinux/src/regex.h b/libselinux/src/regex.h
> > >> new file mode 100644
> > >> index 0000000..bdc10c0
> > >> --- /dev/null
> > >> +++ b/libselinux/src/regex.h
> > >> @@ -0,0 +1,168 @@
> > >> +#ifndef SRC_REGEX_H_
> > >> +#define SRC_REGEX_H_
> > >> +
> > >> +#include <stdio.h>
> > >> +
> > >> +#ifdef USE_PCRE2
> > >> +#include <pcre2.h>
> > >> +#else
> > >> +#include <pcre.h>
> > >> +#endif
> > >> +
> > >> +enum {
> > >> +     REGEX_MATCH,
> > >> +     REGEX_MATCH_PARTIAL,
> > >> +     REGEX_NO_MATCH,
> > >> +     REGEX_ERROR = -1,
> > >> +};
> > >> +
> > >> +#ifdef USE_PCRE2
> > >> +struct regex_data {
> > >> +     pcre2_code * regex; /* compiled regular expression */
> > >> +     pcre2_match_data * match_data; /* match data block required
for the compiled
> > >> +      pattern in regex2 */
> > >> +};
> > >> +
> > >> +struct regex_error_data {
> > >> +     int error_code;
> > >> +     PCRE2_SIZE error_offset;
> > >> +};
> > >> +
> > >> +/* ^^^^^^ USE_PCRE2  ^^^^^^ */
> > >> +#else
> > >> +/* vvvvvv USE_PCRE vvvvvv */
> > >> +
> > >> +/* Prior to version 8.20, libpcre did not have pcre_free_study() */
> > >> +#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
> > >> +#define pcre_free_study  pcre_free
> > >> +#endif
> > >> +
> > >> +struct regex_data {
> > >> +     pcre *regex; /* compiled regular expression */
> > >> +     int extra_owned; /* non zero if pcre_extra is owned by this
structure
> > >> +                       * and thus must be freed on destruction.
> > >> +                       */
> > >> +     union {
> > >> +             pcre_extra *sd; /* pointer to extra compiled stuff */
> > >> +             pcre_extra lsd; /* used to hold the mmap'd version */
> > >> +     };
> > >> +};
> > >> +
> > >> +struct regex_error_data {
> > >> +     char const * error_buffer;
> > >> +     int error_offset;
> > >> +};
> > >> +
> > >> +#endif /* USE_PCRE2 */
> > >> +
> > >> +struct mmap_area;
> > >> +
> > >> +/**
> > >> + * regex_version returns the version string of the underlying
> > >> + * regular expressions library. In the case of PCRE it just returns
the
> > >> + * result of pcre_version(). In the case of PCRE2, the very first
time this
> > >> + * function is called it allocates a buffer large enough to hold
the version
> > >> + * string and reads the PCRE2_CONFIG_VERSION option to fill the
buffer.
> > >> + * The allocated buffer will linger in memory until the calling
process is being
> > >> + * reaped.
> > >> + *
> > >> + * It may return NULL on error.
> > >> + */
> > >> +char const * regex_version(void);
> > >> +/**
> > >> + * This constructor function allocates a buffer for a regex_data
structure.
> > >> + * The buffer is being initialized with zeroes.
> > >> + */
> > >> +struct regex_data * regex_data_create(void);
> > >> +/**
> > >> + * This complementary destructor function frees a given
regex_data buffer.
> > >> + * It also frees any non-NULL member pointers with the appropriate
pcreX_X_free
> > >> + * function. For PCRE this function respects the extra_owned field
and frees
> > >> + * the pcre_extra data conditionally. Calling this function on a
NULL pointer is
> > >> + * safe.
> > >> + */
> > >> +void regex_data_free(struct regex_data * regex);
> > >> +/**
> > >> + * This function compiles the regular expression. Additionally, it
prepares
> > >> + * data structures required by the different underlying engines.
For PCRE
> > >> + * it calls pcre_study to generate optional data required for
optimized
> > >> + * execution of the compiled pattern. In the case of PCRE2, it
allocates
> > >> + * a pcre2_match_data structure of appropriate size to hold all
possible
> > >> + * matches created by the pattern.
> > >> + *
> > >> + * @arg regex If successful, the structure returned through *regex
was allocated
> > >> + *            with regex_data_create and must be freed with
regex_data_free.
> > >> + * @arg pattern_string The pattern string that is to be compiled.
> > >> + * @arg errordata A pointer to a regex_error_data structure must be
passed
> > >> + *                to this function. This structure depends on the
underlying
> > >> + *                implementation. It can be passed to
regex_format_error
> > >> + *                to generate a human readable error message.
> > >> + * @retval 0 on success
> > >> + * @retval -1 on error
> > >> + */
> > >> +int regex_prepare_data(struct regex_data ** regex, char const *
pattern_string,
> > >> +                     struct regex_error_data * errordata);
> > >> +/**
> > >> + * This function loads a serialized precompiled pattern from a
contiguous
> > >> + * data region given by map_area.
> > >> + *
> > >> + * @arg map_area Description of the memory region holding a
serialized
> > >> + *               representation of the precompiled pattern.
> > >> + * @arg regex If successful, the structure returned through *regex
was allocated
> > >> + *            with regex_data_create and must be freed with
regex_data_free.
> > >> + *
> > >> + * @retval 0 on success
> > >> + * @retval -1 on error
> > >> + */
> > >> +int regex_load_mmap(struct mmap_area * map_area, struct regex_data
** regex);
> > >> +/**
> > >> + * This function stores a precompiled regular expression to a file.
> > >> + * In the case of PCRE, it just dumps the binary representation of
the
> > >> + * precompiled pattern into a file. In the case of PCRE2, it uses
the
> > >> + * serialization function provided by the library.
> > >> + *
> > >> + * @arg regex The precompiled regular expression data.
> > >> + * @arg fp A file stream specifying the output file.
> > >> + */
> > >> +int regex_writef(struct regex_data * regex, FILE * fp);
> > >> +/**
> > >> + * This function applies a precompiled pattern to a subject string
and
> > >> + * returns whether or not a match was found.
> > >> + *
> > >> + * @arg regex The precompiled pattern.
> > >> + * @arg subject The subject string.
> > >> + * @arg partial Boolean indicating if partial matches are wanted. A
nonzero
> > >> + *              value is equivalent to specifying
PCRE[2]_PARTIAL_SOFT as
> > >> + *              option to pcre_exec or pcre2_match.
> > >> + * @retval REGEX_MATCH if a match was found
> > >> + * @retval REGEX_MATCH_PARTIAL if a partial match was found
> > >> + * @retval REGEX_NO_MATCH if no match was found
> > >> + * @retval REGEX_ERROR if an error was encountered during the
execution of the
> > >> + *                     regular expression
> > >> + */
> > >> +int regex_match(struct regex_data * regex, char const * subject,
int partial);
> > >> +/**
> > >> + * This function compares two compiled regular expressions (regex1
and regex2).
> > >> + * It compares the binary representations of the compiled patterns.
It is a very
> > >> + * crude approximation because the binary representation holds data
like
> > >> + * reference counters, which have nothing to do with the actual state
machine.
> > >> + *
> > >> + * @retval SELABEL_EQUAL if the pattern's binary representations
are exactly
> > >> + *                       the same
> > >> + * @retval SELABEL_INCOMPARABLE otherwise
> > >> + */
> > >> +int regex_cmp(struct regex_data * regex1, struct regex_data *
regex2);
> > >> +/**
> > >> + * This function takes the error data returned by
regex_prepare_data and turns
> > >> + * it into a human readable error message.
> > >> + * If the buffer given to hold the error message is too small it
truncates the
> > >> + * message and indicates the truncation with an ellipsis ("...") at
the end of
> > >> + * the buffer.
> > >> + *
> > >> + * @arg error_data Error data as returned by regex_prepare_data.
> > >> + * @arg buffer String buffer to hold the formatted error string.
> > >> + * @arg buf_size Total size of the given buffer in bytes.
> > >> + */
> > >> +void regex_format_error(struct regex_error_data const * error_data,
> > >> +                     char * buffer, size_t buf_size);
> > >> +#endif  /* SRC_REGEX_H_ */
> > >> diff --git a/libselinux/utils/Makefile b/libselinux/utils/Makefile
> > >> index 8497cb4..1e7a048 100644
> > >> --- a/libselinux/utils/Makefile
> > >> +++ b/libselinux/utils/Makefile
> > >> @@ -24,12 +24,12 @@ CFLAGS ?= -O -Wall -W -Wundef -Wformat-y2k
-Wformat-security -Winit-self -Wmissi
> > >>            -fasynchronous-unwind-tables -fdiagnostics-show-option
-funit-at-a-time \
> > >>            -fipa-pure-const -Wno-suggest-attribute=pure
-Wno-suggest-attribute=const \
> > >>            -Werror -Wno-aggregate-return -Wno-redundant-decls
> > >> -override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
$(EMFLAGS)
> > >> +override CFLAGS += -I../include -I$(INCLUDEDIR) -D_GNU_SOURCE
$(EMFLAGS) $(PCRE_CFLAGS)
> > >>  LDLIBS += -L../src -lselinux -L$(LIBDIR)
> > >>
> > >>  TARGETS=$(patsubst %.c,%,$(wildcard *.c))
> > >>
> > >> -sefcontext_compile: LDLIBS += -lpcre ../src/libselinux.a -lsepol
> > >> +sefcontext_compile: LDLIBS += $(PCRE_LDFLAGS) ../src/libselinux.a
-lsepol
> > >>
> > >>  selinux_restorecon: LDLIBS += -lsepol
> > >>
> > >> diff --git a/libselinux/utils/sefcontext_compile.c
b/libselinux/utils/sefcontext_compile.c
> > >> index fd6fb78..8ff73f4 100644
> > >> --- a/libselinux/utils/sefcontext_compile.c
> > >> +++ b/libselinux/utils/sefcontext_compile.c
> > >> @@ -1,6 +1,5 @@
> > >>  #include <ctype.h>
> > >>  #include <errno.h>
> > >> -#include <pcre.h>
> > >>  #include <stdint.h>
> > >>  #include <stdio.h>
> > >>  #include <string.h>
> > >> @@ -13,6 +12,7 @@
> > >>  #include <sepol/sepol.h>
> > >>
> > >>  #include "../src/label_file.h"
> > >> +#include "../src/regex.h"
> > >>
> > >>  const char *policy_file;
> > >>  static int ctx_err;
> > >> @@ -119,12 +119,14 @@ static int write_binary_file(struct saved_data
*data, int fd)
> > >>       if (len != 1)
> > >>               goto err;
> > >>
> > >> -     /* write the pcre version */
> > >> -     section_len = strlen(pcre_version());
> > >> +     /* write version of the regex back-end */
> > >> +     if (!regex_version())
> > >> +             goto err;
> > >> +     section_len = strlen(regex_version());
> > >>       len = fwrite(&section_len, sizeof(uint32_t), 1, bin_file);
> > >>       if (len != 1)
> > >>               goto err;
> > >> -     len = fwrite(pcre_version(), sizeof(char), section_len,
bin_file);
> > >> +     len = fwrite(regex_version(), sizeof(char), section_len,
bin_file);
> > >>       if (len != section_len)
> > >>               goto err;
> > >>
> > >> @@ -162,10 +164,8 @@ static int write_binary_file(struct saved_data
*data, int fd)
> > >>               mode_t mode = specs[i].mode;
> > >>               size_t prefix_len = specs[i].prefix_len;
> > >>               int32_t stem_id = specs[i].stem_id;
> > >> -             pcre *re = specs[i].regex;
> > >> -             pcre_extra *sd = get_pcre_extra(&specs[i]);
> > >> +             struct regex_data *re = specs[i].regex;
> > >>               uint32_t to_write;
> > >> -             size_t size;
> > >>
> > >>               /* length of the context string (including nul) */
> > >>               to_write = strlen(context) + 1;
> > >> @@ -212,42 +212,10 @@ static int write_binary_file(struct saved_data
*data, int fd)
> > >>               if (len != 1)
> > >>                       goto err;
> > >>
> > >> -             /* determine the size of the pcre data in bytes */
> > >> -             rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
> > >> +             /* Write regex related data */
> > >> +             rc = regex_writef(re, bin_file);
> > >>               if (rc < 0)
> > >>                       goto err;
> > >> -
> > >> -             /* write the number of bytes in the pcre data */
> > >> -             to_write = size;
> > >> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> > >> -             if (len != 1)
> > >> -                     goto err;
> > >> -
> > >> -             /* write the actual pcre data as a char array */
> > >> -             len = fwrite(re, 1, to_write, bin_file);
> > >> -             if (len != to_write)
> > >> -                     goto err;
> > >> -
> > >> -             if (sd) {
> > >> -                     /* determine the size of the pcre study info */
> > >> -                     rc = pcre_fullinfo(re, sd,
PCRE_INFO_STUDYSIZE, &size);
> > >> -                     if (rc < 0)
> > >> -                             goto err;
> > >> -             } else
> > >> -                     size = 0;
> > >> -
> > >> -             /* write the number of bytes in the pcre study data */
> > >> -             to_write = size;
> > >> -             len = fwrite(&to_write, sizeof(uint32_t), 1, bin_file);
> > >> -             if (len != 1)
> > >> -                     goto err;
> > >> -
> > >> -             if (sd) {
> > >> -                     /* write the actual pcre study data as a char
array */
> > >> -                     len = fwrite(sd->study_data, 1, to_write,
bin_file);
> > >> -                     if (len != to_write)
> > >> -                             goto err;
> > >> -             }
> > >>       }
> > >>
> > >>       rc = 0;
> > >> @@ -270,8 +238,7 @@ static void free_specs(struct saved_data *data)
> > >>               free(specs[i].lr.ctx_trans);
> > >>               free(specs[i].regex_str);
> > >>               free(specs[i].type_str);
> > >> -             pcre_free(specs[i].regex);
> > >> -             pcre_free_study(specs[i].sd);
> > >> +             regex_data_free(specs[i].regex);
> > >>       }
> > >>       free(specs);
> > >>
> > >>
> > >
> > > _______________________________________________
> > > Selinux mailing list
> > > Selinux@tycho.nsa.gov
> > > To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
> > > To get help, send an email containing "help" to
Selinux-request@tycho.nsa.gov.
> >
> >
> >
> > --
> > Respectfully,
> >
> > William C Roberts
> > _______________________________________________
> > Selinux mailing list
> > Selinux@tycho.nsa.gov
> > To unsubscribe, send email to Selinux-leave@tycho.nsa.gov.
> > To get help, send an email containing "help" to
Selinux-request@tycho.nsa.gov.

[-- Attachment #2: Type: text/html, Size: 75411 bytes --]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07 18:25 ` Stephen Smalley
@ 2016-09-07 18:38   ` Stephen Smalley
  2016-09-07 19:29   ` Stephen Smalley
  1 sibling, 0 replies; 13+ messages in thread
From: Stephen Smalley @ 2016-09-07 18:38 UTC (permalink / raw)
  To: Janis Danisevskis, selinux, seandroid-list, jwcart2; +Cc: Janis Danisevskis

On 09/07/2016 02:25 PM, Stephen Smalley wrote:
> On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
>> From: Janis Danisevskis <jdanis@google.com>
>>
>> This patch moves all pcre1/2 dependencies into the new files regex.h
>> and regex.c implementing the common denominator of features needed
>> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
>> used implementations.
>>
>> As of this patch libselinux supports either pcre or pcre2 but not
>> both at the same time. The persistently stored file contexts
>> information differs. This means libselinux can only load file
>> context files generated by sefcontext_compile build with the
>> same pcre variant.
>>
>> Also, for pcre2 the persistent format is architecture dependant.
>> Stored precompiled regular expressions can only be used on the
>> same architecture they were generated on. If pcre2 is used and
>> sefcontext_compile shall generate portable output, it and libselinux
>> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
>> cost of having to recompile the regular expressions at load time.
>>
>> Signed-off-by: Janis Danisevskis <jdanis@google.com>
>> ---
>>  libselinux/Makefile                   |  13 ++
>>  libselinux/src/Makefile               |   4 +-
>>  libselinux/src/label_file.c           |  91 ++------
>>  libselinux/src/label_file.h           |  54 ++---
>>  libselinux/src/regex.c                | 405 ++++++++++++++++++++++++++++++++++
>>  libselinux/src/regex.h                | 168 ++++++++++++++
>>  libselinux/utils/Makefile             |   4 +-
>>  libselinux/utils/sefcontext_compile.c |  53 +----
>>  8 files changed, 637 insertions(+), 155 deletions(-)
>>  create mode 100644 libselinux/src/regex.c
>>  create mode 100644 libselinux/src/regex.h
>>
> 
>> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
>> index c89bb35..6698624 100644
>> --- a/libselinux/src/label_file.c
>> +++ b/libselinux/src/label_file.c
>> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>  
>>  		spec = &data->spec_arr[data->nspec];
>>  		spec->from_mmap = 1;
>> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
>> +		spec->regcomp = 0;
>> +#else
>>  		spec->regcomp = 1;
>> +#endif
> 
> If we still need this, maybe regex_load_mmap() should take
> &spec->regcomp as an argument and set it internally so that we don't
> need to litter this file with #ifdefs?
> 
>> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
>> index 6d1e890..a2e30e5 100644
>> --- a/libselinux/src/label_file.h
>> +++ b/libselinux/src/label_file.h
>> @@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle *rec,
>>  	struct saved_data *data = (struct saved_data *)rec->data;
>>  	struct spec *spec_arr;
>>  	unsigned int nspec = data->nspec;
>> -	const char *errbuf = NULL;
>> +	char const *errbuf;
>> +	struct regex_error_data error_data;
>>  
>>  	items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
>>  	if (items < 0) {
>> @@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle *rec,
>>  	data->nspec++;
>>  
>>  	if (rec->validating &&
>> -			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
>> +			    compile_regex(data, &spec_arr[nspec], &error_data)) {
>>  		COMPAT_LOG(SELINUX_ERROR,
>>  			   "%s:  line %u has invalid regex %s:  %s\n",
>>  			   path, lineno, regex,
> 
> On the next line (omitted from the diff) we pass errbuf if set as the
> error string.  But your error is hidden in error_data.  Looks like we
> need to use regex_format_error() here?
> 
>> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
>> new file mode 100644
>> index 0000000..6b92b04
>> --- /dev/null
>> +++ b/libselinux/src/regex.c
>> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
>> +	int rc;
>> +	size_t entry_len;
>> +#ifndef USE_PCRE2
>> +	size_t info_len;
>> +#endif
>> +
>> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> 
> This and similar statements are the cause of your uninitialised variable
> use warnings.  entry_len needs to be a uint32_t here.  size_t is 64 bits
> on 64-bit architectures.  Same for info_len.
> 
>> +struct regex_data * regex_data_create(void) {
>> +	struct regex_data * dummy = (struct regex_data*) malloc(
>> +			sizeof(struct regex_data));
>> +	if (dummy) {
>> +		memset(dummy, 0, sizeof(struct regex_data));
>> +	}
>> +	return dummy;
>> +}
>> +
>> +void regex_data_free(struct regex_data * regex) {
>> +	if (regex) {
>> +#ifdef USE_PCRE2
>> +		if (regex->regex) {
>> +			pcre2_code_free(regex->regex);
>> +		}
>> +		if (regex->match_data) {
>> +			pcre2_match_data_free(regex->match_data);
>> +		}
>> +#else
>> +		if (regex->regex)
>> +			pcre_free(regex->regex);
>> +		if (regex->extra_owned && regex->sd) {
>> +			pcre_free_study(regex->sd);
>> +		}
>> +#endif
>> +		free(regex);
>> +	}
>> +}
> 
> The reason you are leaking memory is that regex_data_free() is only ever
> called if !spec->from_mmap.  The old code in closef() to free the
> compiled regexes was only necessary when the regexes were compiled at
> runtime, but you have introduced a memory allocation for regex_data even
> for the mmap'd file that needs to be freed.

And if you move your regex_data_free() call up, you need to make sure
you don't call any _free() functions on anything other than your
top-level data structure if it is from_mmap.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 1/2] libselinux: add support for pcre2
  2016-09-07 18:25 ` Stephen Smalley
  2016-09-07 18:38   ` Stephen Smalley
@ 2016-09-07 19:29   ` Stephen Smalley
  1 sibling, 0 replies; 13+ messages in thread
From: Stephen Smalley @ 2016-09-07 19:29 UTC (permalink / raw)
  To: Janis Danisevskis, selinux, seandroid-list, jwcart2; +Cc: Janis Danisevskis

On 09/07/2016 02:25 PM, Stephen Smalley wrote:
> On 09/07/2016 04:08 AM, Janis Danisevskis wrote:
>> From: Janis Danisevskis <jdanis@google.com>
>>
>> This patch moves all pcre1/2 dependencies into the new files regex.h
>> and regex.c implementing the common denominator of features needed
>> by libselinux. The compiler flag -DUSE_PCRE2 toggles between the
>> used implementations.
>>
>> As of this patch libselinux supports either pcre or pcre2 but not
>> both at the same time. The persistently stored file contexts
>> information differs. This means libselinux can only load file
>> context files generated by sefcontext_compile built with the
>> same pcre variant.
>>
>> Also, for pcre2 the persistent format is architecture dependent.
>> Stored precompiled regular expressions can only be used on the
>> same architecture they were generated on. If pcre2 is used and
>> sefcontext_compile shall generate portable output, it and libselinux
>> must be compiled with -DNO_PERSISTENTLY_STORED_PATTERNS, at the
>> cost of having to recompile the regular expressions at load time.
>>
>> Signed-off-by: Janis Danisevskis <jdanis@google.com>
>> ---
>>  libselinux/Makefile                   |  13 ++
>>  libselinux/src/Makefile               |   4 +-
>>  libselinux/src/label_file.c           |  91 ++------
>>  libselinux/src/label_file.h           |  54 ++---
>>  libselinux/src/regex.c                | 405 ++++++++++++++++++++++++++++++++++
>>  libselinux/src/regex.h                | 168 ++++++++++++++
>>  libselinux/utils/Makefile             |   4 +-
>>  libselinux/utils/sefcontext_compile.c |  53 +----
>>  8 files changed, 637 insertions(+), 155 deletions(-)
>>  create mode 100644 libselinux/src/regex.c
>>  create mode 100644 libselinux/src/regex.h
>>
> 
>> diff --git a/libselinux/src/label_file.c b/libselinux/src/label_file.c
>> index c89bb35..6698624 100644
>> --- a/libselinux/src/label_file.c
>> +++ b/libselinux/src/label_file.c
>> @@ -278,7 +280,11 @@ static int load_mmap(struct selabel_handle *rec, const char *path,
>>  
>>  		spec = &data->spec_arr[data->nspec];
>>  		spec->from_mmap = 1;
>> +#if defined USE_PCRE2 && defined NO_PERSISTENTLY_STORED_PATTERNS
>> +		spec->regcomp = 0;
>> +#else
>>  		spec->regcomp = 1;
>> +#endif
> 
> If we still need this, maybe regex_load_mmap() should take
> &spec->regcomp as an argument and set it internally so that we don't
> need to litter this file with #ifdefs?
> 
>> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
>> index 6d1e890..a2e30e5 100644
>> --- a/libselinux/src/label_file.h
>> +++ b/libselinux/src/label_file.h
>> @@ -394,7 +371,8 @@ static inline int process_line(struct selabel_handle *rec,
>>  	struct saved_data *data = (struct saved_data *)rec->data;
>>  	struct spec *spec_arr;
>>  	unsigned int nspec = data->nspec;
>> -	const char *errbuf = NULL;
>> +	char const *errbuf;
>> +	struct regex_error_data error_data;
>>  
>>  	items = read_spec_entries(line_buf, &errbuf, 3, &regex, &type, &context);
>>  	if (items < 0) {
>> @@ -454,7 +432,7 @@ static inline int process_line(struct selabel_handle *rec,
>>  	data->nspec++;
>>  
>>  	if (rec->validating &&
>> -			    compile_regex(data, &spec_arr[nspec], &errbuf)) {
>> +			    compile_regex(data, &spec_arr[nspec], &error_data)) {
>>  		COMPAT_LOG(SELINUX_ERROR,
>>  			   "%s:  line %u has invalid regex %s:  %s\n",
>>  			   path, lineno, regex,
> 
> On the next line (omitted from the diff) we pass errbuf if set as the
> error string.  But your error is hidden in error_data.  Looks like we
> need to use regex_format_error() here?
> 
>> diff --git a/libselinux/src/regex.c b/libselinux/src/regex.c
>> new file mode 100644
>> index 0000000..6b92b04
>> --- /dev/null
>> +++ b/libselinux/src/regex.c
>> +int regex_load_mmap(struct mmap_area * mmap_area, struct regex_data ** regex) {
>> +	int rc;
>> +	size_t entry_len;
>> +#ifndef USE_PCRE2
>> +	size_t info_len;
>> +#endif
>> +
>> +	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
> 
> This and similar statements are the cause of your uninitialised variable
> use warnings.  entry_len needs to be a uint32_t here.  size_t is 64 bits
> on 64-bit architectures.  Same for info_len.

Oops, sorry, never mind about info_len.  That one stays as size_t.

> 
>> +struct regex_data * regex_data_create(void) {
>> +	struct regex_data * dummy = (struct regex_data*) malloc(
>> +			sizeof(struct regex_data));
>> +	if (dummy) {
>> +		memset(dummy, 0, sizeof(struct regex_data));
>> +	}
>> +	return dummy;
>> +}
>> +
>> +void regex_data_free(struct regex_data * regex) {
>> +	if (regex) {
>> +#ifdef USE_PCRE2
>> +		if (regex->regex) {
>> +			pcre2_code_free(regex->regex);
>> +		}
>> +		if (regex->match_data) {
>> +			pcre2_match_data_free(regex->match_data);
>> +		}
>> +#else
>> +		if (regex->regex)
>> +			pcre_free(regex->regex);
>> +		if (regex->extra_owned && regex->sd) {
>> +			pcre_free_study(regex->sd);
>> +		}
>> +#endif
>> +		free(regex);
>> +	}
>> +}
> 
> The reason you are leaking memory is that regex_data_free() is only ever
> called if !spec->from_mmap.  The old code in closef() to free the
> compiled regexes was only necessary when the regexes were compiled at
> runtime, but you have introduced a memory allocation for regex_data even
> for the mmap'd file that needs to be freed.
> 
> 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-09-07 19:29 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-07  8:08 [PATCH 1/2] libselinux: add support for pcre2 Janis Danisevskis
2016-09-07  8:08 ` [PATCH 2/2] libselinux: fix memory leak on pcre2 Janis Danisevskis
2016-09-07 15:02 ` [PATCH 1/2] libselinux: add support for pcre2 Stephen Smalley
2016-09-07 15:37   ` William Roberts
2016-09-07 15:38     ` Janis Danisevskis
2016-09-07 16:40   ` William Roberts
2016-09-07 18:29     ` Jason Zaman
     [not found]       ` <CAFftDdqSUHPVVn7megAAHyjn_14XvVqn+8ukywr8nCgwMH4X3g@mail.gmail.com>
2016-09-07 18:36         ` William Roberts
2016-09-07 17:16   ` Janis Danisevskis
2016-09-07 15:19 ` William Roberts
2016-09-07 18:25 ` Stephen Smalley
2016-09-07 18:38   ` Stephen Smalley
2016-09-07 19:29   ` Stephen Smalley

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).