All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/2] Generalize tokenizer and remove sscanf calls from libsepol
@ 2015-07-27 13:21 Yuli Khodorkovskiy
  2015-07-27 13:21 ` [PATCH 1/2] libselinux: generalize read_spec_entries for any delimiter Yuli Khodorkovskiy
  2015-07-27 13:21 ` [PATCH 2/2] libsepol: Replace sscanf in module_to_cil Yuli Khodorkovskiy
  0 siblings, 2 replies; 5+ messages in thread
From: Yuli Khodorkovskiy @ 2015-07-27 13:21 UTC (permalink / raw)
  To: selinux

Libsepol uses %ms in sscanf, which certain platforms do not support. Since
Richard Haines already added support for this in libselinux we modified and
added the new tokenize() function to libsepol and libselinux.

Modifications were necessary to preserve existing libsepol functionality such as
splitting on non-whitespace delimiters and not squashing consecutive delimiters.

Yuli Khodorkovskiy (2):
  libselinux: generalize read_spec_entries for any delimiter
  libsepol: Replace sscanf in module_to_cil

 libselinux/src/label_android_property.c |  11 ++-
 libselinux/src/label_file.h             |  10 ++-
 libselinux/src/label_internal.h         |   7 +-
 libselinux/src/label_support.c          | 129 +++++++++++++++++++-------------
 libsepol/include/sepol/policydb/util.h  |   6 ++
 libsepol/src/module_to_cil.c            |  61 ++++++++++++---
 libsepol/src/util.c                     | 102 +++++++++++++++++++++++++
 7 files changed, 255 insertions(+), 71 deletions(-)

-- 
1.9.3

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] libselinux: generalize read_spec_entries for any delimiter
  2015-07-27 13:21 [PATCH 0/2] Generalize tokenizer and remove sscanf calls from libsepol Yuli Khodorkovskiy
@ 2015-07-27 13:21 ` Yuli Khodorkovskiy
  2015-07-27 16:18   ` Stephen Smalley
  2015-07-27 13:21 ` [PATCH 2/2] libsepol: Replace sscanf in module_to_cil Yuli Khodorkovskiy
  1 sibling, 1 reply; 5+ messages in thread
From: Yuli Khodorkovskiy @ 2015-07-27 13:21 UTC (permalink / raw)
  To: selinux

This patch allows the tokenizer previously introduced for libselinux to
use any delimiter for tokenization. Delimiters may be squashed by
setting the squash_delim flag to 1. When any whitespace delimiter is used,
isspace() allows any whitespace character to be used as the delimiter.

Previously, read_spec_entries() trimmed whitespace and ignored comments and
empty lines. This functionality has been removed as is now the
responsiblity of the caller to implement.

Signed-off-by: Yuli Khodorkovskiy <ykhodorkovskiy@tresys.com>
---
 libselinux/src/label_android_property.c |  11 ++-
 libselinux/src/label_file.h             |  10 ++-
 libselinux/src/label_internal.h         |   7 +-
 libselinux/src/label_support.c          | 129 +++++++++++++++++++-------------
 4 files changed, 97 insertions(+), 60 deletions(-)

diff --git a/libselinux/src/label_android_property.c b/libselinux/src/label_android_property.c
index 4af9896..5c06ed3 100644
--- a/libselinux/src/label_android_property.c
+++ b/libselinux/src/label_android_property.c
@@ -90,9 +90,14 @@ static int process_line(struct selabel_handle *rec,
 	spec_t *spec_arr = data->spec_arr;
 	unsigned int nspec = data->nspec;
 
-	items = read_spec_entries(line_buf, 2, &prop, &context);
-	if (items <= 0)
-		return items;
+	trim_buf(&line_buf);
+
+	/* Skip comment lines and empty lines. */
+	if (*line_buf == '#' || *line_buf == '\0') {
+		return 0;
+	}
+
+	items = tokenize(line_buf, 1, ' ', 2, &prop, &context);
 	if (items != 2) {
 		selinux_log(SELINUX_WARNING,
 			    "%s:  line %u is missing fields, skipping\n", path,
diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
index db961ba..3499da1 100644
--- a/libselinux/src/label_file.h
+++ b/libselinux/src/label_file.h
@@ -387,10 +387,14 @@ static inline int process_line(struct selabel_handle *rec,
 	unsigned int nspec = data->nspec;
 	const char *errbuf = NULL;
 
-	items = read_spec_entries(line_buf, 3, &regex, &type, &context);
-	if (items <= 0)
-		return items;
+	trim_buf(&line_buf);
 
+	/* Skip comment lines and empty lines. */
+	if (*line_buf == '#' || *line_buf == '\0') {
+		return 0;
+	}
+
+	items = tokenize(line_buf, 1, ' ', 3, &regex, &type, &context);
 	if (items < 2) {
 		COMPAT_LOG(SELINUX_WARNING,
 			    "%s:  line %u is missing fields, skipping\n", path,
diff --git a/libselinux/src/label_internal.h b/libselinux/src/label_internal.h
index 861eca1..81a4f37 100644
--- a/libselinux/src/label_internal.h
+++ b/libselinux/src/label_internal.h
@@ -108,9 +108,10 @@ compat_validate(struct selabel_handle *rec,
 		const char *path, unsigned lineno) hidden;
 
 /*
- * The read_spec_entries function may be used to
- * replace sscanf to read entries from spec files.
+ * The tokenize function may be used to
+ * replace sscanf.
  */
-extern int read_spec_entries(char *line_buf, int num_args, ...);
+extern int tokenize(char *line_buf, int squash_delim, char delim, int num_args, ...);
+extern void trim_buf(char **buf);
 
 #endif				/* _SELABEL_INTERNAL_H_ */
diff --git a/libselinux/src/label_support.c b/libselinux/src/label_support.c
index b3ab8ab..08ef303 100644
--- a/libselinux/src/label_support.c
+++ b/libselinux/src/label_support.c
@@ -11,88 +11,115 @@
 #include "label_internal.h"
 
 /*
- * The read_spec_entries and read_spec_entry functions may be used to
- * replace sscanf to read entries from spec files. The file and
- * property services now use these.
+ * The tokenize and tokenize_str functions may be used to
+ * replace sscanf to read tokens from buffers.
  */
 
-/* Read an entry from a spec file (e.g. file_contexts) */
-static inline int read_spec_entry(char **entry, char **ptr, int *len)
+/* Read a token from a buffer */
+static inline int tokenize_str(int squash_delim, char delim, char **str, char **ptr, int *len)
 {
-	*entry = NULL;
-	char *tmp_buf = NULL;
+	char *tmp_buf = *ptr;
+	*str = NULL;
+
+	while (**ptr != '\0') {
+		if (isspace(delim) && isspace(**ptr)) {
+			(*ptr)++;
+			break;
+		} else if (!isspace(delim) && **ptr == delim) {
+			(*ptr)++;
+			break;
+		}
 
-	while (isspace(**ptr) && **ptr != '\0')
 		(*ptr)++;
+	}
 
-	tmp_buf = *ptr;
-	*len = 0;
+	*len = *ptr - tmp_buf;
+	/* If the end of the string has not been reached, this will ensure the
+	 * delimiter is not included when returning the token.
+	 */
+	if (**ptr != '\0') {
+		(*len)--;
+	}
 
-	while (!isspace(**ptr) && **ptr != '\0') {
-		(*ptr)++;
-		(*len)++;
+	*str = strndup(tmp_buf, *len);
+	if (!*str) {
+		return -1;
 	}
 
-	if (*len) {
-		*entry = strndup(tmp_buf, *len);
-		if (!*entry)
-			return -1;
+	while (**ptr != '\0' && squash_delim == 1) {
+		if (isspace(delim) && isspace(**ptr)) {
+			(*ptr)++;
+		} else if (!isspace(delim) && **ptr == delim) {
+			(*ptr)++;
+		} else {
+			break;
+		}
 	}
 
 	return 0;
 }
 
 /*
- * line_buf - Buffer containing the spec entries .
- * num_args - The number of spec parameter entries to process.
- * ...      - A 'char **spec_entry' for each parameter.
+ * line_buf - Buffer containing string to tokenize.
+ * squash_delim - Whether or not to squash repeating delimiters in input data.
+ *		   Set to 1 to squash_delimiter or 0 to disable squashing delimiter.
+ * delim - The delimiter used to tokenize line_buf. A whitespace delimiter will
+ *	    be tokenized using isspace().
+ * num_args - The number of parameter entries to process.
+ * ...      - A 'char **' for each parameter.
  * returns  - The number of items processed.
  *
- * This function calls read_spec_entry() to do the actual string processing.
+ * This function calls tokenize_str() to do the actual string processing. The
+ * caller is responsible for calling free() on each additional argument. The
+ * function will not tokenize more than num_args and the last argument will
+ * contain the remaining content of line_buf.
  */
-int hidden read_spec_entries(char *line_buf, int num_args, ...)
+int tokenize(char *line_buf, int squash_delim, char delim, int num_args, ...)
 {
-	char **spec_entry, *buf_p;
-	int len, rc, items, entry_len = 0;
+	char **arg, *buf_p;
+	int rc, items, arg_len = 0;
 	va_list ap;
 
-	len = strlen(line_buf);
-	if (line_buf[len - 1] == '\n')
-		line_buf[len - 1] = '\0';
-	else
-		/* Handle case if line not \n terminated by bumping
-		 * the len for the check below (as the line is NUL
-		 * terminated by getline(3)) */
-		len++;
-
 	buf_p = line_buf;
-	while (isspace(*buf_p))
-		buf_p++;
 
-	/* Skip comment lines and empty lines. */
-	if (*buf_p == '#' || *buf_p == '\0')
-		return 0;
-
-	/* Process the spec file entries */
+	/* Process the arguments */
 	va_start(ap, num_args);
 
-	items = 0;
-	while (items < num_args) {
-		spec_entry = va_arg(ap, char **);
+	for (items = 0; items < num_args && *buf_p != '\0'; items++) {
+		arg = va_arg(ap, char **);
+
+		/* Save the remainder of the string in arg */
+		if (items == num_args - 1) {
+			*arg = strdup(buf_p);
+			if (*arg == NULL) {
+				goto exit;
+			}
 
-		if (len - 1 == buf_p - line_buf) {
-			va_end(ap);
-			return items;
+			continue;
 		}
 
-		rc = read_spec_entry(spec_entry, &buf_p, &entry_len);
+		rc = tokenize_str(squash_delim, delim, arg, &buf_p, &arg_len);
 		if (rc < 0) {
-			va_end(ap);
-			return rc;
+			goto exit;
 		}
-		if (entry_len)
-			items++;
 	}
+
+exit:
 	va_end(ap);
 	return items;
 }
+
+/* This function removes leading whitespace. If the buffer ends in a newline,
+ * the newline is removed.
+ */
+void trim_buf(char **buf)
+{
+	int len = strlen(*buf);
+	if ((*buf)[len - 1] == '\n') {
+		(*buf)[len - 1] = '\0';
+	}
+
+	while (isspace(**buf)) {
+		(*buf)++;
+	}
+}
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] libsepol: Replace sscanf in module_to_cil
  2015-07-27 13:21 [PATCH 0/2] Generalize tokenizer and remove sscanf calls from libsepol Yuli Khodorkovskiy
  2015-07-27 13:21 ` [PATCH 1/2] libselinux: generalize read_spec_entries for any delimiter Yuli Khodorkovskiy
@ 2015-07-27 13:21 ` Yuli Khodorkovskiy
  1 sibling, 0 replies; 5+ messages in thread
From: Yuli Khodorkovskiy @ 2015-07-27 13:21 UTC (permalink / raw)
  To: selinux

Some platforms do not have %ms support in sscanf. This adds a tokenize()
function to be used instead of sscanf. It is copied from the tokenize()
function in libselinux. tokenize() has the ability to split on any
delimiter and supports squashing of consecutive delimiters.

Signed-off-by: Yuli Khodorkovskiy <ykhodorkovskiy@tresys.com>
---
 libsepol/include/sepol/policydb/util.h |   6 ++
 libsepol/src/module_to_cil.c           |  61 ++++++++++++++++----
 libsepol/src/util.c                    | 102 +++++++++++++++++++++++++++++++++
 3 files changed, 158 insertions(+), 11 deletions(-)

diff --git a/libsepol/include/sepol/policydb/util.h b/libsepol/include/sepol/policydb/util.h
index ef1c90d..ed58dff 100644
--- a/libsepol/include/sepol/policydb/util.h
+++ b/libsepol/include/sepol/policydb/util.h
@@ -32,5 +32,11 @@ extern int add_i_to_a(uint32_t i, uint32_t * cnt, uint32_t ** a);
 extern char *sepol_av_to_string(policydb_t * policydbp, uint32_t tclass,
 				sepol_access_vector_t av);
 
+/*
+ * The tokenize function may be used to
+ * replace sscanf
+ */
+extern int tokenize(char *line_buf, int squash_delim, char delim, int num_args, ...);
+
 __END_DECLS
 #endif
diff --git a/libsepol/src/module_to_cil.c b/libsepol/src/module_to_cil.c
index 2d8046c..056e10e 100644
--- a/libsepol/src/module_to_cil.c
+++ b/libsepol/src/module_to_cil.c
@@ -2859,7 +2859,7 @@ static int level_string_to_cil(char *levelstr)
 	char *token = NULL;
 	char *ranged = NULL;
 
-	matched = sscanf(levelstr, "%m[^:]:%ms", &sens, &cats);
+	matched = tokenize(levelstr, 0, ':', 2, &sens, &cats);
 	if (matched < 1 || matched > 2) {
 		log_err("Invalid level: %s", levelstr);
 		rc = -1;
@@ -2924,7 +2924,7 @@ static int context_string_to_cil(char *contextstr)
 	char *type = NULL;
 	char *level = NULL;
 
-	matched = sscanf(contextstr, "%m[^:]:%m[^:]:%m[^:]:%ms", &user, &role, &type, &level);
+	matched = tokenize(contextstr, 0, ':', 4, &user, &role, &type, &level);
 	if (matched < 3 || matched > 4) {
 		log_err("Invalid context: %s", contextstr);
 		rc = -1;
@@ -2965,6 +2965,7 @@ static int seusers_to_cil(struct sepol_module_package *mod_pkg)
 	char *user = NULL;
 	char *seuser = NULL;
 	char *level = NULL;
+	char *tmp = NULL;
 	int matched;
 
 	if (seusers_len == 0) {
@@ -2972,11 +2973,18 @@ static int seusers_to_cil(struct sepol_module_package *mod_pkg)
 	}
 
 	while ((rc = get_line(&cur, end, &line)) > 0) {
-		if (line[0] == '#') {
+		tmp = line;
+		while (isspace(*tmp)) {
+			tmp++;
+		}
+
+		if (tmp[0] == '#' || tmp[0] == '\0') {
+			free(line);
+			line = NULL;
 			continue;
 		}
 
-		matched = sscanf(line, "%m[^:]:%m[^:]:%ms", &user, &seuser, &level);
+		matched = tokenize(tmp, 0, ':', 3, &user, &seuser, &level);
 
 		if (matched < 2 || matched > 3) {
 			log_err("Invalid seuser line: %s", line);
@@ -3045,28 +3053,51 @@ static int user_extra_to_cil(struct sepol_module_package *mod_pkg)
 	int matched;
 	char *user = NULL;
 	char *prefix = NULL;
+	int prefix_len = 0;
+	char *user_str = NULL;
+	char *prefix_str = NULL;
+	char *eol = NULL;
+	char *tmp = NULL;
 
 	if (userx_len == 0) {
 		return 0;
 	}
 
 	while ((rc = get_line(&cur, end, &line)) > 0) {
-		if (line[0] == '#') {
+		tmp = line;
+		while (isspace(*tmp)) {
+			tmp++;
+		}
+
+		if (tmp[0] == '#' || tmp[0] == '\0') {
+			free(line);
+			line = NULL;
 			continue;
 		}
 
-		matched = sscanf(line, "user %ms prefix %m[^;];", &user, &prefix);
-		if (matched != 2) {
+		matched = tokenize(tmp, 1, ' ', 4, &user_str, &user, &prefix_str, &prefix);
+		if (matched != 4) {
 			rc = -1;
-			log_err("Invalid file context line: %s", line);
+			log_err("Invalid user extra line: %s", line);
 			goto exit;
 		}
 
+		prefix_len = strlen(prefix);
+		eol = prefix + prefix_len - 1;
+		if (*eol != ';' || strcmp(user_str, "user") || strcmp(prefix_str, "prefix")) {
+			rc = -1;
+			log_err("Invalid user extra line: %s", line);
+			goto exit;
+		}
+		*eol = '\0';
+
 		cil_println(0, "(userprefix %s %s)", user, prefix);
 		free(user);
 		free(prefix);
 		free(line);
-		user = prefix = line = NULL;
+		free(user_str);
+		free(prefix_str);
+		user = prefix = line = user_str = prefix_str = NULL;
 	}
 
 	if (rc == -1) {
@@ -3096,17 +3127,25 @@ static int file_contexts_to_cil(struct sepol_module_package *mod_pkg)
 	char *mode = NULL;
 	char *context = NULL;
 	const char *cilmode;
+	char *tmp = NULL;
 
 	if (fc_len == 0) {
 		return 0;
 	}
 
 	while ((rc = get_line(&cur, end, &line)) > 0) {
-		if (line[0] == '#') {
+		tmp = line;
+		while (isspace(*tmp)) {
+			tmp++;
+		}
+
+		if (tmp[0] == '#' || tmp[0] == '\0') {
+			free(line);
+			line = NULL;
 			continue;
 		}
 
-		matched = sscanf(line, "%ms %ms %ms", &regex, &mode, &context);
+		matched = tokenize(tmp, 1, ' ', 3, &regex, &mode, &context);
 		if (matched < 2 || matched > 3) {
 			rc = -1;
 			log_err("Invalid file context line: %s", line);
diff --git a/libsepol/src/util.c b/libsepol/src/util.c
index a824e61..d1249d8 100644
--- a/libsepol/src/util.c
+++ b/libsepol/src/util.c
@@ -19,11 +19,14 @@
  */
 
 #include <assert.h>
+#include <ctype.h>
+#include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 
 #include <sepol/policydb/flask_types.h>
 #include <sepol/policydb/policydb.h>
+#include <sepol/policydb/util.h>
 
 struct val_to_name {
 	unsigned int val;
@@ -114,3 +117,102 @@ char *sepol_av_to_string(policydb_t * policydbp, uint32_t tclass,
 
 	return avbuf;
 }
+
+/*
+ * The tokenize and tokenize_str functions may be used to
+ * replace sscanf to read tokens from buffers.
+ */
+
+/* Read a token from a buffer */
+static inline int tokenize_str(int squash_delim, char delim, char **str, char **ptr, int *len)
+{
+	char *tmp_buf = *ptr;
+	*str = NULL;
+
+	while (**ptr != '\0') {
+		if (isspace(delim) && isspace(**ptr)) {
+			(*ptr)++;
+			break;
+		} else if (!isspace(delim) && **ptr == delim) {
+			(*ptr)++;
+			break;
+		}
+
+		(*ptr)++;
+	}
+
+	*len = *ptr - tmp_buf;
+	/* If the end of the string has not been reached, this will ensure the
+	 * delimiter is not included when returning the token.
+	 */
+	if (**ptr != '\0') {
+		(*len)--;
+	}
+
+	*str = strndup(tmp_buf, *len);
+	if (!*str) {
+		return -1;
+	}
+
+	while (**ptr != '\0' && squash_delim == 1) {
+		if (isspace(delim) && isspace(**ptr)) {
+			(*ptr)++;
+		} else if (!isspace(delim) && **ptr == delim) {
+			(*ptr)++;
+		} else {
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * line_buf - Buffer containing string to tokenize.
+ * squash_delim - Whether or not to squash repeating delimiters in input data.
+ *		   Set to 1 to squash_delimiter or 0 to disable squashing delimiter.
+ * delim - The delimiter used to tokenize line_buf. A whitespace delimiter will
+ *	    be tokenized using isspace().
+ * num_args - The number of parameter entries to process.
+ * ...      - A 'char **' for each parameter.
+ * returns  - The number of items processed.
+ *
+ * This function calls tokenize_str() to do the actual string processing. The
+ * caller is responsible for calling free() on each additional argument. The
+ * function will not tokenize more than num_args and the last argument will
+ * contain the remaining content of line_buf.
+ */
+int tokenize(char *line_buf, int squash_delim, char delim, int num_args, ...)
+{
+	char **arg, *buf_p;
+	int rc, items, arg_len = 0;
+	va_list ap;
+
+	buf_p = line_buf;
+
+	/* Process the arguments */
+	va_start(ap, num_args);
+
+	for (items = 0; items < num_args && *buf_p != '\0'; items++) {
+		arg = va_arg(ap, char **);
+
+		/* Save the remainder of the string in arg */
+		if (items == num_args - 1) {
+			*arg = strdup(buf_p);
+			if (*arg == NULL) {
+				goto exit;
+			}
+
+			continue;
+		}
+
+		rc = tokenize_str(squash_delim, delim, arg, &buf_p, &arg_len);
+		if (rc < 0) {
+			goto exit;
+		}
+	}
+
+exit:
+	va_end(ap);
+	return items;
+}
-- 
1.9.3

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] libselinux: generalize read_spec_entries for any delimiter
  2015-07-27 13:21 ` [PATCH 1/2] libselinux: generalize read_spec_entries for any delimiter Yuli Khodorkovskiy
@ 2015-07-27 16:18   ` Stephen Smalley
  2015-07-27 19:37     ` Yuli Khodorkovskiy
  0 siblings, 1 reply; 5+ messages in thread
From: Stephen Smalley @ 2015-07-27 16:18 UTC (permalink / raw)
  To: Yuli Khodorkovskiy, selinux

On 07/27/2015 09:21 AM, Yuli Khodorkovskiy wrote:
> This patch allows the tokenizer previously introduced for libselinux to
> use any delimiter for tokenization. Delimiters may be squashed by
> setting the squash_delim flag to 1. When any whitespace delimiter is used,
> isspace() allows any whitespace character to be used as the delimiter.
> 
> Previously, read_spec_entries() trimmed whitespace and ignored comments and
> empty lines. This functionality has been removed as is now the
> responsiblity of the caller to implement.

Doesn't yield the same output from sefcontext_compile before and after
this change if file_contexts has any lines trailing in whitespace (old:
trailing whitespace ignored; new: trailing whitespace included in
context field).

> 
> Signed-off-by: Yuli Khodorkovskiy <ykhodorkovskiy@tresys.com>
> ---
>  libselinux/src/label_android_property.c |  11 ++-
>  libselinux/src/label_file.h             |  10 ++-
>  libselinux/src/label_internal.h         |   7 +-
>  libselinux/src/label_support.c          | 129 +++++++++++++++++++-------------
>  4 files changed, 97 insertions(+), 60 deletions(-)
> 
> diff --git a/libselinux/src/label_android_property.c b/libselinux/src/label_android_property.c
> index 4af9896..5c06ed3 100644
> --- a/libselinux/src/label_android_property.c
> +++ b/libselinux/src/label_android_property.c
> @@ -90,9 +90,14 @@ static int process_line(struct selabel_handle *rec,
>  	spec_t *spec_arr = data->spec_arr;
>  	unsigned int nspec = data->nspec;
>  
> -	items = read_spec_entries(line_buf, 2, &prop, &context);
> -	if (items <= 0)
> -		return items;
> +	trim_buf(&line_buf);
> +
> +	/* Skip comment lines and empty lines. */
> +	if (*line_buf == '#' || *line_buf == '\0') {
> +		return 0;
> +	}
> +
> +	items = tokenize(line_buf, 1, ' ', 2, &prop, &context);
>  	if (items != 2) {
>  		selinux_log(SELINUX_WARNING,
>  			    "%s:  line %u is missing fields, skipping\n", path,
> diff --git a/libselinux/src/label_file.h b/libselinux/src/label_file.h
> index db961ba..3499da1 100644
> --- a/libselinux/src/label_file.h
> +++ b/libselinux/src/label_file.h
> @@ -387,10 +387,14 @@ static inline int process_line(struct selabel_handle *rec,
>  	unsigned int nspec = data->nspec;
>  	const char *errbuf = NULL;
>  
> -	items = read_spec_entries(line_buf, 3, &regex, &type, &context);
> -	if (items <= 0)
> -		return items;
> +	trim_buf(&line_buf);
>  
> +	/* Skip comment lines and empty lines. */
> +	if (*line_buf == '#' || *line_buf == '\0') {
> +		return 0;
> +	}
> +
> +	items = tokenize(line_buf, 1, ' ', 3, &regex, &type, &context);
>  	if (items < 2) {
>  		COMPAT_LOG(SELINUX_WARNING,
>  			    "%s:  line %u is missing fields, skipping\n", path,
> diff --git a/libselinux/src/label_internal.h b/libselinux/src/label_internal.h
> index 861eca1..81a4f37 100644
> --- a/libselinux/src/label_internal.h
> +++ b/libselinux/src/label_internal.h
> @@ -108,9 +108,10 @@ compat_validate(struct selabel_handle *rec,
>  		const char *path, unsigned lineno) hidden;
>  
>  /*
> - * The read_spec_entries function may be used to
> - * replace sscanf to read entries from spec files.
> + * The tokenize function may be used to
> + * replace sscanf.
>   */
> -extern int read_spec_entries(char *line_buf, int num_args, ...);
> +extern int tokenize(char *line_buf, int squash_delim, char delim, int num_args, ...);
> +extern void trim_buf(char **buf);
>  
>  #endif				/* _SELABEL_INTERNAL_H_ */
> diff --git a/libselinux/src/label_support.c b/libselinux/src/label_support.c
> index b3ab8ab..08ef303 100644
> --- a/libselinux/src/label_support.c
> +++ b/libselinux/src/label_support.c
> @@ -11,88 +11,115 @@
>  #include "label_internal.h"
>  
>  /*
> - * The read_spec_entries and read_spec_entry functions may be used to
> - * replace sscanf to read entries from spec files. The file and
> - * property services now use these.
> + * The tokenize and tokenize_str functions may be used to
> + * replace sscanf to read tokens from buffers.
>   */
>  
> -/* Read an entry from a spec file (e.g. file_contexts) */
> -static inline int read_spec_entry(char **entry, char **ptr, int *len)
> +/* Read a token from a buffer */
> +static inline int tokenize_str(int squash_delim, char delim, char **str, char **ptr, int *len)

Not introduced by this change, but should likely use size_t *len here.

>  {
> -	*entry = NULL;
> -	char *tmp_buf = NULL;
> +	char *tmp_buf = *ptr;
> +	*str = NULL;
> +
> +	while (**ptr != '\0') {
> +		if (isspace(delim) && isspace(**ptr)) {
> +			(*ptr)++;
> +			break;
> +		} else if (!isspace(delim) && **ptr == delim) {
> +			(*ptr)++;
> +			break;
> +		}

This seems unnecessarily complicated.  First, all users within
libselinux want squash_delim=1, delim=' ', so we might as well just make
that the only supported case in libselinux until such a time as we have
a need for another.  Also, can't quite see why
squash_delim=1,delim=<non-space> would ever be desired, or
squash_delim=0,delim=<space>, so it seems like we would simplify the
interface even when supporting both cases.

>  
> -	while (isspace(**ptr) && **ptr != '\0')
>  		(*ptr)++;
> +	}
>  
> -	tmp_buf = *ptr;
> -	*len = 0;
> +	*len = *ptr - tmp_buf;
> +	/* If the end of the string has not been reached, this will ensure the
> +	 * delimiter is not included when returning the token.
> +	 */
> +	if (**ptr != '\0') {
> +		(*len)--;

Can *len underflow here?

> +	}
>  
> -	while (!isspace(**ptr) && **ptr != '\0') {
> -		(*ptr)++;
> -		(*len)++;
> +	*str = strndup(tmp_buf, *len);

Can *len be 0 here?

> +	if (!*str) {
> +		return -1;
>  	}
>  
> -	if (*len) {
> -		*entry = strndup(tmp_buf, *len);
> -		if (!*entry)
> -			return -1;
> +	while (**ptr != '\0' && squash_delim == 1) {
> +		if (isspace(delim) && isspace(**ptr)) {
> +			(*ptr)++;
> +		} else if (!isspace(delim) && **ptr == delim) {
> +			(*ptr)++;
> +		} else {
> +			break;
> +		}
>  	}
>  
>  	return 0;
>  }
>  
>  /*
> - * line_buf - Buffer containing the spec entries .
> - * num_args - The number of spec parameter entries to process.
> - * ...      - A 'char **spec_entry' for each parameter.
> + * line_buf - Buffer containing string to tokenize.
> + * squash_delim - Whether or not to squash repeating delimiters in input data.
> + *		   Set to 1 to squash_delimiter or 0 to disable squashing delimiter.
> + * delim - The delimiter used to tokenize line_buf. A whitespace delimiter will
> + *	    be tokenized using isspace().
> + * num_args - The number of parameter entries to process.
> + * ...      - A 'char **' for each parameter.
>   * returns  - The number of items processed.
>   *
> - * This function calls read_spec_entry() to do the actual string processing.
> + * This function calls tokenize_str() to do the actual string processing. The
> + * caller is responsible for calling free() on each additional argument. The
> + * function will not tokenize more than num_args and the last argument will
> + * contain the remaining content of line_buf.
>   */
> -int hidden read_spec_entries(char *line_buf, int num_args, ...)
> +int tokenize(char *line_buf, int squash_delim, char delim, int num_args, ...)

Must be hidden or it becomes part of the libselinux.so ABI.

>  {
> -	char **spec_entry, *buf_p;
> -	int len, rc, items, entry_len = 0;
> +	char **arg, *buf_p;
> +	int rc, items, arg_len = 0;
>  	va_list ap;
>  
> -	len = strlen(line_buf);
> -	if (line_buf[len - 1] == '\n')
> -		line_buf[len - 1] = '\0';
> -	else
> -		/* Handle case if line not \n terminated by bumping
> -		 * the len for the check below (as the line is NUL
> -		 * terminated by getline(3)) */
> -		len++;
> -
>  	buf_p = line_buf;
> -	while (isspace(*buf_p))
> -		buf_p++;
>  
> -	/* Skip comment lines and empty lines. */
> -	if (*buf_p == '#' || *buf_p == '\0')
> -		return 0;
> -
> -	/* Process the spec file entries */
> +	/* Process the arguments */
>  	va_start(ap, num_args);
>  
> -	items = 0;
> -	while (items < num_args) {
> -		spec_entry = va_arg(ap, char **);
> +	for (items = 0; items < num_args && *buf_p != '\0'; items++) {
> +		arg = va_arg(ap, char **);
> +
> +		/* Save the remainder of the string in arg */
> +		if (items == num_args - 1) {
> +			*arg = strdup(buf_p);
> +			if (*arg == NULL) {
> +				goto exit;
> +			}

Needs to prune last argument at delimiter.

>  
> -		if (len - 1 == buf_p - line_buf) {
> -			va_end(ap);
> -			return items;
> +			continue;
>  		}
>  
> -		rc = read_spec_entry(spec_entry, &buf_p, &entry_len);
> +		rc = tokenize_str(squash_delim, delim, arg, &buf_p, &arg_len);
>  		if (rc < 0) {
> -			va_end(ap);
> -			return rc;
> +			goto exit;
>  		}
> -		if (entry_len)
> -			items++;
>  	}
> +
> +exit:
>  	va_end(ap);
>  	return items;
>  }
> +
> +/* This function removes leading whitespace. If the buffer ends in a newline,
> + * the newline is removed.
> + */
> +void trim_buf(char **buf)

Must be hidden or it becomes part of the libselinux.so ABI.

> +{
> +	int len = strlen(*buf);

size_t len

> +	if ((*buf)[len - 1] == '\n') {

Can len == 0 here?

> +		(*buf)[len - 1] = '\0';
> +	}
> +
> +	while (isspace(**buf)) {
> +		(*buf)++;
> +	}
> +}
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH 1/2] libselinux: generalize read_spec_entries for any delimiter
  2015-07-27 16:18   ` Stephen Smalley
@ 2015-07-27 19:37     ` Yuli Khodorkovskiy
  0 siblings, 0 replies; 5+ messages in thread
From: Yuli Khodorkovskiy @ 2015-07-27 19:37 UTC (permalink / raw)
  To: Stephen Smalley, selinux



>-----Original Message-----
>From: Stephen Smalley [mailto:sds@tycho.nsa.gov]
>Sent: Monday, July 27, 2015 12:19 PM
>To: Yuli Khodorkovskiy; selinux@tycho.nsa.gov
>Subject: Re: [PATCH 1/2] libselinux: generalize read_spec_entries for any
>delimiter
>
>On 07/27/2015 09:21 AM, Yuli Khodorkovskiy wrote:
>> This patch allows the tokenizer previously introduced for libselinux
>> to use any delimiter for tokenization. Delimiters may be squashed by
>> setting the squash_delim flag to 1. When any whitespace delimiter is
>> used,
>> isspace() allows any whitespace character to be used as the delimiter.
>>
>> Previously, read_spec_entries() trimmed whitespace and ignored
>> comments and empty lines. This functionality has been removed as is
>> now the responsiblity of the caller to implement.
>
>Doesn't yield the same output from sefcontext_compile before and after
>this change if file_contexts has any lines trailing in whitespace (old:
>trailing whitespace ignored; new: trailing whitespace included in context
>field).

Okay, I'll look into this and send out a v2.

>
>>
>> Signed-off-by: Yuli Khodorkovskiy <ykhodorkovskiy@tresys.com>
>> ---
>>  libselinux/src/label_android_property.c |  11 ++-
>>  libselinux/src/label_file.h             |  10 ++-
>>  libselinux/src/label_internal.h         |   7 +-
>>  libselinux/src/label_support.c          | 129 +++++++++++++++++++------------
>-
>>  4 files changed, 97 insertions(+), 60 deletions(-)
>>
>> diff --git a/libselinux/src/label_android_property.c
>> b/libselinux/src/label_android_property.c
>> index 4af9896..5c06ed3 100644
>> --- a/libselinux/src/label_android_property.c
>> +++ b/libselinux/src/label_android_property.c
>> @@ -90,9 +90,14 @@ static int process_line(struct selabel_handle *rec,
>>  	spec_t *spec_arr = data->spec_arr;
>>  	unsigned int nspec = data->nspec;
>>
>> -	items = read_spec_entries(line_buf, 2, &prop, &context);
>> -	if (items <= 0)
>> -		return items;
>> +	trim_buf(&line_buf);
>> +
>> +	/* Skip comment lines and empty lines. */
>> +	if (*line_buf == '#' || *line_buf == '\0') {
>> +		return 0;
>> +	}
>> +
>> +	items = tokenize(line_buf, 1, ' ', 2, &prop, &context);
>>  	if (items != 2) {
>>  		selinux_log(SELINUX_WARNING,
>>  			    "%s:  line %u is missing fields, skipping\n", path,
>diff --git
>> a/libselinux/src/label_file.h b/libselinux/src/label_file.h index
>> db961ba..3499da1 100644
>> --- a/libselinux/src/label_file.h
>> +++ b/libselinux/src/label_file.h
>> @@ -387,10 +387,14 @@ static inline int process_line(struct
>selabel_handle *rec,
>>  	unsigned int nspec = data->nspec;
>>  	const char *errbuf = NULL;
>>
>> -	items = read_spec_entries(line_buf, 3, &regex, &type, &context);
>> -	if (items <= 0)
>> -		return items;
>> +	trim_buf(&line_buf);
>>
>> +	/* Skip comment lines and empty lines. */
>> +	if (*line_buf == '#' || *line_buf == '\0') {
>> +		return 0;
>> +	}
>> +
>> +	items = tokenize(line_buf, 1, ' ', 3, &regex, &type, &context);
>>  	if (items < 2) {
>>  		COMPAT_LOG(SELINUX_WARNING,
>>  			    "%s:  line %u is missing fields, skipping\n", path,
>diff --git
>> a/libselinux/src/label_internal.h b/libselinux/src/label_internal.h
>> index 861eca1..81a4f37 100644
>> --- a/libselinux/src/label_internal.h
>> +++ b/libselinux/src/label_internal.h
>> @@ -108,9 +108,10 @@ compat_validate(struct selabel_handle *rec,
>>  		const char *path, unsigned lineno) hidden;
>>
>>  /*
>> - * The read_spec_entries function may be used to
>> - * replace sscanf to read entries from spec files.
>> + * The tokenize function may be used to
>> + * replace sscanf.
>>   */
>> -extern int read_spec_entries(char *line_buf, int num_args, ...);
>> +extern int tokenize(char *line_buf, int squash_delim, char delim, int
>> +num_args, ...); extern void trim_buf(char **buf);
>>
>>  #endif				/* _SELABEL_INTERNAL_H_ */
>> diff --git a/libselinux/src/label_support.c
>> b/libselinux/src/label_support.c index b3ab8ab..08ef303 100644
>> --- a/libselinux/src/label_support.c
>> +++ b/libselinux/src/label_support.c
>> @@ -11,88 +11,115 @@
>>  #include "label_internal.h"
>>
>>  /*
>> - * The read_spec_entries and read_spec_entry functions may be used to
>> - * replace sscanf to read entries from spec files. The file and
>> - * property services now use these.
>> + * The tokenize and tokenize_str functions may be used to
>> + * replace sscanf to read tokens from buffers.
>>   */
>>
>> -/* Read an entry from a spec file (e.g. file_contexts) */ -static
>> inline int read_spec_entry(char **entry, char **ptr, int *len)
>> +/* Read a token from a buffer */
>> +static inline int tokenize_str(int squash_delim, char delim, char
>> +**str, char **ptr, int *len)
>
>Not introduced by this change, but should likely use size_t *len here.
>
>>  {
>> -	*entry = NULL;
>> -	char *tmp_buf = NULL;
>> +	char *tmp_buf = *ptr;
>> +	*str = NULL;
>> +
>> +	while (**ptr != '\0') {
>> +		if (isspace(delim) && isspace(**ptr)) {
>> +			(*ptr)++;
>> +			break;
>> +		} else if (!isspace(delim) && **ptr == delim) {
>> +			(*ptr)++;
>> +			break;
>> +		}
>
>This seems unnecessarily complicated.  First, all users within libselinux
>want squash_delim=1, delim=' ', so we might as well just make that the
>only supported case in libselinux until such a time as we have a need for
>another.  Also, can't quite see why squash_delim=1,delim=<non-space>
>would ever be desired, or squash_delim=0,delim=<space>, so it seems like
>we would simplify the interface even when supporting both cases.

The reason this was implemented to support both cases was to have consistency with the implementation in libsepol. If you think it would be easier, I can drop the patch from libselinux altogether and only support squash_delim=1,delim=<space> and squash_delim=0,delim=<non-space> in libsepol.

>
>>
>> -	while (isspace(**ptr) && **ptr != '\0')
>>  		(*ptr)++;
>> +	}
>>
>> -	tmp_buf = *ptr;
>> -	*len = 0;
>> +	*len = *ptr - tmp_buf;
>> +	/* If the end of the string has not been reached, this will ensure
>the
>> +	 * delimiter is not included when returning the token.
>> +	 */
>> +	if (**ptr != '\0') {
>> +		(*len)--;
>
>Can *len underflow here?

*ptr had to have moved by at least one character at this point, therefore len must be greater than 0.

>
>> +	}
>>
>> -	while (!isspace(**ptr) && **ptr != '\0') {
>> -		(*ptr)++;
>> -		(*len)++;
>> +	*str = strndup(tmp_buf, *len);
>
>Can *len be 0 here?

Yes len can be 0 here. If for example, "foo::bar" is provided and squash_delim is set to 0, the string would tokenize into foo, a 0 length NUL terminated string, and bar.

>
>> +	if (!*str) {
>> +		return -1;
>>  	}
>>
>> -	if (*len) {
>> -		*entry = strndup(tmp_buf, *len);
>> -		if (!*entry)
>> -			return -1;
>> +	while (**ptr != '\0' && squash_delim == 1) {
>> +		if (isspace(delim) && isspace(**ptr)) {
>> +			(*ptr)++;
>> +		} else if (!isspace(delim) && **ptr == delim) {
>> +			(*ptr)++;
>> +		} else {
>> +			break;
>> +		}
>>  	}
>>
>>  	return 0;
>>  }
>>
>>  /*
>> - * line_buf - Buffer containing the spec entries .
>> - * num_args - The number of spec parameter entries to process.
>> - * ...      - A 'char **spec_entry' for each parameter.
>> + * line_buf - Buffer containing string to tokenize.
>> + * squash_delim - Whether or not to squash repeating delimiters in
>input data.
>> + *		   Set to 1 to squash_delimiter or 0 to disable squashing
>delimiter.
>> + * delim - The delimiter used to tokenize line_buf. A whitespace
>delimiter will
>> + *	    be tokenized using isspace().
>> + * num_args - The number of parameter entries to process.
>> + * ...      - A 'char **' for each parameter.
>>   * returns  - The number of items processed.
>>   *
>> - * This function calls read_spec_entry() to do the actual string
>processing.
>> + * This function calls tokenize_str() to do the actual string
>> + processing. The
>> + * caller is responsible for calling free() on each additional
>> + argument. The
>> + * function will not tokenize more than num_args and the last
>> + argument will
>> + * contain the remaining content of line_buf.
>>   */
>> -int hidden read_spec_entries(char *line_buf, int num_args, ...)
>> +int tokenize(char *line_buf, int squash_delim, char delim, int
>> +num_args, ...)
>
>Must be hidden or it becomes part of the libselinux.so ABI.
>
>>  {
>> -	char **spec_entry, *buf_p;
>> -	int len, rc, items, entry_len = 0;
>> +	char **arg, *buf_p;
>> +	int rc, items, arg_len = 0;
>>  	va_list ap;
>>
>> -	len = strlen(line_buf);
>> -	if (line_buf[len - 1] == '\n')
>> -		line_buf[len - 1] = '\0';
>> -	else
>> -		/* Handle case if line not \n terminated by bumping
>> -		 * the len for the check below (as the line is NUL
>> -		 * terminated by getline(3)) */
>> -		len++;
>> -
>>  	buf_p = line_buf;
>> -	while (isspace(*buf_p))
>> -		buf_p++;
>>
>> -	/* Skip comment lines and empty lines. */
>> -	if (*buf_p == '#' || *buf_p == '\0')
>> -		return 0;
>> -
>> -	/* Process the spec file entries */
>> +	/* Process the arguments */
>>  	va_start(ap, num_args);
>>
>> -	items = 0;
>> -	while (items < num_args) {
>> -		spec_entry = va_arg(ap, char **);
>> +	for (items = 0; items < num_args && *buf_p != '\0'; items++) {
>> +		arg = va_arg(ap, char **);
>> +
>> +		/* Save the remainder of the string in arg */
>> +		if (items == num_args - 1) {
>> +			*arg = strdup(buf_p);
>> +			if (*arg == NULL) {
>> +				goto exit;
>> +			}
>
>Needs to prune last argument at delimiter.

I'll add this functionality to the trim_buf() function. 

>
>>
>> -		if (len - 1 == buf_p - line_buf) {
>> -			va_end(ap);
>> -			return items;
>> +			continue;
>>  		}
>>
>> -		rc = read_spec_entry(spec_entry, &buf_p, &entry_len);
>> +		rc = tokenize_str(squash_delim, delim, arg, &buf_p,
>&arg_len);
>>  		if (rc < 0) {
>> -			va_end(ap);
>> -			return rc;
>> +			goto exit;
>>  		}
>> -		if (entry_len)
>> -			items++;
>>  	}
>> +
>> +exit:
>>  	va_end(ap);
>>  	return items;
>>  }
>> +
>> +/* This function removes leading whitespace. If the buffer ends in a
>> +newline,
>> + * the newline is removed.
>> + */
>> +void trim_buf(char **buf)
>
>Must be hidden or it becomes part of the libselinux.so ABI.
>
>> +{
>> +	int len = strlen(*buf);
>
>size_t len
>
>> +	if ((*buf)[len - 1] == '\n') {
>
>Can len == 0 here?

Good catch. I'll add an error check here.

>
>> +		(*buf)[len - 1] = '\0';
>> +	}
>> +
>> +	while (isspace(**buf)) {
>> +		(*buf)++;
>> +	}
>> +}
>>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-07-27 19:37 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-27 13:21 [PATCH 0/2] Generalize tokenizer and remove sscanf calls from libsepol Yuli Khodorkovskiy
2015-07-27 13:21 ` [PATCH 1/2] libselinux: generalize read_spec_entries for any delimiter Yuli Khodorkovskiy
2015-07-27 16:18   ` Stephen Smalley
2015-07-27 19:37     ` Yuli Khodorkovskiy
2015-07-27 13:21 ` [PATCH 2/2] libsepol: Replace sscanf in module_to_cil Yuli Khodorkovskiy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.