[PATCH v2 0/3] Support character literals

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v2 0/3] Support character literals
@ 2011-09-07 23:15 Anton Staaf
       [not found] ` <1315437340-1661-1-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: Anton Staaf @ 2011-09-07 23:15 UTC (permalink / raw)
  To: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

    These patches add simple and escaped character literal parsing support
to the dtc for cell lists and bytestrings.  The first patch refactors the
string parsing code in data.c to expose the more primitive character parsing
code for use in the later patches.  I have left the bytestring support in
place but my hope is that it is now separate enough to be discussed
independantly of the refactor and cell list changes.

    Thanks,
        Anton

Changes in v2:
- Move the refactor of data.c to a separate patch
- Add support for character literals in cell lists
- Merge normal and escaped literal support for bytestrings into single patch

Anton Staaf (3):
  dtc: Refactor character literal parsing code
  dtc: Support character literals in cell lists
  dtc: Support character literals in bytestrings

 Documentation/dts-format.txt |    7 ++-
 Documentation/manual.txt     |    7 ++-
 data.c                       |   77 +--------------------------------
 dtc-lexer.l                  |   28 ++++++++++++
 dtc-parser.y                 |    4 ++
 util.c                       |   99 ++++++++++++++++++++++++++++++++++++++++++
 util.h                       |   18 ++++++++
 7 files changed, 158 insertions(+), 82 deletions(-)

-- 
1.7.3.1

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [PATCH v2 1/3] dtc: Refactor character literal parsing code
       [not found] ` <1315437340-1661-1-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
@ 2011-09-07 23:15   ` Anton Staaf
       [not found]     ` <1315437340-1661-2-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
  2011-09-07 23:15   ` [PATCH v2 2/3] dtc: Support character literals in cell lists Anton Staaf
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 24+ messages in thread
From: Anton Staaf @ 2011-09-07 23:15 UTC (permalink / raw)
  To: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

Move the parsing of hex, octal and escaped characters from data.c
to util.c where it can be used for character literal parsing within
strings as well as for stand alone C style character literals.

Signed-off-by: Anton Staaf <robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
Cc: Jon Loeliger <jdl-CYoMK+44s/E@public.gmane.org>
Cc: David Gibson <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org>
---
 data.c |   77 +-------------------------------------------------
 util.c |   99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 util.h |   18 +++++++++++
 3 files changed, 118 insertions(+), 76 deletions(-)

diff --git a/data.c b/data.c
index fe555e8..471f6c3 100644
--- a/data.c
+++ b/data.c
@@ -68,40 +68,6 @@ struct data data_copy_mem(const char *mem, int len)
 	return d;
 }
 
-static char get_oct_char(const char *s, int *i)
-{
-	char x[4];
-	char *endx;
-	long val;
-
-	x[3] = '\0';
-	strncpy(x, s + *i, 3);
-
-	val = strtol(x, &endx, 8);
-
-	assert(endx > x);
-
-	(*i) += endx - x;
-	return val;
-}
-
-static char get_hex_char(const char *s, int *i)
-{
-	char x[3];
-	char *endx;
-	long val;
-
-	x[2] = '\0';
-	strncpy(x, s + *i, 2);
-
-	val = strtol(x, &endx, 16);
-	if (!(endx  > x))
-		die("\\x used with no following hex digits\n");
-
-	(*i) += endx - x;
-	return val;
-}
-
 struct data data_copy_escape_string(const char *s, int len)
 {
 	int i = 0;
@@ -119,48 +85,7 @@ struct data data_copy_escape_string(const char *s, int len)
 			continue;
 		}
 
-		c = s[i++];
-		assert(c);
-		switch (c) {
-		case 'a':
-			q[d.len++] = '\a';
-			break;
-		case 'b':
-			q[d.len++] = '\b';
-			break;
-		case 't':
-			q[d.len++] = '\t';
-			break;
-		case 'n':
-			q[d.len++] = '\n';
-			break;
-		case 'v':
-			q[d.len++] = '\v';
-			break;
-		case 'f':
-			q[d.len++] = '\f';
-			break;
-		case 'r':
-			q[d.len++] = '\r';
-			break;
-		case '0':
-		case '1':
-		case '2':
-		case '3':
-		case '4':
-		case '5':
-		case '6':
-		case '7':
-			i--; /* need to re-read the first digit as
-			      * part of the octal value */
-			q[d.len++] = get_oct_char(s, &i);
-			break;
-		case 'x':
-			q[d.len++] = get_hex_char(s, &i);
-			break;
-		default:
-			q[d.len++] = c;
-		}
+                q[d.len++] = get_escape_char(s, &i);
 	}
 
 	q[d.len++] = '\0';
diff --git a/util.c b/util.c
index 994436f..b39392c 100644
--- a/util.c
+++ b/util.c
@@ -25,6 +25,7 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include <string.h>
+#include <assert.h>
 
 #include "util.h"
 
@@ -85,3 +86,101 @@ int util_is_printable_string(const void *data, int len)
 
 	return 1;
 }
+
+char get_oct_char(const char *s, int *i)
+{
+	char x[4];
+	char *endx;
+	long val;
+
+	x[3] = '\0';
+	strncpy(x, s + *i, 3);
+
+	val = strtol(x, &endx, 8);
+
+	assert(endx > x);
+
+	(*i) += endx - x;
+	return val;
+}
+
+char get_hex_char(const char *s, int *i)
+{
+	char x[3];
+	char *endx;
+	long val;
+
+	x[2] = '\0';
+	strncpy(x, s + *i, 2);
+
+	val = strtol(x, &endx, 16);
+	if (!(endx  > x))
+		die("\\x used with no following hex digits\n");
+
+	(*i) += endx - x;
+	return val;
+}
+
+char get_escape_char(const char *s, int *i)
+{
+	char	c = s[*i];
+	int	j = *i + 1;
+	char	val;
+
+	assert(c);
+	switch (c) {
+	case 'a':
+		val = '\a';
+		break;
+	case 'b':
+		val = '\b';
+		break;
+	case 't':
+		val = '\t';
+		break;
+	case 'n':
+		val = '\n';
+		break;
+	case 'v':
+		val = '\v';
+		break;
+	case 'f':
+		val = '\f';
+		break;
+	case 'r':
+		val = '\r';
+		break;
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+		j--; /* need to re-read the first digit as
+		      * part of the octal value */
+		val = get_oct_char(s, &j);
+		break;
+	case 'x':
+		val = get_hex_char(s, &j);
+		break;
+	default:
+		val = c;
+	}
+
+	(*i) = j;
+	return val;
+}
+
+char get_escape_char_exact(const char *s, int len)
+{
+	int	j = 1; //skip intial "\"
+	char	c = get_escape_char(s, &j);
+
+	if (j != len)
+		die("Extra characters at end of character literal '%s' "
+		    "(%d != %d)\n", s, j, len);
+
+	return c;
+}
diff --git a/util.h b/util.h
index cc68933..4332b6e 100644
--- a/util.h
+++ b/util.h
@@ -64,4 +64,22 @@ extern char *join_path(const char *path, const char *name);
  * @return 1 if a valid printable string, 0 if not */
 int util_is_printable_string(const void *data, int len);
 
+/*
+ * Octal, Hex and escaped character parsing routines.  Each of these routines
+ * will parse an encoded character starting at index i in string s.  The
+ * resulting character will be returned and the index i will be updated to
+ * point at the character directly after the end of the encoding, this may
+ * be the '\0' terminator of the string.
+ */
+char get_oct_char(const char *s, int *i);
+char get_hex_char(const char *s, int *i);
+char get_escape_char(const char *s, int *i);
+
+/*
+ * Parse an escaped character of exactly len characters starting at the
+ * beginning of the passed string.  If the escape sequence is not exactly
+ * len characters long we die.  Otherwise the parsed character is returned.
+ */
+char get_escape_char_exact(const char *s, int len);
+
 #endif /* _UTIL_H */
-- 
1.7.3.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 2/3] dtc: Support character literals in cell lists
       [not found] ` <1315437340-1661-1-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
  2011-09-07 23:15   ` [PATCH v2 1/3] dtc: Refactor character literal parsing code Anton Staaf
@ 2011-09-07 23:15   ` Anton Staaf
       [not found]     ` <1315437340-1661-3-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
  2011-09-07 23:15   ` [PATCH v2 3/3] dtc: Support character literals in bytestrings Anton Staaf
  2011-09-08  3:18   ` [PATCH v2 0/3] Support character literals David Gibson
  3 siblings, 1 reply; 24+ messages in thread
From: Anton Staaf @ 2011-09-07 23:15 UTC (permalink / raw)
  To: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

With this patch the following property assignment:

    property = <0x12345678 'a' '\r' 100>;

is equivalent to:

    property = <0x12345678 0x00000061 0x0000000D 0x00000064>

Signed-off-by: Anton Staaf <robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
Cc: Jon Loeliger <jdl-CYoMK+44s/E@public.gmane.org>
Cc: David Gibson <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org>
---
 Documentation/dts-format.txt |    2 +-
 Documentation/manual.txt     |    7 ++++---
 dtc-lexer.l                  |   14 ++++++++++++++
 dtc-parser.y                 |    4 ++++
 4 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/Documentation/dts-format.txt b/Documentation/dts-format.txt
index a655b87..eae8b76 100644
--- a/Documentation/dts-format.txt
+++ b/Documentation/dts-format.txt
@@ -33,7 +33,7 @@ Property values may be defined as an array of 32-bit integer cells, as
 NUL-terminated strings, as bytestrings or a combination of these.
 
 * Arrays of cells are represented by angle brackets surrounding a
-  space separated list of C-style integers
+  space separated list of C-style integers or character literals.
 
 	e.g. interrupts = <17 0xc>;
 
diff --git a/Documentation/manual.txt b/Documentation/manual.txt
index f8a8a7b..940fd3d 100644
--- a/Documentation/manual.txt
+++ b/Documentation/manual.txt
@@ -213,10 +213,11 @@ For example:
 
 By default, all numeric values are hexadecimal.  Alternate bases
 may be specified using a prefix "d#" for decimal, "b#" for binary,
-and "o#" for octal.
+and "o#" for octal.  Character literals are supported using the C
+language character literal syntax of 'a'.
 
-Strings support common escape sequences from C: "\n", "\t", "\r",
-"\(octal value)", "\x(hex value)".
+Strings and character literals support common escape sequences from C:
+"\n", "\t", "\r", "\(octal value)", "\x(hex value)".
 
 
 4.3) Labels and References
diff --git a/dtc-lexer.l b/dtc-lexer.l
index e866ea5..d4f9eaa 100644
--- a/dtc-lexer.l
+++ b/dtc-lexer.l
@@ -29,6 +29,8 @@ PROPNODECHAR	[a-zA-Z0-9,._+*#?@-]
 PATHCHAR	({PROPNODECHAR}|[/])
 LABEL		[a-zA-Z_][a-zA-Z0-9_]*
 STRING		\"([^\\"]|\\.)*\"
+CHAR_LITERAL	'[^\\']'
+CHAR_ESCAPED	'\\([^']+|')'
 WS		[[:space:]]
 COMMENT		"/*"([^*]|\*+[^*/])*\*+"/"
 LINECOMMENT	"//".*\n
@@ -109,6 +111,18 @@ static int pop_input_file(void);
 			return DT_LITERAL;
 		}
 
+<V1>{CHAR_LITERAL}	{
+			yylval.byte = yytext[1];
+			DPRINT("Character literal: %s\n", yytext);
+			return DT_BYTE;
+		}
+
+<V1>{CHAR_ESCAPED}	{
+			yylval.byte = get_escape_char_exact(yytext+1, yyleng-2);
+			DPRINT("Character literal escaped: %s\n", yytext);
+			return DT_BYTE;
+		}
+
 <*>\&{LABEL}	{	/* label reference */
 			DPRINT("Ref: %s\n", yytext+1);
 			yylval.labelref = xstrdup(yytext+1);
diff --git a/dtc-parser.y b/dtc-parser.y
index 5e84a67..b7d2ab4 100644
--- a/dtc-parser.y
+++ b/dtc-parser.y
@@ -265,6 +265,10 @@ cellval:
 		{
 			$$ = eval_literal($1, 0, 32);
 		}
+	| DT_BYTE
+		{
+			$$ = $1;
+		}
 	;
 
 bytestring:
-- 
1.7.3.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found] ` <1315437340-1661-1-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
  2011-09-07 23:15   ` [PATCH v2 1/3] dtc: Refactor character literal parsing code Anton Staaf
  2011-09-07 23:15   ` [PATCH v2 2/3] dtc: Support character literals in cell lists Anton Staaf
@ 2011-09-07 23:15   ` Anton Staaf
       [not found]     ` <1315437340-1661-4-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
  2011-09-08  3:18   ` [PATCH v2 0/3] Support character literals David Gibson
  3 siblings, 1 reply; 24+ messages in thread
From: Anton Staaf @ 2011-09-07 23:15 UTC (permalink / raw)
  To: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

With this patch the following property assignment:

    property = ['a' 2b '\r'];

is equivalent to:

    property = [61 2b 0d];

Signed-off-by: Anton Staaf <robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
Cc: Jon Loeliger <jdl-CYoMK+44s/E@public.gmane.org>
Cc: David Gibson <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org>
---
 Documentation/dts-format.txt |    5 +++--
 dtc-lexer.l                  |   14 ++++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/Documentation/dts-format.txt b/Documentation/dts-format.txt
index eae8b76..555bd89 100644
--- a/Documentation/dts-format.txt
+++ b/Documentation/dts-format.txt
@@ -48,11 +48,12 @@ NUL-terminated strings, as bytestrings or a combination of these.
 	e.g. compatible = "simple-bus";
 
 * A bytestring is enclosed in square brackets [] with each byte
-  represented by two hexadecimal digits.  Spaces between each byte are
-  optional.
+  represented by two hexadecimal digits or a character literal.
+  Spaces between each byte or character literal are optional.
 
 	e.g. local-mac-address = [00 00 12 34 56 78]; or equivalently
 	     local-mac-address = [000012345678];
+	e.g. keymap = ['a' 'b' 'c' 'd'];
 
 * Values may have several comma-separated components, which are
   concatenated together.
diff --git a/dtc-lexer.l b/dtc-lexer.l
index d4f9eaa..31cd18a 100644
--- a/dtc-lexer.l
+++ b/dtc-lexer.l
@@ -142,6 +142,20 @@ static int pop_input_file(void);
 			return DT_BYTE;
 		}
 
+<BYTESTRING>{CHAR_LITERAL} {
+			DPRINT("Character literal: %s\n", yytext);
+			yylval.byte = yytext[1];
+			DPRINT("Byte: %02x\n", (int)yylval.byte);
+			return DT_BYTE;
+		}
+
+<BYTESTRING>{CHAR_ESCAPED} {
+			DPRINT("Character escaped literal: %s\n", yytext);
+			yylval.byte = get_escape_char_exact(yytext+1, yyleng-2);
+			DPRINT("Byte: %02x\n", (int)yylval.byte);
+			return DT_BYTE;
+		}
+
 <BYTESTRING>"]"	{
 			DPRINT("/BYTESTRING\n");
 			BEGIN_DEFAULT();
-- 
1.7.3.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 0/3] Support character literals
       [not found] ` <1315437340-1661-1-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
                     ` (2 preceding siblings ...)
  2011-09-07 23:15   ` [PATCH v2 3/3] dtc: Support character literals in bytestrings Anton Staaf
@ 2011-09-08  3:18   ` David Gibson
       [not found]     ` <20110908031842.GL30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
  3 siblings, 1 reply; 24+ messages in thread
From: David Gibson @ 2011-09-08  3:18 UTC (permalink / raw)
  To: Anton Staaf; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Wed, Sep 07, 2011 at 04:15:37PM -0700, Anton Staaf wrote:
>     These patches add simple and escaped character literal parsing support
> to the dtc for cell lists and bytestrings.  The first patch refactors the
> string parsing code in data.c to expose the more primitive character parsing
> code for use in the later patches.  I have left the bytestring support in
> place but my hope is that it is now separate enough to be discussed
> independantly of the refactor and cell list changes.
> 
>     Thanks,
>         Anton
> 
> Changes in v2:
> - Move the refactor of data.c to a separate patch
> - Add support for character literals in cell lists
> - Merge normal and escaped literal support for bytestrings into
> - single patch

Getting very close with version, although there are still some minor
warts I'd like you to address.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 1/3] dtc: Refactor character literal parsing code
       [not found]     ` <1315437340-1661-2-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
@ 2011-09-08  3:22       ` David Gibson
       [not found]         ` <20110908032250.GM30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: David Gibson @ 2011-09-08  3:22 UTC (permalink / raw)
  To: Anton Staaf; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Wed, Sep 07, 2011 at 04:15:38PM -0700, Anton Staaf wrote:
> Move the parsing of hex, octal and escaped characters from data.c
> to util.c where it can be used for character literal parsing within
> strings as well as for stand alone C style character literals.

[snip]
> +char get_oct_char(const char *s, int *i)

Probably make these static, since I can't really see a use for them
other than in the escape processing code.

> +{
> +	char x[4];
> +	char *endx;
> +	long val;
> +
> +	x[3] = '\0';
> +	strncpy(x, s + *i, 3);
> +
> +	val = strtol(x, &endx, 8);
> +
> +	assert(endx > x);
> +
> +	(*i) += endx - x;
> +	return val;
> +}
> +
> +char get_hex_char(const char *s, int *i)
> +{
> +	char x[3];
> +	char *endx;
> +	long val;
> +
> +	x[2] = '\0';
> +	strncpy(x, s + *i, 2);
> +
> +	val = strtol(x, &endx, 16);
> +	if (!(endx  > x))
> +		die("\\x used with no following hex digits\n");
> +
> +	(*i) += endx - x;
> +	return val;
> +}
> +
> +char get_escape_char(const char *s, int *i)

Yes, splitting this function off is quite nice.

[snip]
> +char get_escape_char_exact(const char *s, int len)
> +{
> +	int	j = 1; //skip intial "\"
> +	char	c = get_escape_char(s, &j);
> +
> +	if (j != len)
> +		die("Extra characters at end of character literal '%s' "
> +		    "(%d != %d)\n", s, j, len);
> +
> +	return c;
> +}

This I'm not a fan of.  Handling erroneous input with a die() in a
low-level helper like this is pretty nasty (I know there are other
instances of it).  I'd prefer to see the action handling the error in
the caller though, where it's better equipped to know a reasonable way
of informing the user.

Some more on error handling in comments on your next patch.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 2/3] dtc: Support character literals in cell lists
       [not found]     ` <1315437340-1661-3-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
@ 2011-09-08  3:47       ` David Gibson
       [not found]         ` <20110908034742.GN30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: David Gibson @ 2011-09-08  3:47 UTC (permalink / raw)
  To: Anton Staaf; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Wed, Sep 07, 2011 at 04:15:39PM -0700, Anton Staaf wrote:
> With this patch the following property assignment:
> 
>     property = <0x12345678 'a' '\r' 100>;
> 
> is equivalent to:
> 
>     property = <0x12345678 0x00000061 0x0000000D 0x00000064>
> 
> Signed-off-by: Anton Staaf <robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
> Cc: Jon Loeliger <jdl-CYoMK+44s/E@public.gmane.org>
> Cc: David Gibson <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org>
> ---
>  Documentation/dts-format.txt |    2 +-
>  Documentation/manual.txt     |    7 ++++---
>  dtc-lexer.l                  |   14 ++++++++++++++
>  dtc-parser.y                 |    4 ++++
>  4 files changed, 23 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/dts-format.txt b/Documentation/dts-format.txt
> index a655b87..eae8b76 100644
> --- a/Documentation/dts-format.txt
> +++ b/Documentation/dts-format.txt
> @@ -33,7 +33,7 @@ Property values may be defined as an array of 32-bit integer cells, as
>  NUL-terminated strings, as bytestrings or a combination of these.
>  
>  * Arrays of cells are represented by angle brackets surrounding a
> -  space separated list of C-style integers
> +  space separated list of C-style integers or character literals.
>  
>  	e.g. interrupts = <17 0xc>;
>  
> diff --git a/Documentation/manual.txt b/Documentation/manual.txt
> index f8a8a7b..940fd3d 100644
> --- a/Documentation/manual.txt
> +++ b/Documentation/manual.txt
> @@ -213,10 +213,11 @@ For example:
>  
>  By default, all numeric values are hexadecimal.  Alternate bases
>  may be specified using a prefix "d#" for decimal, "b#" for binary,
> -and "o#" for octal.
> +and "o#" for octal.  Character literals are supported using the C
> +language character literal syntax of 'a'.

Hrm, this paragraph is clearly referring to the old-style dts-v0
syntax (hex is not the default, these days) - which may well mean the
whole document is hopelessly out of date.  And I think the char
literals should only be processed in dts-v1 mode.

> -Strings support common escape sequences from C: "\n", "\t", "\r",
> -"\(octal value)", "\x(hex value)".
> +Strings and character literals support common escape sequences from C:
> +"\n", "\t", "\r", "\(octal value)", "\x(hex value)".
>  
>  
>  4.3) Labels and References
> diff --git a/dtc-lexer.l b/dtc-lexer.l
> index e866ea5..d4f9eaa 100644
> --- a/dtc-lexer.l
> +++ b/dtc-lexer.l
> @@ -29,6 +29,8 @@ PROPNODECHAR	[a-zA-Z0-9,._+*#?@-]
>  PATHCHAR	({PROPNODECHAR}|[/])
>  LABEL		[a-zA-Z_][a-zA-Z0-9_]*
>  STRING		\"([^\\"]|\\.)*\"
> +CHAR_LITERAL	'[^\\']'
> +CHAR_ESCAPED	'\\([^']+|')'

I'd prefer a single regex here, of '[^']+', and then check that it is
indeed a single character when you process it.  An aside as to why...

So, when first working with lexer/parser generators, it is very
tempting to make the token regexes as tight as possible, so that as
soon as lex has recognized the token, you know it is 100%
syntactically valid.  The problem with this is that when the user
enters something that looks kinda like the token in question, but
isn't quite right, then flex will re-interpret those characters as
some other tokens.  That usually results in a deeply incomprehensible
syntax error from the parser.

I think the better approach, therefore, is to make the token regexes
as wide as they reasonably can be without ambiguity, then do extra
validation once the token is recognized by flex.  That way you can
produce a much more helpful "Bad format for token X" type error
message.  Currently dtc is a bit mixed in the extent it does this, but
that's the direction I'd prefer to move.

>  WS		[[:space:]]
>  COMMENT		"/*"([^*]|\*+[^*/])*\*+"/"
>  LINECOMMENT	"//".*\n
> @@ -109,6 +111,18 @@ static int pop_input_file(void);
>  			return DT_LITERAL;
>  		}
>  
> +<V1>{CHAR_LITERAL}	{
> +			yylval.byte = yytext[1];
> +			DPRINT("Character literal: %s\n", yytext);
> +			return DT_BYTE;
> +		}
> +
> +<V1>{CHAR_ESCAPED}	{
> +			yylval.byte = get_escape_char_exact(yytext+1, yyleng-2);
> +			DPRINT("Character literal escaped: %s\n", yytext);

So, I'd prefer the error handling to be done here - that is if there's
a bad escape sequence or more than one character, report it here.  For
now, a die() will do, although longer term I'd prefer a not
immediately fatal message (allowing the rest of the file to be checked
for errors) with an error triggered after the parse is complete.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]     ` <1315437340-1661-4-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
@ 2011-09-08  3:51       ` David Gibson
       [not found]         ` <20110908035149.GO30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: David Gibson @ 2011-09-08  3:51 UTC (permalink / raw)
  To: Anton Staaf; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Wed, Sep 07, 2011 at 04:15:40PM -0700, Anton Staaf wrote:
> With this patch the following property assignment:
> 
>     property = ['a' 2b '\r'];
> 
> is equivalent to:
> 
>     property = [61 2b 0d];

So, I still have some reservations about this syntax.

It occurred to me: do you actually need to intermix character and
hexbyte values that much?  Could I suggest an alternate sytax as:

	property = 'a', [2b], 'r';

The new character literals sufficiently distinct not to cause problems
here, and it maintains the , == bytestring append behaviour we already
have.

Thoughts?

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 0/3] Support character literals
       [not found]     ` <20110908031842.GL30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
@ 2011-09-08  5:03       ` David Gibson
       [not found]         ` <20110908050354.GP30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: David Gibson @ 2011-09-08  5:03 UTC (permalink / raw)
  To: Anton Staaf; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Thu, Sep 08, 2011 at 01:18:42PM +1000, David Gibson wrote:
> On Wed, Sep 07, 2011 at 04:15:37PM -0700, Anton Staaf wrote:
> >     These patches add simple and escaped character literal parsing support
> > to the dtc for cell lists and bytestrings.  The first patch refactors the
> > string parsing code in data.c to expose the more primitive character parsing
> > code for use in the later patches.  I have left the bytestring support in
> > place but my hope is that it is now separate enough to be discussed
> > independantly of the refactor and cell list changes.
> > 
> >     Thanks,
> >         Anton
> > 
> > Changes in v2:
> > - Move the refactor of data.c to a separate patch
> > - Add support for character literals in cell lists
> > - Merge normal and escaped literal support for bytestrings into
> > - single patch
> 
> Getting very close with version, although there are still some minor
> warts I'd like you to address.

Oh, and one other thing, I forgot to say yet: you need some testcases
for the new functionality.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]         ` <20110908035149.GO30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
@ 2011-09-08  7:07           ` Grant Likely
       [not found]             ` <20110908070716.GC15955-e0URQFbLeQY2iJbIjFUEsiwD8/FfD2ys@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: Grant Likely @ 2011-09-08  7:07 UTC (permalink / raw)
  To: David Gibson; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Thu, Sep 08, 2011 at 01:51:49PM +1000, David Gibson wrote:
> On Wed, Sep 07, 2011 at 04:15:40PM -0700, Anton Staaf wrote:
> > With this patch the following property assignment:
> > 
> >     property = ['a' 2b '\r'];
> > 
> > is equivalent to:
> > 
> >     property = [61 2b 0d];
> 
> So, I still have some reservations about this syntax.
> 
> It occurred to me: do you actually need to intermix character and
> hexbyte values that much?  Could I suggest an alternate sytax as:
> 
> 	property = 'a', [2b], 'r';
> 
> The new character literals sufficiently distinct not to cause problems
> here, and it maintains the , == bytestring append behaviour we already
> have.
> 
> Thoughts?

Does it matter much?  I'm happy with either.  Is there a downside to
the first syntax?

g.

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]             ` <20110908070716.GC15955-e0URQFbLeQY2iJbIjFUEsiwD8/FfD2ys@public.gmane.org>
@ 2011-09-08 13:01               ` David Gibson
       [not found]                 ` <20110908130109.GW30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
  2011-09-08 16:02               ` Anton Staaf
  1 sibling, 1 reply; 24+ messages in thread
From: David Gibson @ 2011-09-08 13:01 UTC (permalink / raw)
  To: Grant Likely; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Thu, Sep 08, 2011 at 12:07:16AM -0700, Grant Likely wrote:
> On Thu, Sep 08, 2011 at 01:51:49PM +1000, David Gibson wrote:
> > On Wed, Sep 07, 2011 at 04:15:40PM -0700, Anton Staaf wrote:
> > > With this patch the following property assignment:
> > > 
> > >     property = ['a' 2b '\r'];
> > > 
> > > is equivalent to:
> > > 
> > >     property = [61 2b 0d];
> > 
> > So, I still have some reservations about this syntax.
> > 
> > It occurred to me: do you actually need to intermix character and
> > hexbyte values that much?  Could I suggest an alternate sytax as:
> > 
> > 	property = 'a', [2b], 'r';
> > 
> > The new character literals sufficiently distinct not to cause problems
> > here, and it maintains the , == bytestring append behaviour we already
> > have.
> > 
> > Thoughts?
> 
> Does it matter much?  I'm happy with either.  Is there a downside to
> the first syntax?

Well.. probably not.  I'm just nervous about adding anything in the
lexically weird bytestring context.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 0/3] Support character literals
       [not found]         ` <20110908050354.GP30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
@ 2011-09-08 15:51           ` Anton Staaf
       [not found]             ` <CAF6FioVSMD2rT89tBeZEirNWj3DKO2=TXy9cE+z_FvYSn3M_dA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: Anton Staaf @ 2011-09-08 15:51 UTC (permalink / raw)
  To: David Gibson; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Wed, Sep 7, 2011 at 10:03 PM, David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> On Thu, Sep 08, 2011 at 01:18:42PM +1000, David Gibson wrote:
>> On Wed, Sep 07, 2011 at 04:15:37PM -0700, Anton Staaf wrote:
>> >     These patches add simple and escaped character literal parsing support
>> > to the dtc for cell lists and bytestrings.  The first patch refactors the
>> > string parsing code in data.c to expose the more primitive character parsing
>> > code for use in the later patches.  I have left the bytestring support in
>> > place but my hope is that it is now separate enough to be discussed
>> > independantly of the refactor and cell list changes.
>> >
>> >     Thanks,
>> >         Anton
>> >
>> > Changes in v2:
>> > - Move the refactor of data.c to a separate patch
>> > - Add support for character literals in cell lists
>> > - Merge normal and escaped literal support for bytestrings into
>> > - single patch
>>
>> Getting very close with version, although there are still some minor
>> warts I'd like you to address.
>
> Oh, and one other thing, I forgot to say yet: you need some testcases
> for the new functionality.

Yup, I'll work on these today.  I'm still working on deciphering the
testing code.  :)  Do you have a pointer to a good example of a dtc
test to work from?  I tried looking around for a bytestring or literal
test, but couldn't find anything obvious.

Thanks,
    Anton

> --
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                | _way_ _around_!
> http://www.ozlabs.org/~dgibson
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]             ` <20110908070716.GC15955-e0URQFbLeQY2iJbIjFUEsiwD8/FfD2ys@public.gmane.org>
  2011-09-08 13:01               ` David Gibson
@ 2011-09-08 16:02               ` Anton Staaf
  1 sibling, 0 replies; 24+ messages in thread
From: Anton Staaf @ 2011-09-08 16:02 UTC (permalink / raw)
  To: Grant Likely; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Thu, Sep 8, 2011 at 12:07 AM, Grant Likely <grant.likely-s3s/WqlpOiPyB63q8FvJNQ@public.gmane.org> wrote:
> On Thu, Sep 08, 2011 at 01:51:49PM +1000, David Gibson wrote:
>> On Wed, Sep 07, 2011 at 04:15:40PM -0700, Anton Staaf wrote:
>> > With this patch the following property assignment:
>> >
>> >     property = ['a' 2b '\r'];
>> >
>> > is equivalent to:
>> >
>> >     property = [61 2b 0d];
>>
>> So, I still have some reservations about this syntax.
>>
>> It occurred to me: do you actually need to intermix character and
>> hexbyte values that much?  Could I suggest an alternate sytax as:
>>
>>       property = 'a', [2b], 'r';
>>
>> The new character literals sufficiently distinct not to cause problems
>> here, and it maintains the , == bytestring append behaviour we already
>> have.
>>
>> Thoughts?
>
> Does it matter much?  I'm happy with either.  Is there a downside to
> the first syntax?

I don't think it matters that much, though in my use case of encoding
a keyboard mapping table I do actually intermix character literals and
two character hex values a lot.  In particular, I use 00 to indicate a
key that is not present in the scan matrix.  I also use hex values
with the top bit set to indicate special characters, such as modifiers
or arrow keys.  So I guess I have a slight preference for my original
syntax, which has another advantage of looking a bit more C-like.
Though I could certainly use the alternate syntax as well.  One last
thought, the alternate syntax will require quite a few additional
commas and square brackets, most likely causing my tables to wrap and
thus get become even longer in the dts file.

Thanks,
    Anton

> g.
>
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 1/3] dtc: Refactor character literal parsing code
       [not found]         ` <20110908032250.GM30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
@ 2011-09-08 16:06           ` Anton Staaf
  0 siblings, 0 replies; 24+ messages in thread
From: Anton Staaf @ 2011-09-08 16:06 UTC (permalink / raw)
  To: David Gibson; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Wed, Sep 7, 2011 at 8:22 PM, David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> On Wed, Sep 07, 2011 at 04:15:38PM -0700, Anton Staaf wrote:
>> Move the parsing of hex, octal and escaped characters from data.c
>> to util.c where it can be used for character literal parsing within
>> strings as well as for stand alone C style character literals.
>
> [snip]
>> +char get_oct_char(const char *s, int *i)
>
> Probably make these static, since I can't really see a use for them
> other than in the escape processing code.

Yup, will do.

>> +{
>> +     char x[4];
>> +     char *endx;
>> +     long val;
>> +
>> +     x[3] = '\0';
>> +     strncpy(x, s + *i, 3);
>> +
>> +     val = strtol(x, &endx, 8);
>> +
>> +     assert(endx > x);
>> +
>> +     (*i) += endx - x;
>> +     return val;
>> +}
>> +
>> +char get_hex_char(const char *s, int *i)
>> +{
>> +     char x[3];
>> +     char *endx;
>> +     long val;
>> +
>> +     x[2] = '\0';
>> +     strncpy(x, s + *i, 2);
>> +
>> +     val = strtol(x, &endx, 16);
>> +     if (!(endx  > x))
>> +             die("\\x used with no following hex digits\n");
>> +
>> +     (*i) += endx - x;
>> +     return val;
>> +}
>> +
>> +char get_escape_char(const char *s, int *i)
>
> Yes, splitting this function off is quite nice.
>
> [snip]
>> +char get_escape_char_exact(const char *s, int len)
>> +{
>> +     int     j = 1; //skip intial "\"
>> +     char    c = get_escape_char(s, &j);
>> +
>> +     if (j != len)
>> +             die("Extra characters at end of character literal '%s' "
>> +                 "(%d != %d)\n", s, j, len);
>> +
>> +     return c;
>> +}
>
> This I'm not a fan of.  Handling erroneous input with a die() in a
> low-level helper like this is pretty nasty (I know there are other
> instances of it).  I'd prefer to see the action handling the error in
> the caller though, where it's better equipped to know a reasonable way
> of informing the user.

Can do, this behavior was copied from another _exact function, I'll
switch to doing the check in the parser rule.

Thanks,
    Anton

> Some more on error handling in comments on your next patch.
>
> --
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                | _way_ _around_!
> http://www.ozlabs.org/~dgibson
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 2/3] dtc: Support character literals in cell lists
       [not found]         ` <20110908034742.GN30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
@ 2011-09-08 21:20           ` Anton Staaf
       [not found]             ` <CAF6FioU7ur-bRpvscFV6XG7vWpu-DLezdeyPGAwz-J+moPXyXQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: Anton Staaf @ 2011-09-08 21:20 UTC (permalink / raw)
  To: David Gibson; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Wed, Sep 7, 2011 at 8:47 PM, David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> On Wed, Sep 07, 2011 at 04:15:39PM -0700, Anton Staaf wrote:
>> With this patch the following property assignment:
>>
>>     property = <0x12345678 'a' '\r' 100>;
>>
>> is equivalent to:
>>
>>     property = <0x12345678 0x00000061 0x0000000D 0x00000064>
>>
>> Signed-off-by: Anton Staaf <robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
>> Cc: Jon Loeliger <jdl-CYoMK+44s/E@public.gmane.org>
>> Cc: David Gibson <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org>
>> ---
>>  Documentation/dts-format.txt |    2 +-
>>  Documentation/manual.txt     |    7 ++++---
>>  dtc-lexer.l                  |   14 ++++++++++++++
>>  dtc-parser.y                 |    4 ++++
>>  4 files changed, 23 insertions(+), 4 deletions(-)
>>
>> diff --git a/Documentation/dts-format.txt b/Documentation/dts-format.txt
>> index a655b87..eae8b76 100644
>> --- a/Documentation/dts-format.txt
>> +++ b/Documentation/dts-format.txt
>> @@ -33,7 +33,7 @@ Property values may be defined as an array of 32-bit integer cells, as
>>  NUL-terminated strings, as bytestrings or a combination of these.
>>
>>  * Arrays of cells are represented by angle brackets surrounding a
>> -  space separated list of C-style integers
>> +  space separated list of C-style integers or character literals.
>>
>>       e.g. interrupts = <17 0xc>;
>>
>> diff --git a/Documentation/manual.txt b/Documentation/manual.txt
>> index f8a8a7b..940fd3d 100644
>> --- a/Documentation/manual.txt
>> +++ b/Documentation/manual.txt
>> @@ -213,10 +213,11 @@ For example:
>>
>>  By default, all numeric values are hexadecimal.  Alternate bases
>>  may be specified using a prefix "d#" for decimal, "b#" for binary,
>> -and "o#" for octal.
>> +and "o#" for octal.  Character literals are supported using the C
>> +language character literal syntax of 'a'.
>
> Hrm, this paragraph is clearly referring to the old-style dts-v0
> syntax (hex is not the default, these days) - which may well mean the
> whole document is hopelessly out of date.  And I think the char
> literals should only be processed in dts-v1 mode.

Fair enough, would it make sense for me to just remove this
modification to manual.txt and leave the changes in place in
dts-format.txt?

>> -Strings support common escape sequences from C: "\n", "\t", "\r",
>> -"\(octal value)", "\x(hex value)".
>> +Strings and character literals support common escape sequences from C:
>> +"\n", "\t", "\r", "\(octal value)", "\x(hex value)".
>>
>>
>>  4.3) Labels and References
>> diff --git a/dtc-lexer.l b/dtc-lexer.l
>> index e866ea5..d4f9eaa 100644
>> --- a/dtc-lexer.l
>> +++ b/dtc-lexer.l
>> @@ -29,6 +29,8 @@ PROPNODECHAR        [a-zA-Z0-9,._+*#?@-]
>>  PATHCHAR     ({PROPNODECHAR}|[/])
>>  LABEL                [a-zA-Z_][a-zA-Z0-9_]*
>>  STRING               \"([^\\"]|\\.)*\"
>> +CHAR_LITERAL '[^\\']'
>> +CHAR_ESCAPED '\\([^']+|')'
>
> I'd prefer a single regex here, of '[^']+', and then check that it is
> indeed a single character when you process it.  An aside as to why...

Done, though I made it a slightly more complex regex that can also match '\''.

> So, when first working with lexer/parser generators, it is very
> tempting to make the token regexes as tight as possible, so that as
> soon as lex has recognized the token, you know it is 100%
> syntactically valid.  The problem with this is that when the user
> enters something that looks kinda like the token in question, but
> isn't quite right, then flex will re-interpret those characters as
> some other tokens.  That usually results in a deeply incomprehensible
> syntax error from the parser.

Good point.

> I think the better approach, therefore, is to make the token regexes
> as wide as they reasonably can be without ambiguity, then do extra
> validation once the token is recognized by flex.  That way you can
> produce a much more helpful "Bad format for token X" type error
> message.  Currently dtc is a bit mixed in the extent it does this, but
> that's the direction I'd prefer to move.

Done.

>>  WS           [[:space:]]
>>  COMMENT              "/*"([^*]|\*+[^*/])*\*+"/"
>>  LINECOMMENT  "//".*\n
>> @@ -109,6 +111,18 @@ static int pop_input_file(void);
>>                       return DT_LITERAL;
>>               }
>>
>> +<V1>{CHAR_LITERAL}   {
>> +                     yylval.byte = yytext[1];
>> +                     DPRINT("Character literal: %s\n", yytext);
>> +                     return DT_BYTE;
>> +             }
>> +
>> +<V1>{CHAR_ESCAPED}   {
>> +                     yylval.byte = get_escape_char_exact(yytext+1, yyleng-2);
>> +                     DPRINT("Character literal escaped: %s\n", yytext);
>
> So, I'd prefer the error handling to be done here - that is if there's
> a bad escape sequence or more than one character, report it here.  For
> now, a die() will do, although longer term I'd prefer a not
> immediately fatal message (allowing the rest of the file to be checked
> for errors) with an error triggered after the parse is complete.

I ended up (as you'll see shortly) doing this in dtc-parser.y and
using the print_error function there so I didn't have to just die out.
 So longer term can be now, instead of...

Thanks,
    Anton

> --
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                | _way_ _around_!
> http://www.ozlabs.org/~dgibson
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 2/3] dtc: Support character literals in cell lists
       [not found]             ` <CAF6FioU7ur-bRpvscFV6XG7vWpu-DLezdeyPGAwz-J+moPXyXQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2011-09-09  0:52               ` David Gibson
  0 siblings, 0 replies; 24+ messages in thread
From: David Gibson @ 2011-09-09  0:52 UTC (permalink / raw)
  To: Anton Staaf; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Thu, Sep 08, 2011 at 02:20:16PM -0700, Anton Staaf wrote:
> On Wed, Sep 7, 2011 at 8:47 PM, David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > On Wed, Sep 07, 2011 at 04:15:39PM -0700, Anton Staaf wrote:
[snip]
> > Hrm, this paragraph is clearly referring to the old-style dts-v0
> > syntax (hex is not the default, these days) - which may well mean the
> > whole document is hopelessly out of date.  And I think the char
> > literals should only be processed in dts-v1 mode.
> 
> Fair enough, would it make sense for me to just remove this
> modification to manual.txt and leave the changes in place in
> dts-format.txt?

I think so.

> >> -Strings support common escape sequences from C: "\n", "\t", "\r",
> >> -"\(octal value)", "\x(hex value)".
> >> +Strings and character literals support common escape sequences from C:
> >> +"\n", "\t", "\r", "\(octal value)", "\x(hex value)".
> >>
> >>
> >>  4.3) Labels and References
> >> diff --git a/dtc-lexer.l b/dtc-lexer.l
> >> index e866ea5..d4f9eaa 100644
> >> --- a/dtc-lexer.l
> >> +++ b/dtc-lexer.l
> >> @@ -29,6 +29,8 @@ PROPNODECHAR        [a-zA-Z0-9,._+*#?@-]
> >>  PATHCHAR     ({PROPNODECHAR}|[/])
> >>  LABEL                [a-zA-Z_][a-zA-Z0-9_]*
> >>  STRING               \"([^\\"]|\\.)*\"
> >> +CHAR_LITERAL '[^\\']'
> >> +CHAR_ESCAPED '\\([^']+|')'
> >
> > I'd prefer a single regex here, of '[^']+', and then check that it is
> > indeed a single character when you process it.  An aside as to why...
> 
> Done, though I made it a slightly more complex regex that can also match '\''.

Good call, I forgot that case.

[snip]
> > So, I'd prefer the error handling to be done here - that is if there's
> > a bad escape sequence or more than one character, report it here.  For
> > now, a die() will do, although longer term I'd prefer a not
> > immediately fatal message (allowing the rest of the file to be checked
> > for errors) with an error triggered after the parse is complete.
> 
> I ended up (as you'll see shortly) doing this in dtc-parser.y and
> using the print_error function there so I didn't have to just die out.
>  So longer term can be now, instead of...

Superb :)

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 0/3] Support character literals
       [not found]             ` <CAF6FioVSMD2rT89tBeZEirNWj3DKO2=TXy9cE+z_FvYSn3M_dA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2011-09-09  0:54               ` David Gibson
  0 siblings, 0 replies; 24+ messages in thread
From: David Gibson @ 2011-09-09  0:54 UTC (permalink / raw)
  To: Anton Staaf; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Thu, Sep 08, 2011 at 08:51:54AM -0700, Anton Staaf wrote:
> On Wed, Sep 7, 2011 at 10:03 PM, David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > On Thu, Sep 08, 2011 at 01:18:42PM +1000, David Gibson wrote:
> >> On Wed, Sep 07, 2011 at 04:15:37PM -0700, Anton Staaf wrote:
> >> >     These patches add simple and escaped character literal parsing support
> >> > to the dtc for cell lists and bytestrings.  The first patch refactors the
> >> > string parsing code in data.c to expose the more primitive character parsing
> >> > code for use in the later patches.  I have left the bytestring support in
> >> > place but my hope is that it is now separate enough to be discussed
> >> > independantly of the refactor and cell list changes.
> >> >
> >> >     Thanks,
> >> >         Anton
> >> >
> >> > Changes in v2:
> >> > - Move the refactor of data.c to a separate patch
> >> > - Add support for character literals in cell lists
> >> > - Merge normal and escaped literal support for bytestrings into
> >> > - single patch
> >>
> >> Getting very close with version, although there are still some minor
> >> warts I'd like you to address.
> >
> > Oh, and one other thing, I forgot to say yet: you need some testcases
> > for the new functionality.
> 
> Yup, I'll work on these today.  I'm still working on deciphering the
> testing code.  :)  Do you have a pointer to a good example of a dtc
> test to work from?  I tried looking around for a bytestring or literal
> test, but couldn't find anything obvious.

Something like escapes.dts / string_escapes.c is probably fairly
similar to what you're looking for.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]                 ` <20110908130109.GW30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
@ 2011-09-09  6:30                   ` Anton Staaf
       [not found]                     ` <CAF6FioWOuK3szFb8FD+_fde0HpZ0TJJXx51ZL=BAcZFSojRGCQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2011-09-09 14:22                   ` Jon Loeliger
  1 sibling, 1 reply; 24+ messages in thread
From: Anton Staaf @ 2011-09-09  6:30 UTC (permalink / raw)
  To: David Gibson; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Thu, Sep 8, 2011 at 6:01 AM, David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> On Thu, Sep 08, 2011 at 12:07:16AM -0700, Grant Likely wrote:
>> On Thu, Sep 08, 2011 at 01:51:49PM +1000, David Gibson wrote:
>> > On Wed, Sep 07, 2011 at 04:15:40PM -0700, Anton Staaf wrote:
>> > > With this patch the following property assignment:
>> > >
>> > >     property = ['a' 2b '\r'];
>> > >
>> > > is equivalent to:
>> > >
>> > >     property = [61 2b 0d];
>> >
>> > So, I still have some reservations about this syntax.
>> >
>> > It occurred to me: do you actually need to intermix character and
>> > hexbyte values that much?  Could I suggest an alternate sytax as:
>> >
>> >     property = 'a', [2b], 'r';
>> >
>> > The new character literals sufficiently distinct not to cause problems
>> > here, and it maintains the , == bytestring append behaviour we already
>> > have.
>> >
>> > Thoughts?
>>
>> Does it matter much?  I'm happy with either.  Is there a downside to
>> the first syntax?
>
> Well.. probably not.  I'm just nervous about adding anything in the
> lexically weird bytestring context.

Which is completely understandable.  Let's continue to mull that over
while we think up a sane syntax for specifying the size of a cell in a
cell list.  Would it be reasonable to specify the size per list?  Or
would you like to have a mechanism and syntax to specify it per cell
entry in the list.  If it is per cell entry then we have to deal with
the strange packings that would come from something like:

property = <0x11223344 byte(0x55) 0x66778899>

I'm not suggesting that the byte(...) syntax be used by the way.  :)
But the above situation would lead to a value of 0x55667788 and a
trailing 0x99, which then has to be either promoted to a uint32_t
because it was originally in a uint32_t literal, or left as a byte,
not very obvious.  The other option would be to do some sort of local
packing.  Where the above would turn into the same as:

property = <0x11223344 0x55 0x66778899>

But that has all sorts of problems.  When do you start a new uint32_t
cell.  If it's only when you overflow out of the current one or when
you see a full uint32_t literal then you could split a uint16_t across
a cell boundary.  I suppose this problem already exists because of the
"," syntax.  And so you could make a cell list that contains a bunch
of uint32_t values that are stored in a property in such a way as to
be unaligned.

The alternative of specifying the size of the cells in a list seems
cleaner to me.  You still can generate unaligned values in a property
using ",", but within a cell list you can't.  So then what would be a
good syntax for that.  Perhaps:

property = !uint16 <0x12 0x3456>

or

property = !uint8 <0xff 0xaa>

or to explicitly get the default uint32_t behavior

property = !uint32 <0x12345678 0 12>

This has the advantage that ! is not currently used in any of the dts
syntax.  And having it at the front of the cell list definition means
that you can check and correctly pack/append each literal as they are
read.  It has the disadvantage that if we want to use ! to mean
logical not at some point it will complicate that parsing.

-Anton

> --
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                | _way_ _around_!
> http://www.ozlabs.org/~dgibson
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]                     ` <CAF6FioWOuK3szFb8FD+_fde0HpZ0TJJXx51ZL=BAcZFSojRGCQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2011-09-09  7:25                       ` David Gibson
       [not found]                         ` <20110909072532.GC9025-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: David Gibson @ 2011-09-09  7:25 UTC (permalink / raw)
  To: Anton Staaf; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Thu, Sep 08, 2011 at 11:30:42PM -0700, Anton Staaf wrote:
> On Thu, Sep 8, 2011 at 6:01 AM, David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > On Thu, Sep 08, 2011 at 12:07:16AM -0700, Grant Likely wrote:
> >> On Thu, Sep 08, 2011 at 01:51:49PM +1000, David Gibson wrote:
> >> > On Wed, Sep 07, 2011 at 04:15:40PM -0700, Anton Staaf wrote:
> >> > > With this patch the following property assignment:
> >> > >
> >> > >     property = ['a' 2b '\r'];
> >> > >
> >> > > is equivalent to:
> >> > >
> >> > >     property = [61 2b 0d];
> >> >
> >> > So, I still have some reservations about this syntax.
> >> >
> >> > It occurred to me: do you actually need to intermix character and
> >> > hexbyte values that much?  Could I suggest an alternate sytax as:
> >> >
> >> >     property = 'a', [2b], 'r';
> >> >
> >> > The new character literals sufficiently distinct not to cause problems
> >> > here, and it maintains the , == bytestring append behaviour we already
> >> > have.
> >> >
> >> > Thoughts?
> >>
> >> Does it matter much?  I'm happy with either.  Is there a downside to
> >> the first syntax?
> >
> > Well.. probably not.  I'm just nervous about adding anything in the
> > lexically weird bytestring context.
> 
> Which is completely understandable.  Let's continue to mull that over
> while we think up a sane syntax for specifying the size of a cell in a
> cell list.  Would it be reasonable to specify the size per list?

That was what I had in mind.

>  Or
> would you like to have a mechanism and syntax to specify it per cell
> entry in the list.  If it is per cell entry then we have to deal with
> the strange packings that would come from something like:
> 
> property = <0x11223344 byte(0x55) 0x66778899>
> 
> I'm not suggesting that the byte(...) syntax be used by the way.  :)
> But the above situation would lead to a value of 0x55667788 and a
> trailing 0x99, which then has to be either promoted to a uint32_t

Well.. only if you're now interpreting the property as an array of
u32s, which if you specified it with that byte in the middle, is
presumably not how this property is typically interpreted.

> because it was originally in a uint32_t literal, or left as a byte,
> not very obvious.  The other option would be to do some sort of local
> packing.  Where the above would turn into the same as:

> property = <0x11223344 0x55 0x66778899>

Um, I'm really not sure what you're getting at with this one.

> But that has all sorts of problems.  When do you start a new uint32_t
> cell.  If it's only when you overflow out of the current one or when
> you see a full uint32_t literal then you could split a uint16_t across
> a cell boundary.

So, I have it on Mitch Bradley's authority, that the properties should
in general be treated as bytestrings and that we should *never* do
padding in there for the sake of alignment.

>  I suppose this problem already exists because of the
> "," syntax.  And so you could make a cell list that contains a bunch
> of uint32_t values that are stored in a property in such a way as to
> be unaligned.

Yes, and intentionally so.  Way back, we used to always 4-byte align
at the beginning of a cell list, but I took that out on Mitch's advice
(plus it was easier that way).  I believe there are some bindings out
there (though they don't seem to be in common, current use) than
define properties to contain a string followed by cells, which will
lead to unaligned cell values.

> The alternative of specifying the size of the cells in a list seems
> cleaner to me.  You still can generate unaligned values in a property
> using ",", but within a cell list you can't.  So then what would be a
> good syntax for that.  Perhaps:

I concur.

> property = !uint16 <0x12 0x3456>
> 
> or
> 
> property = !uint8 <0xff 0xaa>
> 
> or to explicitly get the default uint32_t behavior
> 
> property = !uint32 <0x12345678 0 12>

Well, I think this is better than anything I've yet considered, though
still not quite good enough.

> This has the advantage that ! is not currently used in any of the dts
> syntax.  And having it at the front of the cell list definition means
> that you can check and correctly pack/append each literal as they are
> read.  It has the disadvantage that if we want to use ! to mean
> logical not at some point it will complicate that parsing.

Yeah, and I'd really prefer not to make life difficult for
implementing expressions.  So, some vague, and possibly contradictory
points on this topic:

 * If we can do it in a way that's not too verbose, I'd prefer both
a marker for the size present at both < and >, so that on a big array
a human has an extra visual clue that this is an array of non-default
size.

 * I've toyed with the idea of different bracketing types for the
different sizes.  e.g. << >> for 64-bit values, and, uh, something for
8 and 16.  Haven't come up with any variants on this that don't seem
excessively brief and cryptic, though.

 * Also contemplated something like <$8 0xff 0x00 > or <.8 0xff 0x00>
for 8-bit values, but it seems pretty ugly.

I supposed modifying your suggestion, but combining with our existing
convention for "reserved words" we could do:

	prop = /uint8/ <0xab 0xcd>;

I don't love it, but it's about the best I've come up with yet.  And
in particular it's probably the variant I'd be least upset to carry
around as legacy if we come up with a better way in future.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]                 ` <20110908130109.GW30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
  2011-09-09  6:30                   ` Anton Staaf
@ 2011-09-09 14:22                   ` Jon Loeliger
  1 sibling, 0 replies; 24+ messages in thread
From: Jon Loeliger @ 2011-09-09 14:22 UTC (permalink / raw)
  To: David Gibson; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

> > > >     property = ['a' 2b '\r'];
> > > > 
> > > >     property = [61 2b 0d];

> > Does it matter much?  I'm happy with either.  Is there a downside to
> > the first syntax?
> 
> Well.. probably not.  I'm just nervous about adding anything in the
> lexically weird bytestring context.

As a general statement, it is often difficult for parsers
to establish enough look-ahead when lists of things are
simply placed side-by-side without some sort of connecting
syntax.  So a structure like "foo1 foo2 foo3" can be more prone
to S/R and R/R errors than something like "foo1, foo2, foo3".
No, that's not a hard and fast rule because prior context
can make it clear to a parsing rule set.  In this case,
the '[' can estabish that context and make it clear what
is intended as a list of values.  But if there were ever
any future notion of wanting to create a bracketed list
of values for another purpose, then a comma separated list
would be preferable.

jdl

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]                         ` <20110909072532.GC9025-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
@ 2011-09-09 18:37                           ` Anton Staaf
       [not found]                             ` <CAF6FioVxYZW7grfwE1Q_FP5T95Omh9sqzAVyzsZr8EbfM6WV_Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: Anton Staaf @ 2011-09-09 18:37 UTC (permalink / raw)
  To: David Gibson; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Fri, Sep 9, 2011 at 12:25 AM, David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> On Thu, Sep 08, 2011 at 11:30:42PM -0700, Anton Staaf wrote:
>> On Thu, Sep 8, 2011 at 6:01 AM, David Gibson
>> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
>> > On Thu, Sep 08, 2011 at 12:07:16AM -0700, Grant Likely wrote:
>> >> On Thu, Sep 08, 2011 at 01:51:49PM +1000, David Gibson wrote:
>> >> > On Wed, Sep 07, 2011 at 04:15:40PM -0700, Anton Staaf wrote:
>> >> > > With this patch the following property assignment:
>> >> > >
>> >> > >     property = ['a' 2b '\r'];
>> >> > >
>> >> > > is equivalent to:
>> >> > >
>> >> > >     property = [61 2b 0d];
>> >> >
>> >> > So, I still have some reservations about this syntax.
>> >> >
>> >> > It occurred to me: do you actually need to intermix character and
>> >> > hexbyte values that much?  Could I suggest an alternate sytax as:
>> >> >
>> >> >     property = 'a', [2b], 'r';
>> >> >
>> >> > The new character literals sufficiently distinct not to cause problems
>> >> > here, and it maintains the , == bytestring append behaviour we already
>> >> > have.
>> >> >
>> >> > Thoughts?
>> >>
>> >> Does it matter much?  I'm happy with either.  Is there a downside to
>> >> the first syntax?
>> >
>> > Well.. probably not.  I'm just nervous about adding anything in the
>> > lexically weird bytestring context.
>>
>> Which is completely understandable.  Let's continue to mull that over
>> while we think up a sane syntax for specifying the size of a cell in a
>> cell list.  Would it be reasonable to specify the size per list?
>
> That was what I had in mind.
>
>>  Or
>> would you like to have a mechanism and syntax to specify it per cell
>> entry in the list.  If it is per cell entry then we have to deal with
>> the strange packings that would come from something like:
>>
>> property = <0x11223344 byte(0x55) 0x66778899>
>>
>> I'm not suggesting that the byte(...) syntax be used by the way.  :)
>> But the above situation would lead to a value of 0x55667788 and a
>> trailing 0x99, which then has to be either promoted to a uint32_t
>
> Well.. only if you're now interpreting the property as an array of
> u32s, which if you specified it with that byte in the middle, is
> presumably not how this property is typically interpreted.
>
>> because it was originally in a uint32_t literal, or left as a byte,
>> not very obvious.  The other option would be to do some sort of local
>> packing.  Where the above would turn into the same as:
>
>> property = <0x11223344 0x55 0x66778899>
>
> Um, I'm really not sure what you're getting at with this one.
>
>> But that has all sorts of problems.  When do you start a new uint32_t
>> cell.  If it's only when you overflow out of the current one or when
>> you see a full uint32_t literal then you could split a uint16_t across
>> a cell boundary.
>
> So, I have it on Mitch Bradley's authority, that the properties should
> in general be treated as bytestrings and that we should *never* do
> padding in there for the sake of alignment.
>
>>  I suppose this problem already exists because of the
>> "," syntax.  And so you could make a cell list that contains a bunch
>> of uint32_t values that are stored in a property in such a way as to
>> be unaligned.
>
> Yes, and intentionally so.  Way back, we used to always 4-byte align
> at the beginning of a cell list, but I took that out on Mitch's advice
> (plus it was easier that way).  I believe there are some bindings out
> there (though they don't seem to be in common, current use) than
> define properties to contain a string followed by cells, which will
> lead to unaligned cell values.
>
>> The alternative of specifying the size of the cells in a list seems
>> cleaner to me.  You still can generate unaligned values in a property
>> using ",", but within a cell list you can't.  So then what would be a
>> good syntax for that.  Perhaps:
>
> I concur.
>
>> property = !uint16 <0x12 0x3456>
>>
>> or
>>
>> property = !uint8 <0xff 0xaa>
>>
>> or to explicitly get the default uint32_t behavior
>>
>> property = !uint32 <0x12345678 0 12>
>
> Well, I think this is better than anything I've yet considered, though
> still not quite good enough.
>
>> This has the advantage that ! is not currently used in any of the dts
>> syntax.  And having it at the front of the cell list definition means
>> that you can check and correctly pack/append each literal as they are
>> read.  It has the disadvantage that if we want to use ! to mean
>> logical not at some point it will complicate that parsing.
>
> Yeah, and I'd really prefer not to make life difficult for
> implementing expressions.  So, some vague, and possibly contradictory
> points on this topic:
>
>  * If we can do it in a way that's not too verbose, I'd prefer both
> a marker for the size present at both < and >, so that on a big array
> a human has an extra visual clue that this is an array of non-default
> size.
>
>  * I've toyed with the idea of different bracketing types for the
> different sizes.  e.g. << >> for 64-bit values, and, uh, something for
> 8 and 16.  Haven't come up with any variants on this that don't seem
> excessively brief and cryptic, though.
>
>  * Also contemplated something like <$8 0xff 0x00 > or <.8 0xff 0x00>
> for 8-bit values, but it seems pretty ugly.
>
> I supposed modifying your suggestion, but combining with our existing
> convention for "reserved words" we could do:
>
>        prop = /uint8/ <0xab 0xcd>;
>
> I don't love it, but it's about the best I've come up with yet.  And
> in particular it's probably the variant I'd be least upset to carry
> around as legacy if we come up with a better way in future.

Yes, I like this better than my suggestions of using !.  I wasn't sure
if the /.../ syntax was something that was going to be allowed in
property definitions.  One other option working from this could be:

property = /size/ 8 <0xab 0xcd>;

It has the advantage of limiting the number of reserved words created.
 It could also be:

property = /type/ uint8 <0xab 0xcd>;

Which would allow us to define new types for cell lists without adding
new syntax.  I'm not sure if this is too useful though because the
only types that I can think of that are not summed up by their size
are things like float and double...  But it does have a nice look to
it in my opinion.  And I would assume that if the /type/ <typename>
was left off it would default to a uint32_t cell list.  So the
additional verbosity of having to indicate it's a different type and
what the type is will only be needed in a few instances.

-Anton

> --
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                | _way_ _around_!
> http://www.ozlabs.org/~dgibson
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]                             ` <CAF6FioVxYZW7grfwE1Q_FP5T95Omh9sqzAVyzsZr8EbfM6WV_Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2011-09-12  0:48                               ` David Gibson
       [not found]                                 ` <20110912004807.GH9025-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: David Gibson @ 2011-09-12  0:48 UTC (permalink / raw)
  To: Anton Staaf; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Fri, Sep 09, 2011 at 11:37:51AM -0700, Anton Staaf wrote:
> On Fri, Sep 9, 2011 at 12:25 AM, David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > On Thu, Sep 08, 2011 at 11:30:42PM -0700, Anton Staaf wrote:
[snip]
> > I supposed modifying your suggestion, but combining with our existing
> > convention for "reserved words" we could do:
> >
> >        prop = /uint8/ <0xab 0xcd>;
> >
> > I don't love it, but it's about the best I've come up with yet.  And
> > in particular it's probably the variant I'd be least upset to carry
> > around as legacy if we come up with a better way in future.
> 
> Yes, I like this better than my suggestions of using !.  I wasn't sure
> if the /.../ syntax was something that was going to be allowed in
> property definitions.  One other option working from this could be:
> 
> property = /size/ 8 <0xab 0xcd>;

I quite like that idea.

> It has the advantage of limiting the number of reserved words created.
>  It could also be:
> 
> property = /type/ uint8 <0xab 0xcd>;

Not so fond of this one.  The "uint8" would have to be some new
lexical type - "identifed" probably, which we'd then have to look up.

> Which would allow us to define new types for cell lists without adding
> new syntax.  I'm not sure if this is too useful though because the
> only types that I can think of that are not summed up by their size
> are things like float and double... 

Yeah, it's my feeling that the < > symtax should remain for integer
arrays.  If we need something for other types in future, we should
define new, distinct syntax for that when the time comes.

> But it does have a nice look to
> it in my opinion.  And I would assume that if the /type/ <typename>
> was left off it would default to a uint32_t cell list.  So the
> additional verbosity of having to indicate it's a different type and
> what the type is will only be needed in a few instances.

Yes.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]                                 ` <20110912004807.GH9025-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
@ 2011-09-12  5:09                                   ` Anton Staaf
       [not found]                                     ` <CAF6FioW9iY1JAEq5_vn0ZbHC81LBF7xN6JNzBVTbs+k19p2pRg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 24+ messages in thread
From: Anton Staaf @ 2011-09-12  5:09 UTC (permalink / raw)
  To: David Gibson; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Sun, Sep 11, 2011 at 5:48 PM, David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> On Fri, Sep 09, 2011 at 11:37:51AM -0700, Anton Staaf wrote:
>> On Fri, Sep 9, 2011 at 12:25 AM, David Gibson
>> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
>> > On Thu, Sep 08, 2011 at 11:30:42PM -0700, Anton Staaf wrote:
> [snip]
>> > I supposed modifying your suggestion, but combining with our existing
>> > convention for "reserved words" we could do:
>> >
>> >        prop = /uint8/ <0xab 0xcd>;
>> >
>> > I don't love it, but it's about the best I've come up with yet.  And
>> > in particular it's probably the variant I'd be least upset to carry
>> > around as legacy if we come up with a better way in future.
>>
>> Yes, I like this better than my suggestions of using !.  I wasn't sure
>> if the /.../ syntax was something that was going to be allowed in
>> property definitions.  One other option working from this could be:
>>
>> property = /size/ 8 <0xab 0xcd>;
>
> I quite like that idea.

OK, I can take a look at writing up a patch to add this.  Unless it
was something anyone else wanted to do.  :)

-Anton

>> It has the advantage of limiting the number of reserved words created.
>>  It could also be:
>>
>> property = /type/ uint8 <0xab 0xcd>;
>
> Not so fond of this one.  The "uint8" would have to be some new
> lexical type - "identifed" probably, which we'd then have to look up.
>
>> Which would allow us to define new types for cell lists without adding
>> new syntax.  I'm not sure if this is too useful though because the
>> only types that I can think of that are not summed up by their size
>> are things like float and double...
>
> Yeah, it's my feeling that the < > symtax should remain for integer
> arrays.  If we need something for other types in future, we should
> define new, distinct syntax for that when the time comes.
>
>> But it does have a nice look to
>> it in my opinion.  And I would assume that if the /type/ <typename>
>> was left off it would default to a uint32_t cell list.  So the
>> additional verbosity of having to indicate it's a different type and
>> what the type is will only be needed in a few instances.
>
> Yes.
>
> --
> David Gibson                    | I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>                                | _way_ _around_!
> http://www.ozlabs.org/~dgibson
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [PATCH v2 3/3] dtc: Support character literals in bytestrings
       [not found]                                     ` <CAF6FioW9iY1JAEq5_vn0ZbHC81LBF7xN6JNzBVTbs+k19p2pRg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2011-09-12 17:53                                       ` Anton Staaf
  0 siblings, 0 replies; 24+ messages in thread
From: Anton Staaf @ 2011-09-12 17:53 UTC (permalink / raw)
  To: David Gibson; +Cc: devicetree-discuss-uLR06cmDAlY/bJ5BZ2RsiQ

On Sun, Sep 11, 2011 at 10:09 PM, Anton Staaf <robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> wrote:
> On Sun, Sep 11, 2011 at 5:48 PM, David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
>> On Fri, Sep 09, 2011 at 11:37:51AM -0700, Anton Staaf wrote:
>>> On Fri, Sep 9, 2011 at 12:25 AM, David Gibson
>>> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
>>> > On Thu, Sep 08, 2011 at 11:30:42PM -0700, Anton Staaf wrote:
>> [snip]
>>> > I supposed modifying your suggestion, but combining with our existing
>>> > convention for "reserved words" we could do:
>>> >
>>> >        prop = /uint8/ <0xab 0xcd>;
>>> >
>>> > I don't love it, but it's about the best I've come up with yet.  And
>>> > in particular it's probably the variant I'd be least upset to carry
>>> > around as legacy if we come up with a better way in future.
>>>
>>> Yes, I like this better than my suggestions of using !.  I wasn't sure
>>> if the /.../ syntax was something that was going to be allowed in
>>> property definitions.  One other option working from this could be:
>>>
>>> property = /size/ 8 <0xab 0xcd>;
>>
>> I quite like that idea.
>
> OK, I can take a look at writing up a patch to add this.  Unless it
> was something anyone else wanted to do.  :)
>
> -Anton

So, I've done some looking into an implementation of the /size/
syntax.  Here's what I'm thinking so far:

1) Parsing /size/ and the subsequent literal are easy, we knew this already.

2) The phandle references are a problem.  They are fixed up in
checks.c:fixup_phandle_references and that function expects that there
is 32-bits of space (sizeof(cell_t)) available to write the phandle
into.

3) Communicating the current size of the cells from the parser rule
that reads the DT_SIZE and DT_LITERAL to the parser rule that reads
the cellval requires either some auxiliary data or an explosion of the
parser rules (one per size we want to support).  Obviously the latter
is bad.

So, one solution to the above problems is to disallow phandle
references in anything other than a uint32 cell list, and to augment
the "data" struct to include the current cell size.  This can be used
to reject any attempts to add a REF_PHANDLE marker when the cell size
is not 32-bits.  The fixup code in checks.c would then never encounter
a REF_PHANDLE in a location that didn't have enough space reserved for
it.  A slight modification of the above could be to allow a
REF_PHANDLE any time the current cell size is greater than 32 bits.

Other solutions include always appending 32-bits and adding the
REF_PHANDLE, no matter the current size of the cell.  Or always adding
the minimum of 32-bits or the current size of the cell.

Updating the current cell size will also be a trick.  The parser rules
will need to be split up a bit I think so that once the DT_SIZE and
DT_LITERAL are read the current cell size can be updated and then the
celllist can be read.

In any of these solutions I think we still need to track the current
cell size in the data struct.

I don't see any problem with adding LABEL markers to cell lists of
size smaller than 32-bits.

Thoughts?

Thanks,
    Anton

>>> It has the advantage of limiting the number of reserved words created.
>>>  It could also be:
>>>
>>> property = /type/ uint8 <0xab 0xcd>;
>>
>> Not so fond of this one.  The "uint8" would have to be some new
>> lexical type - "identifed" probably, which we'd then have to look up.
>>
>>> Which would allow us to define new types for cell lists without adding
>>> new syntax.  I'm not sure if this is too useful though because the
>>> only types that I can think of that are not summed up by their size
>>> are things like float and double...
>>
>> Yeah, it's my feeling that the < > symtax should remain for integer
>> arrays.  If we need something for other types in future, we should
>> define new, distinct syntax for that when the time comes.
>>
>>> But it does have a nice look to
>>> it in my opinion.  And I would assume that if the /type/ <typename>
>>> was left off it would default to a uint32_t cell list.  So the
>>> additional verbosity of having to indicate it's a different type and
>>> what the type is will only be needed in a few instances.
>>
>> Yes.
>>
>> --
>> David Gibson                    | I'll have my music baroque, and my code
>> david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
>>                                | _way_ _around_!
>> http://www.ozlabs.org/~dgibson
>>
>

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2011-09-12 17:53 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-07 23:15 [PATCH v2 0/3] Support character literals Anton Staaf
     [not found] ` <1315437340-1661-1-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
2011-09-07 23:15   ` [PATCH v2 1/3] dtc: Refactor character literal parsing code Anton Staaf
     [not found]     ` <1315437340-1661-2-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
2011-09-08  3:22       ` David Gibson
     [not found]         ` <20110908032250.GM30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
2011-09-08 16:06           ` Anton Staaf
2011-09-07 23:15   ` [PATCH v2 2/3] dtc: Support character literals in cell lists Anton Staaf
     [not found]     ` <1315437340-1661-3-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
2011-09-08  3:47       ` David Gibson
     [not found]         ` <20110908034742.GN30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
2011-09-08 21:20           ` Anton Staaf
     [not found]             ` <CAF6FioU7ur-bRpvscFV6XG7vWpu-DLezdeyPGAwz-J+moPXyXQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2011-09-09  0:52               ` David Gibson
2011-09-07 23:15   ` [PATCH v2 3/3] dtc: Support character literals in bytestrings Anton Staaf
     [not found]     ` <1315437340-1661-4-git-send-email-robotboy-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org>
2011-09-08  3:51       ` David Gibson
     [not found]         ` <20110908035149.GO30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
2011-09-08  7:07           ` Grant Likely
     [not found]             ` <20110908070716.GC15955-e0URQFbLeQY2iJbIjFUEsiwD8/FfD2ys@public.gmane.org>
2011-09-08 13:01               ` David Gibson
     [not found]                 ` <20110908130109.GW30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
2011-09-09  6:30                   ` Anton Staaf
     [not found]                     ` <CAF6FioWOuK3szFb8FD+_fde0HpZ0TJJXx51ZL=BAcZFSojRGCQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2011-09-09  7:25                       ` David Gibson
     [not found]                         ` <20110909072532.GC9025-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
2011-09-09 18:37                           ` Anton Staaf
     [not found]                             ` <CAF6FioVxYZW7grfwE1Q_FP5T95Omh9sqzAVyzsZr8EbfM6WV_Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2011-09-12  0:48                               ` David Gibson
     [not found]                                 ` <20110912004807.GH9025-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
2011-09-12  5:09                                   ` Anton Staaf
     [not found]                                     ` <CAF6FioW9iY1JAEq5_vn0ZbHC81LBF7xN6JNzBVTbs+k19p2pRg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2011-09-12 17:53                                       ` Anton Staaf
2011-09-09 14:22                   ` Jon Loeliger
2011-09-08 16:02               ` Anton Staaf
2011-09-08  3:18   ` [PATCH v2 0/3] Support character literals David Gibson
     [not found]     ` <20110908031842.GL30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
2011-09-08  5:03       ` David Gibson
     [not found]         ` <20110908050354.GP30278-787xzQ0H9iQXU02nzanrWNbf9cGiqdzd@public.gmane.org>
2011-09-08 15:51           ` Anton Staaf
     [not found]             ` <CAF6FioVSMD2rT89tBeZEirNWj3DKO2=TXy9cE+z_FvYSn3M_dA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2011-09-09  0:54               ` David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.