All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/5] Improve output type formatting
@ 2021-07-27 18:30 Rob Herring
       [not found] ` <20210727183023.3212077-1-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-07-27 18:30 UTC (permalink / raw)
  To: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

This series improves maintaining type information in the output of dtc
from sources without any type annotations such as dtb format. As there's
already a bunch of type information in the checks, we simply need to
have the checks add markers.

This is needed in part to be able to run DT schema validation on dtb
files. I also plan to use the schema files to provide type information
for all the properties not covered by the dtc checks. Why not do this
for all the properties? It's possible, but it wouldn't be possible with
just pure schema. The phandle+args patterns with variable cells would
need to recreate the same parsing code.

Rob

Rob Herring (5):
  Move marker functions to dtc.h
  Add has_type_markers() helper
  checks: Add markers on known properties
  dtc: Drop dts source restriction for yaml output
  treesource: Maintain phandle label/path on output

 checks.c                        | 87 +++++++++++++++++++++++++++++++--
 dtc.c                           |  2 -
 dtc.h                           | 28 ++++++++++-
 tests/type-preservation.dt.yaml |  3 ++
 tests/type-preservation.dts     |  3 ++
 treesource.c                    | 48 ++++++++----------
 6 files changed, 135 insertions(+), 36 deletions(-)

-- 
2.27.0


^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH v3 1/5] Move marker functions to dtc.h
       [not found] ` <20210727183023.3212077-1-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2021-07-27 18:30   ` Rob Herring
       [not found]     ` <20210727183023.3212077-2-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  2021-07-27 18:30   ` [PATCH v3 2/5] Add has_type_markers() helper Rob Herring
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-07-27 18:30 UTC (permalink / raw)
  To: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

In preparation to share the marker related functions, let's move them all
out of treeresource.c into dtc.h. Rework the next_type_marker()
implementation to use for_each_marker() instead of open coding it.

Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
v3:
 - New patch
---
 dtc.h        | 23 ++++++++++++++++++++++-
 treesource.c | 23 +----------------------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/dtc.h b/dtc.h
index cf2c6ac6e81e..0a1f54991026 100644
--- a/dtc.h
+++ b/dtc.h
@@ -116,6 +116,12 @@ enum markertype {
 	TYPE_UINT64,
 	TYPE_STRING,
 };
+
+static inline bool is_type_marker(enum markertype type)
+{
+	return type >= TYPE_UINT8;
+}
+
 extern const char *markername(enum markertype markertype);
 
 struct  marker {
@@ -140,7 +146,22 @@ struct data {
 	for_each_marker(m) \
 		if ((m)->type == (t))
 
-size_t type_marker_length(struct marker *m);
+static inline struct marker *next_type_marker(struct marker *m)
+{
+	for_each_marker(m)
+		if (is_type_marker(m->type))
+			break;
+	return m;
+}
+
+static inline size_t type_marker_length(struct marker *m)
+{
+	struct marker *next = next_type_marker(m->next);
+
+	if (next)
+		return next->offset - m->offset;
+	return 0;
+}
 
 void data_free(struct data d);
 
diff --git a/treesource.c b/treesource.c
index 061ba8c9c5e8..db2ff69f5ccb 100644
--- a/treesource.c
+++ b/treesource.c
@@ -124,27 +124,6 @@ static void write_propval_int(FILE *f, const char *p, size_t len, size_t width)
 	}
 }
 
-static bool has_data_type_information(struct marker *m)
-{
-	return m->type >= TYPE_UINT8;
-}
-
-static struct marker *next_type_marker(struct marker *m)
-{
-	while (m && !has_data_type_information(m))
-		m = m->next;
-	return m;
-}
-
-size_t type_marker_length(struct marker *m)
-{
-	struct marker *next = next_type_marker(m->next);
-
-	if (next)
-		return next->offset - m->offset;
-	return 0;
-}
-
 static const char *delim_start[] = {
 	[TYPE_UINT8] = "[",
 	[TYPE_UINT16] = "/bits/ 16 <",
@@ -230,7 +209,7 @@ static void write_propval(FILE *f, struct property *prop)
 		size_t data_len = type_marker_length(m) ? : len - m->offset;
 		const char *p = &prop->val.val[m->offset];
 
-		if (has_data_type_information(m)) {
+		if (is_type_marker(m->type)) {
 			emit_type = m->type;
 			fprintf(f, " %s", delim_start[emit_type]);
 		} else if (m->type == LABEL)
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 2/5] Add has_type_markers() helper
       [not found] ` <20210727183023.3212077-1-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  2021-07-27 18:30   ` [PATCH v3 1/5] Move marker functions to dtc.h Rob Herring
@ 2021-07-27 18:30   ` Rob Herring
  2021-07-27 18:30   ` [PATCH v3 3/5] checks: Add markers on known properties Rob Herring
                     ` (2 subsequent siblings)
  4 siblings, 0 replies; 23+ messages in thread
From: Rob Herring @ 2021-07-27 18:30 UTC (permalink / raw)
  To: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

Add a marker helper function, has_type_markers(), to test if there are
already type markers present.

Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
v3:
 - New patch
---
 dtc.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/dtc.h b/dtc.h
index 0a1f54991026..5608fdf4fe28 100644
--- a/dtc.h
+++ b/dtc.h
@@ -154,6 +154,11 @@ static inline struct marker *next_type_marker(struct marker *m)
 	return m;
 }
 
+static inline bool has_type_markers(struct marker *m)
+{
+	return next_type_marker(m) != NULL;
+}
+
 static inline size_t type_marker_length(struct marker *m)
 {
 	struct marker *next = next_type_marker(m->next);
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 3/5] checks: Add markers on known properties
       [not found] ` <20210727183023.3212077-1-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  2021-07-27 18:30   ` [PATCH v3 1/5] Move marker functions to dtc.h Rob Herring
  2021-07-27 18:30   ` [PATCH v3 2/5] Add has_type_markers() helper Rob Herring
@ 2021-07-27 18:30   ` Rob Herring
       [not found]     ` <20210727183023.3212077-4-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  2021-07-27 18:30   ` [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output Rob Herring
  2021-07-27 18:30   ` [PATCH v3 5/5] treesource: Maintain phandle label/path on output Rob Herring
  4 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-07-27 18:30 UTC (permalink / raw)
  To: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

For properties we already have checks for, we know the type and how to
parse them. Use this to add type and phandle markers so we have them when
the source did not (e.g. dtb format).

Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
v3:
 - Rework marker_add() and callers into 3 functions. marker_add() just
   adds a marker. marker_add_type_annotations() adds type annotations
   at each entry offset. marker_add_phandle_annotation() adds a phandle
   and type annotation at a phandle offset.
 - Only add type info when no type markers are present at all.
 - Keep phandle ref check on phandle+args properties.
v2:
 - Set marker.ref on phandle markers
 - Avoid adding markers if there's any conflicting type markers.
---
 checks.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 82 insertions(+), 5 deletions(-)

diff --git a/checks.c b/checks.c
index e2690e90f90c..b7ac1732b0b8 100644
--- a/checks.c
+++ b/checks.c
@@ -58,6 +58,56 @@ struct check {
 #define CHECK(nm_, fn_, d_, ...) \
 	CHECK_ENTRY(nm_, fn_, d_, false, false, __VA_ARGS__)
 
+static struct marker *marker_add(struct marker **list, enum markertype type,
+				 unsigned int offset, char * ref)
+{
+	struct marker *m = *list;
+
+	m = xmalloc(sizeof(*m));
+	m->type = type;
+	m->offset = offset;
+	m->next = NULL;
+	m->ref = ref;
+
+	/* Find the insertion point, markers are in order by offset */
+	while (*list && ((*list)->offset < m->offset))
+		list = &((*list)->next);
+
+	if (*list) {
+		m->next = (*list)->next;
+		(*list)->next = m;
+	} else
+		*list = m;
+
+	return m;
+}
+
+static void marker_add_type_annotations(struct property *prop,
+					enum markertype type,
+					unsigned int entrylen)
+{
+	unsigned int offset;
+
+	assert(is_type_marker(type));
+
+	if (has_type_markers(prop->val.markers))
+		return;
+
+	for (offset = 0; offset < prop->val.len; offset += entrylen)
+		marker_add(&prop->val.markers, type, offset, NULL);
+}
+
+static void marker_add_phandle_annotation(struct property *prop,
+					  unsigned int cell,
+					  char *ref)
+{
+	if (!cell && has_type_markers(prop->val.markers))
+		return;
+
+	marker_add(&prop->val.markers, TYPE_UINT32, cell * sizeof(cell_t), NULL);
+	marker_add(&prop->val.markers, REF_PHANDLE, cell * sizeof(cell_t), ref);
+}
+
 static inline void  PRINTF(5, 6) check_msg(struct check *c, struct dt_info *dti,
 					   struct node *node,
 					   struct property *prop,
@@ -260,8 +310,12 @@ static void check_is_cell(struct check *c, struct dt_info *dti,
 	if (!prop)
 		return; /* Not present, assumed ok */
 
-	if (prop->val.len != sizeof(cell_t))
+	if (prop->val.len != sizeof(cell_t)) {
 		FAIL_PROP(c, dti, node, prop, "property is not a single cell");
+		return;
+	}
+
+	marker_add_type_annotations(prop, TYPE_UINT32, prop->val.len);
 }
 #define WARNING_IF_NOT_CELL(nm, propname) \
 	WARNING(nm, check_is_cell, (propname))
@@ -526,6 +580,8 @@ static cell_t check_phandle_prop(struct check *c, struct dt_info *dti,
 		return 0;
 	}
 
+	marker_add_type_annotations(prop, TYPE_UINT32, prop->val.len);
+
 	return phandle;
 }
 
@@ -774,10 +830,14 @@ static void check_reg_format(struct check *c, struct dt_info *dti,
 	size_cells = node_size_cells(node->parent);
 	entrylen = (addr_cells + size_cells) * sizeof(cell_t);
 
-	if (!is_multiple_of(prop->val.len, entrylen))
+	if (!is_multiple_of(prop->val.len, entrylen)) {
 		FAIL_PROP(c, dti, node, prop, "property has invalid length (%d bytes) "
 			  "(#address-cells == %d, #size-cells == %d)",
 			  prop->val.len, addr_cells, size_cells);
+		return;
+	}
+
+	marker_add_type_annotations(prop, TYPE_UINT32, entrylen);
 }
 WARNING(reg_format, check_reg_format, NULL, &addr_size_cells);
 
@@ -821,6 +881,8 @@ static void check_ranges_format(struct check *c, struct dt_info *dti,
 			  "#size-cells == %d)", ranges, prop->val.len,
 			  p_addr_cells, c_addr_cells, c_size_cells);
 	}
+
+	marker_add_type_annotations(prop, TYPE_UINT32, entrylen);
 }
 WARNING(ranges_format, check_ranges_format, "ranges", &addr_size_cells);
 WARNING(dma_ranges_format, check_ranges_format, "dma-ranges", &addr_size_cells);
@@ -1389,6 +1451,7 @@ static void check_property_phandle_args(struct check *c,
 {
 	struct node *root = dti->dt;
 	unsigned int cell, cellsize = 0;
+	bool has_type = has_type_markers(prop->val.markers);
 
 	if (!is_multiple_of(prop->val.len, sizeof(cell_t))) {
 		FAIL_PROP(c, dti, node, prop,
@@ -1417,7 +1480,7 @@ static void check_property_phandle_args(struct check *c,
 		}
 
 		/* If we have markers, verify the current cell is a phandle */
-		if (prop->val.markers) {
+		if (has_type) {
 			struct marker *m = prop->val.markers;
 			for_each_marker_of_type(m, REF_PHANDLE) {
 				if (m->offset == (cell * sizeof(cell_t)))
@@ -1454,7 +1517,11 @@ static void check_property_phandle_args(struct check *c,
 			FAIL_PROP(c, dti, node, prop,
 				  "property size (%d) too small for cell size %d",
 				  prop->val.len, cellsize);
+			break;
 		}
+
+		if (!has_type)
+			marker_add_phandle_annotation(prop, cell, provider_node->fullpath);
 	}
 }
 
@@ -1629,10 +1696,13 @@ static void check_interrupts_property(struct check *c,
 				FAIL_PROP(c, dti, parent, prop, "Bad phandle");
 				return;
 			}
-			if (!node_is_interrupt_provider(irq_node))
+			if (!node_is_interrupt_provider(irq_node)) {
 				FAIL(c, dti, irq_node,
 				     "Missing interrupt-controller or interrupt-map property");
+				return;
+			}
 
+			marker_add_phandle_annotation(prop, 0, irq_node->fullpath);
 			break;
 		}
 
@@ -1655,7 +1725,10 @@ static void check_interrupts_property(struct check *c,
 		FAIL_PROP(c, dti, node, prop,
 			  "size is (%d), expected multiple of %d",
 			  irq_prop->val.len, (int)(irq_cells * sizeof(cell_t)));
+		return;
 	}
+
+	marker_add_type_annotations(irq_prop, TYPE_UINT32, irq_cells * sizeof(cell_t));
 }
 WARNING(interrupts_property, check_interrupts_property, &phandle_references);
 
@@ -1776,8 +1849,12 @@ static struct node *get_remote_endpoint(struct check *c, struct dt_info *dti,
 		return NULL;
 
 	node = get_node_by_phandle(dti->dt, phandle);
-	if (!node)
+	if (!node) {
 		FAIL_PROP(c, dti, endpoint, prop, "graph phandle is not valid");
+		return NULL;
+	}
+
+	marker_add_phandle_annotation(prop, 0, node->fullpath);
 
 	return node;
 }
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
       [not found] ` <20210727183023.3212077-1-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
                     ` (2 preceding siblings ...)
  2021-07-27 18:30   ` [PATCH v3 3/5] checks: Add markers on known properties Rob Herring
@ 2021-07-27 18:30   ` Rob Herring
       [not found]     ` <20210727183023.3212077-5-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  2021-07-27 18:30   ` [PATCH v3 5/5] treesource: Maintain phandle label/path on output Rob Herring
  4 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-07-27 18:30 UTC (permalink / raw)
  To: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

YAML output was restricted to dts input as there are some dependencies
on source annotations which get lost with other input formats. With the
addition of markers by the checks, YAML output from dtb format becomes
more useful.

Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
v3:
 - no change
---
 dtc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/dtc.c b/dtc.c
index bc786c543b7e..acfeac1da234 100644
--- a/dtc.c
+++ b/dtc.c
@@ -353,8 +353,6 @@ int main(int argc, char *argv[])
 		dt_to_source(outf, dti);
 #ifndef NO_YAML
 	} else if (streq(outform, "yaml")) {
-		if (!streq(inform, "dts"))
-			die("YAML output format requires dts input format\n");
 		dt_to_yaml(outf, dti);
 #endif
 	} else if (streq(outform, "dtb")) {
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* [PATCH v3 5/5] treesource: Maintain phandle label/path on output
       [not found] ` <20210727183023.3212077-1-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
                     ` (3 preceding siblings ...)
  2021-07-27 18:30   ` [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output Rob Herring
@ 2021-07-27 18:30   ` Rob Herring
       [not found]     ` <20210727183023.3212077-6-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
  4 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-07-27 18:30 UTC (permalink / raw)
  To: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

The dts output will just output phandle integer values, but often the
necessary markers are present with path or label references. Improve the
output and maintain phandle label or path references when present in dts
output.

Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
v3:
 - Add phandle properties to type-preservation test
v2:
 - New patch
---
 tests/type-preservation.dt.yaml |  3 +++
 tests/type-preservation.dts     |  3 +++
 treesource.c                    | 25 +++++++++++++++++++------
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/tests/type-preservation.dt.yaml b/tests/type-preservation.dt.yaml
index ee8cfdebe9be..a0cc64cc4b69 100644
--- a/tests/type-preservation.dt.yaml
+++ b/tests/type-preservation.dt.yaml
@@ -13,8 +13,11 @@
     int64: [!u64 [0x200000000]]
     int64-array: [!u64 [0x100000000, 0x0]]
     a-string-with-nulls: ["foo\0bar", "baz"]
+    a-phandle: [[!phandle 0x1]]
+    a-phandle-with-args: [[!phandle 0x1, 0x0, 0x1], [!phandle 0x1, 0x2, 0x3]]
     subsubnode:
       compatible: ["subsubnode1", "subsubnode"]
+      phandle: [[0x1]]
       subsubsubnode:
         compatible: ["subsubsubnode1", [0x1234], "subsubsubnode"]
 ...
diff --git a/tests/type-preservation.dts b/tests/type-preservation.dts
index 3e380ba6c8a5..921ea21172d1 100644
--- a/tests/type-preservation.dts
+++ b/tests/type-preservation.dts
@@ -16,9 +16,12 @@
 		int64 = /bits/ 64 <0x200000000>;
 		int64-array = /bits/ 64 <0x100000000 0x00> int64_array_label_end:;
 		a-string-with-nulls = "foo\0bar", "baz";
+		a-phandle = <&subsub1>;
+		a-phandle-with-args = <&subsub1 0x00 0x01>, <&subsub1 0x02 0x03>;
 
 		subsub1: subsubnode {
 			compatible = "subsubnode1", "subsubnode";
+			phandle = <0x01>;
 
 			subsubsub1: subsubsubnode {
 				compatible = "subsubsubnode1", <0x1234>, valuea: valueb: "subsubsubnode";
diff --git a/treesource.c b/treesource.c
index db2ff69f5ccb..33fedee82d58 100644
--- a/treesource.c
+++ b/treesource.c
@@ -208,26 +208,39 @@ static void write_propval(FILE *f, struct property *prop)
 		size_t chunk_len = (m->next ? m->next->offset : len) - m->offset;
 		size_t data_len = type_marker_length(m) ? : len - m->offset;
 		const char *p = &prop->val.val[m->offset];
+		struct marker *m_phandle;
 
 		if (is_type_marker(m->type)) {
 			emit_type = m->type;
 			fprintf(f, " %s", delim_start[emit_type]);
 		} else if (m->type == LABEL)
 			fprintf(f, " %s:", m->ref);
-		else if (m->offset)
-			fputc(' ', f);
 
-		if (emit_type == TYPE_NONE) {
-			assert(chunk_len == 0);
+		if (emit_type == TYPE_NONE || chunk_len == 0)
 			continue;
-		}
 
 		switch(emit_type) {
 		case TYPE_UINT16:
 			write_propval_int(f, p, chunk_len, 2);
 			break;
 		case TYPE_UINT32:
-			write_propval_int(f, p, chunk_len, 4);
+			m_phandle = prop->val.markers;
+			for_each_marker_of_type(m_phandle, REF_PHANDLE)
+				if (m->offset == m_phandle->offset)
+					break;
+
+			if (m_phandle) {
+				if (m_phandle->ref[0] == '/')
+					fprintf(f, "&{%s}", m_phandle->ref);
+				else
+					fprintf(f, "&%s", m_phandle->ref);
+				if (chunk_len > 4) {
+					fputc(' ', f);
+					write_propval_int(f, p + 4, chunk_len - 4, 4);
+				}
+			} else {
+				write_propval_int(f, p, chunk_len, 4);
+			}
 			break;
 		case TYPE_UINT64:
 			write_propval_int(f, p, chunk_len, 8);
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 1/5] Move marker functions to dtc.h
       [not found]     ` <20210727183023.3212077-2-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2021-08-26  4:00       ` David Gibson
  2021-09-27 20:56         ` Rob Herring
  0 siblings, 1 reply; 23+ messages in thread
From: David Gibson @ 2021-08-26  4:00 UTC (permalink / raw)
  To: Rob Herring; +Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 2934 bytes --]

On Tue, Jul 27, 2021 at 12:30:19PM -0600, Rob Herring wrote:
> In preparation to share the marker related functions, let's move them all
> out of treeresource.c into dtc.h. Rework the next_type_marker()
> implementation to use for_each_marker() instead of open coding it.
> 
> Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

Applied, thanks.

> ---
> v3:
>  - New patch
> ---
>  dtc.h        | 23 ++++++++++++++++++++++-
>  treesource.c | 23 +----------------------
>  2 files changed, 23 insertions(+), 23 deletions(-)
> 
> diff --git a/dtc.h b/dtc.h
> index cf2c6ac6e81e..0a1f54991026 100644
> --- a/dtc.h
> +++ b/dtc.h
> @@ -116,6 +116,12 @@ enum markertype {
>  	TYPE_UINT64,
>  	TYPE_STRING,
>  };
> +
> +static inline bool is_type_marker(enum markertype type)
> +{
> +	return type >= TYPE_UINT8;
> +}
> +
>  extern const char *markername(enum markertype markertype);
>  
>  struct  marker {
> @@ -140,7 +146,22 @@ struct data {
>  	for_each_marker(m) \
>  		if ((m)->type == (t))
>  
> -size_t type_marker_length(struct marker *m);
> +static inline struct marker *next_type_marker(struct marker *m)
> +{
> +	for_each_marker(m)
> +		if (is_type_marker(m->type))
> +			break;
> +	return m;
> +}
> +
> +static inline size_t type_marker_length(struct marker *m)
> +{
> +	struct marker *next = next_type_marker(m->next);
> +
> +	if (next)
> +		return next->offset - m->offset;
> +	return 0;
> +}
>  
>  void data_free(struct data d);
>  
> diff --git a/treesource.c b/treesource.c
> index 061ba8c9c5e8..db2ff69f5ccb 100644
> --- a/treesource.c
> +++ b/treesource.c
> @@ -124,27 +124,6 @@ static void write_propval_int(FILE *f, const char *p, size_t len, size_t width)
>  	}
>  }
>  
> -static bool has_data_type_information(struct marker *m)
> -{
> -	return m->type >= TYPE_UINT8;
> -}
> -
> -static struct marker *next_type_marker(struct marker *m)
> -{
> -	while (m && !has_data_type_information(m))
> -		m = m->next;
> -	return m;
> -}
> -
> -size_t type_marker_length(struct marker *m)
> -{
> -	struct marker *next = next_type_marker(m->next);
> -
> -	if (next)
> -		return next->offset - m->offset;
> -	return 0;
> -}
> -
>  static const char *delim_start[] = {
>  	[TYPE_UINT8] = "[",
>  	[TYPE_UINT16] = "/bits/ 16 <",
> @@ -230,7 +209,7 @@ static void write_propval(FILE *f, struct property *prop)
>  		size_t data_len = type_marker_length(m) ? : len - m->offset;
>  		const char *p = &prop->val.val[m->offset];
>  
> -		if (has_data_type_information(m)) {
> +		if (is_type_marker(m->type)) {
>  			emit_type = m->type;
>  			fprintf(f, " %s", delim_start[emit_type]);
>  		} else if (m->type == LABEL)

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 1/5] Move marker functions to dtc.h
  2021-08-26  4:00       ` David Gibson
@ 2021-09-27 20:56         ` Rob Herring
       [not found]           ` <CAL_Jsq+QAnyu=Fj_RtRu-dS4Ta0bZJWFUABsK1uKbnAKpXi1mA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-09-27 20:56 UTC (permalink / raw)
  To: David Gibson; +Cc: Devicetree Compiler

On Wed, Aug 25, 2021 at 11:09 PM David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
>
> On Tue, Jul 27, 2021 at 12:30:19PM -0600, Rob Herring wrote:
> > In preparation to share the marker related functions, let's move them all
> > out of treeresource.c into dtc.h. Rework the next_type_marker()
> > implementation to use for_each_marker() instead of open coding it.
> >
> > Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
>
> Applied, thanks.

I was about to say this never got pushed out. Then I looked at bit
harder and found this gem:

commit ff3a30c115ad7354689dc7858604356ecb7f9b1c
Author: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Date:   Tue Jul 27 12:30:19 2021 -0600

    asm: Use .asciz and .ascii instead of .string

    We use the .string pseudo-op both in some of our test assembly files
    and in our -Oasm output.  We expect this to emit a \0 terminated
    string into the .o file.  However for certain targets (e.g. HP
    PA-RISC) it doesn't include the \0.  Use .asciz instead, which
    explicitly does what we want.

    There's also one place we can use .ascii (which explicitly emits a
    string *without* \0 termination) instead of multiple .byte directives.

    Signed-off-by: David Gibson <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org>

 dtc.h            | 23 ++++++++++++++++++++++-
 flattree.c       |  6 +++---
 tests/base01.asm | 38 +++++++++++++++++++-------------------
 tests/test01.asm | 52 ++++++++++++++++++++++++++--------------------------
 tests/trees.S    | 10 ++++------
 treesource.c     | 23 +----------------------
 6 files changed, 75 insertions(+), 77 deletions(-)


Perhaps your own commits should be sent to the list as well...

Are you going to apply or review the rest of my series that's been
sitting on the list for 2 months now?

Rob

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 3/5] checks: Add markers on known properties
       [not found]     ` <20210727183023.3212077-4-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2021-10-11  5:09       ` David Gibson
  2021-10-11 13:57         ` Rob Herring
  0 siblings, 1 reply; 23+ messages in thread
From: David Gibson @ 2021-10-11  5:09 UTC (permalink / raw)
  To: Rob Herring; +Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 7491 bytes --]

On Tue, Jul 27, 2021 at 12:30:21PM -0600, Rob Herring wrote:
> For properties we already have checks for, we know the type and how to
> parse them. Use this to add type and phandle markers so we have them when
> the source did not (e.g. dtb format).
> 
> Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> ---
> v3:
>  - Rework marker_add() and callers into 3 functions. marker_add() just
>    adds a marker. marker_add_type_annotations() adds type annotations
>    at each entry offset. marker_add_phandle_annotation() adds a phandle
>    and type annotation at a phandle offset.
>  - Only add type info when no type markers are present at all.
>  - Keep phandle ref check on phandle+args properties.
> v2:
>  - Set marker.ref on phandle markers
>  - Avoid adding markers if there's any conflicting type markers.
> ---
>  checks.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 82 insertions(+), 5 deletions(-)
> 
> diff --git a/checks.c b/checks.c
> index e2690e90f90c..b7ac1732b0b8 100644
> --- a/checks.c
> +++ b/checks.c
> @@ -58,6 +58,56 @@ struct check {
>  #define CHECK(nm_, fn_, d_, ...) \
>  	CHECK_ENTRY(nm_, fn_, d_, false, false, __VA_ARGS__)
>  
> +static struct marker *marker_add(struct marker **list, enum markertype type,
> +				 unsigned int offset, char * ref)
> +{
> +	struct marker *m = *list;
> +
> +	m = xmalloc(sizeof(*m));
> +	m->type = type;
> +	m->offset = offset;
> +	m->next = NULL;
> +	m->ref = ref;
> +
> +	/* Find the insertion point, markers are in order by offset */
> +	while (*list && ((*list)->offset < m->offset))
> +		list = &((*list)->next);
> +
> +	if (*list) {
> +		m->next = (*list)->next;
> +		(*list)->next = m;
> +	} else
> +		*list = m;
> +
> +	return m;
> +}
> +
> +static void marker_add_type_annotations(struct property *prop,
> +					enum markertype type,
> +					unsigned int entrylen)
> +{
> +	unsigned int offset;
> +
> +	assert(is_type_marker(type));
> +
> +	if (has_type_markers(prop->val.markers))
> +		return;
> +
> +	for (offset = 0; offset < prop->val.len; offset += entrylen)
> +		marker_add(&prop->val.markers, type, offset, NULL);
> +}
> +
> +static void marker_add_phandle_annotation(struct property *prop,
> +					  unsigned int cell,
> +					  char *ref)
> +{
> +	if (!cell && has_type_markers(prop->val.markers))
> +		return;

So, you allow adding a phandle annotation if the property has no
existing type markers *or* you're adding it after position 0.  That
seems a bit arbitrary and fragile.

> +
> +	marker_add(&prop->val.markers, TYPE_UINT32, cell * sizeof(cell_t), NULL);
> +	marker_add(&prop->val.markers, REF_PHANDLE, cell * sizeof(cell_t), ref);
> +}
> +
>  static inline void  PRINTF(5, 6) check_msg(struct check *c, struct dt_info *dti,
>  					   struct node *node,
>  					   struct property *prop,
> @@ -260,8 +310,12 @@ static void check_is_cell(struct check *c, struct dt_info *dti,
>  	if (!prop)
>  		return; /* Not present, assumed ok */
>  
> -	if (prop->val.len != sizeof(cell_t))
> +	if (prop->val.len != sizeof(cell_t)) {
>  		FAIL_PROP(c, dti, node, prop, "property is not a single cell");
> +		return;
> +	}
> +
> +	marker_add_type_annotations(prop, TYPE_UINT32, prop->val.len);
>  }
>  #define WARNING_IF_NOT_CELL(nm, propname) \
>  	WARNING(nm, check_is_cell, (propname))
> @@ -526,6 +580,8 @@ static cell_t check_phandle_prop(struct check *c, struct dt_info *dti,
>  		return 0;
>  	}
>  
> +	marker_add_type_annotations(prop, TYPE_UINT32, prop->val.len);
> +
>  	return phandle;
>  }
>  
> @@ -774,10 +830,14 @@ static void check_reg_format(struct check *c, struct dt_info *dti,
>  	size_cells = node_size_cells(node->parent);
>  	entrylen = (addr_cells + size_cells) * sizeof(cell_t);
>  
> -	if (!is_multiple_of(prop->val.len, entrylen))
> +	if (!is_multiple_of(prop->val.len, entrylen)) {
>  		FAIL_PROP(c, dti, node, prop, "property has invalid length (%d bytes) "
>  			  "(#address-cells == %d, #size-cells == %d)",
>  			  prop->val.len, addr_cells, size_cells);
> +		return;
> +	}
> +
> +	marker_add_type_annotations(prop, TYPE_UINT32, entrylen);
>  }
>  WARNING(reg_format, check_reg_format, NULL, &addr_size_cells);
>  
> @@ -821,6 +881,8 @@ static void check_ranges_format(struct check *c, struct dt_info *dti,
>  			  "#size-cells == %d)", ranges, prop->val.len,
>  			  p_addr_cells, c_addr_cells, c_size_cells);
>  	}
> +
> +	marker_add_type_annotations(prop, TYPE_UINT32, entrylen);
>  }
>  WARNING(ranges_format, check_ranges_format, "ranges", &addr_size_cells);
>  WARNING(dma_ranges_format, check_ranges_format, "dma-ranges", &addr_size_cells);
> @@ -1389,6 +1451,7 @@ static void check_property_phandle_args(struct check *c,
>  {
>  	struct node *root = dti->dt;
>  	unsigned int cell, cellsize = 0;
> +	bool has_type = has_type_markers(prop->val.markers);
>  
>  	if (!is_multiple_of(prop->val.len, sizeof(cell_t))) {
>  		FAIL_PROP(c, dti, node, prop,
> @@ -1417,7 +1480,7 @@ static void check_property_phandle_args(struct check *c,
>  		}
>  
>  		/* If we have markers, verify the current cell is a phandle */
> -		if (prop->val.markers) {
> +		if (has_type) {
>  			struct marker *m = prop->val.markers;
>  			for_each_marker_of_type(m, REF_PHANDLE) {
>  				if (m->offset == (cell * sizeof(cell_t)))
> @@ -1454,7 +1517,11 @@ static void check_property_phandle_args(struct check *c,
>  			FAIL_PROP(c, dti, node, prop,
>  				  "property size (%d) too small for cell size %d",
>  				  prop->val.len, cellsize);
> +			break;
>  		}
> +
> +		if (!has_type)
> +			marker_add_phandle_annotation(prop, cell, provider_node->fullpath);
>  	}
>  }
>  
> @@ -1629,10 +1696,13 @@ static void check_interrupts_property(struct check *c,
>  				FAIL_PROP(c, dti, parent, prop, "Bad phandle");
>  				return;
>  			}
> -			if (!node_is_interrupt_provider(irq_node))
> +			if (!node_is_interrupt_provider(irq_node)) {
>  				FAIL(c, dti, irq_node,
>  				     "Missing interrupt-controller or interrupt-map property");
> +				return;
> +			}
>  
> +			marker_add_phandle_annotation(prop, 0, irq_node->fullpath);
>  			break;
>  		}
>  
> @@ -1655,7 +1725,10 @@ static void check_interrupts_property(struct check *c,
>  		FAIL_PROP(c, dti, node, prop,
>  			  "size is (%d), expected multiple of %d",
>  			  irq_prop->val.len, (int)(irq_cells * sizeof(cell_t)));
> +		return;
>  	}
> +
> +	marker_add_type_annotations(irq_prop, TYPE_UINT32, irq_cells * sizeof(cell_t));
>  }
>  WARNING(interrupts_property, check_interrupts_property, &phandle_references);
>  
> @@ -1776,8 +1849,12 @@ static struct node *get_remote_endpoint(struct check *c, struct dt_info *dti,
>  		return NULL;
>  
>  	node = get_node_by_phandle(dti->dt, phandle);
> -	if (!node)
> +	if (!node) {
>  		FAIL_PROP(c, dti, endpoint, prop, "graph phandle is not valid");
> +		return NULL;
> +	}
> +
> +	marker_add_phandle_annotation(prop, 0, node->fullpath);

Shouldn't this be the same as check_property_phandle_args() and verify
the type annotation if there's one already, only adding it if there
are no pre-existing types.

>  
>  	return node;
>  }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
       [not found]     ` <20210727183023.3212077-5-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2021-10-11  5:15       ` David Gibson
  2021-10-11 13:22         ` Rob Herring
  0 siblings, 1 reply; 23+ messages in thread
From: David Gibson @ 2021-10-11  5:15 UTC (permalink / raw)
  To: Rob Herring; +Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 1641 bytes --]

On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> YAML output was restricted to dts input as there are some dependencies
> on source annotations which get lost with other input formats. With the
> addition of markers by the checks, YAML output from dtb format becomes
> more useful.
> 
> Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

Urgh.  There's not really anything wrong with this patch of itself,
but it really underlines my feeling that the whole yaml output thing
is a bunch of hacks in pursuit of a bogus goal.

Yaml output wants to include information that simply isn't present in
the flattened tree format (without considering bindings), so it relies
on formatting conventions in the dts, hence this test in the first
place.  This alleges it removes a restriction, but it only works if a
bunch of extra heuristics are able to guess the types correctly.

> ---
> v3:
>  - no change
> ---
>  dtc.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/dtc.c b/dtc.c
> index bc786c543b7e..acfeac1da234 100644
> --- a/dtc.c
> +++ b/dtc.c
> @@ -353,8 +353,6 @@ int main(int argc, char *argv[])
>  		dt_to_source(outf, dti);
>  #ifndef NO_YAML
>  	} else if (streq(outform, "yaml")) {
> -		if (!streq(inform, "dts"))
> -			die("YAML output format requires dts input format\n");
>  		dt_to_yaml(outf, dti);
>  #endif
>  	} else if (streq(outform, "dtb")) {

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 5/5] treesource: Maintain phandle label/path on output
       [not found]     ` <20210727183023.3212077-6-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
@ 2021-10-11  5:21       ` David Gibson
  0 siblings, 0 replies; 23+ messages in thread
From: David Gibson @ 2021-10-11  5:21 UTC (permalink / raw)
  To: Rob Herring; +Cc: devicetree-compiler-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: text/plain, Size: 3952 bytes --]

On Tue, Jul 27, 2021 at 12:30:23PM -0600, Rob Herring wrote:
> The dts output will just output phandle integer values, but often the
> necessary markers are present with path or label references. Improve the
> output and maintain phandle label or path references when present in dts
> output.
> 
> Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

I actually like this one, regardless of the rest, so I've applied it.

> ---
> v3:
>  - Add phandle properties to type-preservation test
> v2:
>  - New patch
> ---
>  tests/type-preservation.dt.yaml |  3 +++
>  tests/type-preservation.dts     |  3 +++
>  treesource.c                    | 25 +++++++++++++++++++------
>  3 files changed, 25 insertions(+), 6 deletions(-)
> 
> diff --git a/tests/type-preservation.dt.yaml b/tests/type-preservation.dt.yaml
> index ee8cfdebe9be..a0cc64cc4b69 100644
> --- a/tests/type-preservation.dt.yaml
> +++ b/tests/type-preservation.dt.yaml
> @@ -13,8 +13,11 @@
>      int64: [!u64 [0x200000000]]
>      int64-array: [!u64 [0x100000000, 0x0]]
>      a-string-with-nulls: ["foo\0bar", "baz"]
> +    a-phandle: [[!phandle 0x1]]
> +    a-phandle-with-args: [[!phandle 0x1, 0x0, 0x1], [!phandle 0x1, 0x2, 0x3]]
>      subsubnode:
>        compatible: ["subsubnode1", "subsubnode"]
> +      phandle: [[0x1]]
>        subsubsubnode:
>          compatible: ["subsubsubnode1", [0x1234], "subsubsubnode"]
>  ...
> diff --git a/tests/type-preservation.dts b/tests/type-preservation.dts
> index 3e380ba6c8a5..921ea21172d1 100644
> --- a/tests/type-preservation.dts
> +++ b/tests/type-preservation.dts
> @@ -16,9 +16,12 @@
>  		int64 = /bits/ 64 <0x200000000>;
>  		int64-array = /bits/ 64 <0x100000000 0x00> int64_array_label_end:;
>  		a-string-with-nulls = "foo\0bar", "baz";
> +		a-phandle = <&subsub1>;
> +		a-phandle-with-args = <&subsub1 0x00 0x01>, <&subsub1 0x02 0x03>;
>  
>  		subsub1: subsubnode {
>  			compatible = "subsubnode1", "subsubnode";
> +			phandle = <0x01>;
>  
>  			subsubsub1: subsubsubnode {
>  				compatible = "subsubsubnode1", <0x1234>, valuea: valueb: "subsubsubnode";
> diff --git a/treesource.c b/treesource.c
> index db2ff69f5ccb..33fedee82d58 100644
> --- a/treesource.c
> +++ b/treesource.c
> @@ -208,26 +208,39 @@ static void write_propval(FILE *f, struct property *prop)
>  		size_t chunk_len = (m->next ? m->next->offset : len) - m->offset;
>  		size_t data_len = type_marker_length(m) ? : len - m->offset;
>  		const char *p = &prop->val.val[m->offset];
> +		struct marker *m_phandle;
>  
>  		if (is_type_marker(m->type)) {
>  			emit_type = m->type;
>  			fprintf(f, " %s", delim_start[emit_type]);
>  		} else if (m->type == LABEL)
>  			fprintf(f, " %s:", m->ref);
> -		else if (m->offset)
> -			fputc(' ', f);
>  
> -		if (emit_type == TYPE_NONE) {
> -			assert(chunk_len == 0);
> +		if (emit_type == TYPE_NONE || chunk_len == 0)
>  			continue;
> -		}
>  
>  		switch(emit_type) {
>  		case TYPE_UINT16:
>  			write_propval_int(f, p, chunk_len, 2);
>  			break;
>  		case TYPE_UINT32:
> -			write_propval_int(f, p, chunk_len, 4);
> +			m_phandle = prop->val.markers;
> +			for_each_marker_of_type(m_phandle, REF_PHANDLE)
> +				if (m->offset == m_phandle->offset)
> +					break;
> +
> +			if (m_phandle) {
> +				if (m_phandle->ref[0] == '/')
> +					fprintf(f, "&{%s}", m_phandle->ref);
> +				else
> +					fprintf(f, "&%s", m_phandle->ref);
> +				if (chunk_len > 4) {
> +					fputc(' ', f);
> +					write_propval_int(f, p + 4, chunk_len - 4, 4);
> +				}
> +			} else {
> +				write_propval_int(f, p, chunk_len, 4);
> +			}
>  			break;
>  		case TYPE_UINT64:
>  			write_propval_int(f, p, chunk_len, 8);

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 1/5] Move marker functions to dtc.h
       [not found]           ` <CAL_Jsq+QAnyu=Fj_RtRu-dS4Ta0bZJWFUABsK1uKbnAKpXi1mA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2021-10-11  5:29             ` David Gibson
  0 siblings, 0 replies; 23+ messages in thread
From: David Gibson @ 2021-10-11  5:29 UTC (permalink / raw)
  To: Rob Herring; +Cc: Devicetree Compiler

[-- Attachment #1: Type: text/plain, Size: 3185 bytes --]

On Mon, Sep 27, 2021 at 03:56:43PM -0500, Rob Herring wrote:
> On Wed, Aug 25, 2021 at 11:09 PM David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> >
> > On Tue, Jul 27, 2021 at 12:30:19PM -0600, Rob Herring wrote:
> > > In preparation to share the marker related functions, let's move them all
> > > out of treeresource.c into dtc.h. Rework the next_type_marker()
> > > implementation to use for_each_marker() instead of open coding it.
> > >
> > > Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> >
> > Applied, thanks.
> 
> I was about to say this never got pushed out. Then I looked at bit
> harder and found this gem:
> 
> commit ff3a30c115ad7354689dc7858604356ecb7f9b1c
> Author: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

Yikes.  I'm not sure how that happened.  I must have screwed up some
git command during a re-org / rebase.  Good news is it postdates the
latest tagged version.  Bad news is it's committed to a public branch
that's not supposed to rebase.

The question is, is it worth forcing anyone who pulled HEAD in the
interim to do a rebase/repull in order to fix it.

> Date:   Tue Jul 27 12:30:19 2021 -0600
> 
>     asm: Use .asciz and .ascii instead of .string
> 
>     We use the .string pseudo-op both in some of our test assembly files
>     and in our -Oasm output.  We expect this to emit a \0 terminated
>     string into the .o file.  However for certain targets (e.g. HP
>     PA-RISC) it doesn't include the \0.  Use .asciz instead, which
>     explicitly does what we want.
> 
>     There's also one place we can use .ascii (which explicitly emits a
>     string *without* \0 termination) instead of multiple .byte directives.
> 
>     Signed-off-by: David Gibson <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org>
> 
>  dtc.h            | 23 ++++++++++++++++++++++-
>  flattree.c       |  6 +++---
>  tests/base01.asm | 38 +++++++++++++++++++-------------------
>  tests/test01.asm | 52 ++++++++++++++++++++++++++--------------------------
>  tests/trees.S    | 10 ++++------
>  treesource.c     | 23 +----------------------
>  6 files changed, 75 insertions(+), 77 deletions(-)
> 
> 
> Perhaps your own commits should be sent to the list as well...

Alas, I don't think that would help here.  If there were a problem
with the actual patch content you'd have a point.  However, that error
would have been introduced during commit / merge / git shenannigans,
rather than while writing the patch, so list review wouldn't help.

> Are you going to apply or review the rest of my series that's been
> sitting on the list for 2 months now?

I'm sorry.  I'm moving onto new projects and it's hard to find time to
keep on top of this.  Plus, the fact that I still consider the whole
yaml based schema stuff a fundamentally misguided design doesn't help
with my enthusiasm.  I've merged one and reviewed the rest now.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
  2021-10-11  5:15       ` David Gibson
@ 2021-10-11 13:22         ` Rob Herring
       [not found]           ` <CAL_Jsq+sMrRWqqPgcvoaiY0rLSp_s+gXOqJ9OuFLQ-3piUHSVA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-10-11 13:22 UTC (permalink / raw)
  To: David Gibson; +Cc: Devicetree Compiler

On Mon, Oct 11, 2021 at 2:19 AM David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
>
> On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > YAML output was restricted to dts input as there are some dependencies
> > on source annotations which get lost with other input formats. With the
> > addition of markers by the checks, YAML output from dtb format becomes
> > more useful.
> >
> > Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
>
> Urgh.  There's not really anything wrong with this patch of itself,
> but it really underlines my feeling that the whole yaml output thing
> is a bunch of hacks in pursuit of a bogus goal.

Validating DTs is a bogus goal?

> Yaml output wants to include information that simply isn't present in
> the flattened tree format (without considering bindings), so it relies
> on formatting conventions in the dts, hence this test in the first
> place.  This alleges it removes a restriction, but it only works if a
> bunch of extra heuristics are able to guess the types correctly.

The goal here is to validate dtb files which I'd think you'd be in
favor of given your above opinions. For that to work, we have to
transform the data into the right types somewhere. We don't need any
heuristics for that. For the most part, it is done using the
definitive type information from the schemas themselves to format the
data. The exception is #*-cells patterns which need to parse the tree
to construct the type information. Given dtc already has all that
knowledge in checks, it's easier to do it there rather than
reimplement the same parsing in python.

Rob

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 3/5] checks: Add markers on known properties
  2021-10-11  5:09       ` David Gibson
@ 2021-10-11 13:57         ` Rob Herring
  0 siblings, 0 replies; 23+ messages in thread
From: Rob Herring @ 2021-10-11 13:57 UTC (permalink / raw)
  To: David Gibson; +Cc: Devicetree Compiler

On Mon, Oct 11, 2021 at 2:19 AM David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
>
> On Tue, Jul 27, 2021 at 12:30:21PM -0600, Rob Herring wrote:
> > For properties we already have checks for, we know the type and how to
> > parse them. Use this to add type and phandle markers so we have them when
> > the source did not (e.g. dtb format).
> >
> > Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> > ---
> > v3:
> >  - Rework marker_add() and callers into 3 functions. marker_add() just
> >    adds a marker. marker_add_type_annotations() adds type annotations
> >    at each entry offset. marker_add_phandle_annotation() adds a phandle
> >    and type annotation at a phandle offset.
> >  - Only add type info when no type markers are present at all.
> >  - Keep phandle ref check on phandle+args properties.
> > v2:
> >  - Set marker.ref on phandle markers
> >  - Avoid adding markers if there's any conflicting type markers.
> > ---
> >  checks.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 82 insertions(+), 5 deletions(-)
> >
> > diff --git a/checks.c b/checks.c
> > index e2690e90f90c..b7ac1732b0b8 100644
> > --- a/checks.c
> > +++ b/checks.c
> > @@ -58,6 +58,56 @@ struct check {
> >  #define CHECK(nm_, fn_, d_, ...) \
> >       CHECK_ENTRY(nm_, fn_, d_, false, false, __VA_ARGS__)
> >
> > +static struct marker *marker_add(struct marker **list, enum markertype type,
> > +                              unsigned int offset, char * ref)
> > +{
> > +     struct marker *m = *list;
> > +
> > +     m = xmalloc(sizeof(*m));
> > +     m->type = type;
> > +     m->offset = offset;
> > +     m->next = NULL;
> > +     m->ref = ref;
> > +
> > +     /* Find the insertion point, markers are in order by offset */
> > +     while (*list && ((*list)->offset < m->offset))
> > +             list = &((*list)->next);
> > +
> > +     if (*list) {
> > +             m->next = (*list)->next;
> > +             (*list)->next = m;
> > +     } else
> > +             *list = m;
> > +
> > +     return m;
> > +}
> > +
> > +static void marker_add_type_annotations(struct property *prop,
> > +                                     enum markertype type,
> > +                                     unsigned int entrylen)
> > +{
> > +     unsigned int offset;
> > +
> > +     assert(is_type_marker(type));
> > +
> > +     if (has_type_markers(prop->val.markers))
> > +             return;
> > +
> > +     for (offset = 0; offset < prop->val.len; offset += entrylen)
> > +             marker_add(&prop->val.markers, type, offset, NULL);
> > +}
> > +
> > +static void marker_add_phandle_annotation(struct property *prop,
> > +                                       unsigned int cell,
> > +                                       char *ref)
> > +{
> > +     if (!cell && has_type_markers(prop->val.markers))
> > +             return;
>
> So, you allow adding a phandle annotation if the property has no
> existing type markers *or* you're adding it after position 0.  That
> seems a bit arbitrary and fragile.

[...]

> > @@ -1776,8 +1849,12 @@ static struct node *get_remote_endpoint(struct check *c, struct dt_info *dti,
> >               return NULL;
> >
> >       node = get_node_by_phandle(dti->dt, phandle);
> > -     if (!node)
> > +     if (!node) {
> >               FAIL_PROP(c, dti, endpoint, prop, "graph phandle is not valid");
> > +             return NULL;
> > +     }
> > +
> > +     marker_add_phandle_annotation(prop, 0, node->fullpath);
>
> Shouldn't this be the same as check_property_phandle_args() and verify
> the type annotation if there's one already, only adding it if there
> are no pre-existing types.

That's why marker_add_phandle_annotation() has the above check, but
yeah, I guess for consistency, checking for existing type markers
needs to always be outside marker_add_phandle_annotation() calls.

Rob

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
       [not found]           ` <CAL_Jsq+sMrRWqqPgcvoaiY0rLSp_s+gXOqJ9OuFLQ-3piUHSVA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2021-10-13  3:12             ` David Gibson
  2021-10-14  1:29               ` Rob Herring
  0 siblings, 1 reply; 23+ messages in thread
From: David Gibson @ 2021-10-13  3:12 UTC (permalink / raw)
  To: Rob Herring; +Cc: Devicetree Compiler

[-- Attachment #1: Type: text/plain, Size: 2964 bytes --]

On Mon, Oct 11, 2021 at 08:22:54AM -0500, Rob Herring wrote:
> On Mon, Oct 11, 2021 at 2:19 AM David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> >
> > On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > > YAML output was restricted to dts input as there are some dependencies
> > > on source annotations which get lost with other input formats. With the
> > > addition of markers by the checks, YAML output from dtb format becomes
> > > more useful.
> > >
> > > Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> >
> > Urgh.  There's not really anything wrong with this patch of itself,
> > but it really underlines my feeling that the whole yaml output thing
> > is a bunch of hacks in pursuit of a bogus goal.
> 
> Validating DTs is a bogus goal?

Goal probably wasn't the right word.  Validating DTs is fine.  The
bogosity comes from doing the conversion to YAML essentially without
reference to the bindings / schemas.  Bindings describe how to
interpret the DT's bytestrings into meaninful numbers or whatever, so
using the bindings is the only reliable way of converting those
bytestrings into some more semantically useful representation.

> > Yaml output wants to include information that simply isn't present in
> > the flattened tree format (without considering bindings), so it relies
> > on formatting conventions in the dts, hence this test in the first
> > place.  This alleges it removes a restriction, but it only works if a
> > bunch of extra heuristics are able to guess the types correctly.
> 
> The goal here is to validate dtb files which I'd think you'd be in
> favor of given your above opinions. For that to work, we have to
> transform the data into the right types somewhere.

Yes - and that should be done with reference to specific bindings, not
using fragile heuristics.

> We don't need any
> heuristics for that. For the most part, it is done using the
> definitive type information from the schemas themselves to format the
> data.

Exactly.  That type information should come *from the schemas*.  Not
from separately maintained and fragile approximations to parts of the
schemas embedded into dtc.

> The exception is #*-cells patterns which need to parse the tree
> to construct the type information. Given dtc already has all that
> knowledge in checks, it's easier to do it there rather than
> reimplement the same parsing in python.

dtc only has parts of that knowledge in checks.  The checks have been
written with the assumption that in ambiguous cases we can just punt
and not run the check.  For the goal of truly parsing everything, the
current design of the checks subsystem really isn't adequate.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
  2021-10-13  3:12             ` David Gibson
@ 2021-10-14  1:29               ` Rob Herring
       [not found]                 ` <CAL_Jsq+iv7eM+LZ1O8d3V18dHraEAyfgdT8ucFKKVXZc9jEp4g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-10-14  1:29 UTC (permalink / raw)
  To: David Gibson; +Cc: Devicetree Compiler

On Wed, Oct 13, 2021 at 1:26 AM David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
>
> On Mon, Oct 11, 2021 at 08:22:54AM -0500, Rob Herring wrote:
> > On Mon, Oct 11, 2021 at 2:19 AM David Gibson
> > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > >
> > > On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > > > YAML output was restricted to dts input as there are some dependencies
> > > > on source annotations which get lost with other input formats. With the
> > > > addition of markers by the checks, YAML output from dtb format becomes
> > > > more useful.
> > > >
> > > > Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> > >
> > > Urgh.  There's not really anything wrong with this patch of itself,
> > > but it really underlines my feeling that the whole yaml output thing
> > > is a bunch of hacks in pursuit of a bogus goal.
> >
> > Validating DTs is a bogus goal?
>
> Goal probably wasn't the right word.  Validating DTs is fine.  The
> bogosity comes from doing the conversion to YAML essentially without
> reference to the bindings / schemas.  Bindings describe how to
> interpret the DT's bytestrings into meaninful numbers or whatever, so
> using the bindings is the only reliable way of converting those
> bytestrings into some more semantically useful representation.

That is exactly the direction I'm going. The YAML format can change if
we need it to (remember the plugin interface?).

> > > Yaml output wants to include information that simply isn't present in
> > > the flattened tree format (without considering bindings), so it relies
> > > on formatting conventions in the dts, hence this test in the first
> > > place.  This alleges it removes a restriction, but it only works if a
> > > bunch of extra heuristics are able to guess the types correctly.
> >
> > The goal here is to validate dtb files which I'd think you'd be in
> > favor of given your above opinions. For that to work, we have to
> > transform the data into the right types somewhere.
>
> Yes - and that should be done with reference to specific bindings, not
> using fragile heuristics.
>
> > We don't need any
> > heuristics for that. For the most part, it is done using the
> > definitive type information from the schemas themselves to format the
> > data.
>
> Exactly.  That type information should come *from the schemas*.  Not
> from separately maintained and fragile approximations to parts of the
> schemas embedded into dtc.

The same can be said for every client program, too. But we're so far
away from all knowledge about a binding flowing from a single source.
I'd love it if we could just generate the parsing code out of the
schemas to populate typed C structs for the OS to consume. The reality
is that knowledge about bindings resides in multiple places and dtc is
one of them.

> > The exception is #*-cells patterns which need to parse the tree
> > to construct the type information. Given dtc already has all that
> > knowledge in checks, it's easier to do it there rather than
> > reimplement the same parsing in python.
>
> dtc only has parts of that knowledge in checks.  The checks have been
> written with the assumption that in ambiguous cases we can just punt
> and not run the check.  For the goal of truly parsing everything, the
> current design of the checks subsystem really isn't adequate.

Yes, but handling 'foos' plus '#foo-cells' is a limited problem space
compared to all bindings and not one that fits well with binding
schemas. dtc already knows how to parse these properties and we don't
get new ones frequently. I'm just trying to use the knowledge that's
already in dtc.

I'm a bit worried about doing more in python too, because running
validation on 1000+ DT files is already ~2 hours. And we're only a
little over halfway converting bindings to schemas (though that's
probably a long tail of older and less used bindings).

Rob

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
       [not found]                 ` <CAL_Jsq+iv7eM+LZ1O8d3V18dHraEAyfgdT8ucFKKVXZc9jEp4g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2021-11-03  4:42                   ` David Gibson
  2021-11-03 15:59                     ` Rob Herring
  0 siblings, 1 reply; 23+ messages in thread
From: David Gibson @ 2021-11-03  4:42 UTC (permalink / raw)
  To: Rob Herring; +Cc: Devicetree Compiler

[-- Attachment #1: Type: text/plain, Size: 8374 bytes --]

On Wed, Oct 13, 2021 at 08:29:53PM -0500, Rob Herring wrote:
> On Wed, Oct 13, 2021 at 1:26 AM David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> >
> > On Mon, Oct 11, 2021 at 08:22:54AM -0500, Rob Herring wrote:
> > > On Mon, Oct 11, 2021 at 2:19 AM David Gibson
> > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > >
> > > > On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > > > > YAML output was restricted to dts input as there are some dependencies
> > > > > on source annotations which get lost with other input formats. With the
> > > > > addition of markers by the checks, YAML output from dtb format becomes
> > > > > more useful.
> > > > >
> > > > > Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> > > >
> > > > Urgh.  There's not really anything wrong with this patch of itself,
> > > > but it really underlines my feeling that the whole yaml output thing
> > > > is a bunch of hacks in pursuit of a bogus goal.
> > >
> > > Validating DTs is a bogus goal?
> >
> > Goal probably wasn't the right word.  Validating DTs is fine.  The
> > bogosity comes from doing the conversion to YAML essentially without
> > reference to the bindings / schemas.  Bindings describe how to
> > interpret the DT's bytestrings into meaninful numbers or whatever, so
> > using the bindings is the only reliable way of converting those
> > bytestrings into some more semantically useful representation.
> 
> That is exactly the direction I'm going.

Ok, that's good to hear.

> The YAML format can change if
> we need it to (remember the plugin interface?).

See, I find that worrying, not reassuring.  It feels like dtc is
chasing a fuzzy moving target with the yaml output.  I can see no
clear line between what parts of the decoding should be done by dtc
(in making the yaml type choices) and what parts should be done by
whatever consumes it.  Even if we could define a line, AFAICT it would
necessarily require dtc to know about *every* binding.  Not every part
of every binding, but at least part of every binding (enough to make
those type choices).

Encoding even part of every binding is an unbounded amount of work,
and not something that was ever really intended to be in dtc's scope.

Now, I realize I kind of started that fuzziness by introducing the
checks.  But there's a real difference between having some checks for
the most common errors and *requiring* annotation from the checks in
order to consume the output.  I don't see any sensible place to stop
with incorporating of this stuff into the checks, short of absorbing
the entire validation effort, which I don't think either of us wants.

In the meantime the only real spec for what dtc needs to output in
yaml mode is "what the current validation tools want", which means you
have to watch for version synchronization between dtc and the
validation tools which sounds like a real pain.


On top of that even if we had a clear boundary between "first stage"
and "second stage" validation, I think YAML has some pretty serious
drawbacks as the format for the first to communicate to the second.
The main one being that we can't safely communicate 64-bit ints across
it (since YAML is JSON-derived, its "numbers" are actually floats,
which can't safely carry integers above ~2^53).  It also can't
naturally represent "blobs" which are sometimes in dtbs, if they're
not valid Unicode.  Then there's the "Norway problem"[0].  I'm pretty
sure we quote all our strings so we won't hit that one, but it
definitely gives me the heebie-jeebies about trusting YAML parsers
with anything requiring precision.

> > > > Yaml output wants to include information that simply isn't present in
> > > > the flattened tree format (without considering bindings), so it relies
> > > > on formatting conventions in the dts, hence this test in the first
> > > > place.  This alleges it removes a restriction, but it only works if a
> > > > bunch of extra heuristics are able to guess the types correctly.
> > >
> > > The goal here is to validate dtb files which I'd think you'd be in
> > > favor of given your above opinions. For that to work, we have to
> > > transform the data into the right types somewhere.
> >
> > Yes - and that should be done with reference to specific bindings, not
> > using fragile heuristics.
> >
> > > We don't need any
> > > heuristics for that. For the most part, it is done using the
> > > definitive type information from the schemas themselves to format the
> > > data.
> >
> > Exactly.  That type information should come *from the schemas*.  Not
> > from separately maintained and fragile approximations to parts of the
> > schemas embedded into dtc.
> 
> The same can be said for every client program, too. But we're so far
> away from all knowledge about a binding flowing from a single source.
> I'd love it if we could just generate the parsing code out of the
> schemas to populate typed C structs for the OS to consume. The reality
> is that knowledge about bindings resides in multiple places and dtc is
> one of them.

That's really not true on the dtb client side.  No, we don't have
automated tooling translating a machine readable binding into code.
However, generally all the knowledge *is* in the (human readable)
binding, and the client will have a (manual) translation of all that
into code for the properties it cares about.

Automated tooling would be great, but even absent that, dtb clients
read and decode *bytestrings*, not structured data, and dtc generates
bytestrings just fine.

> > > The exception is #*-cells patterns which need to parse the tree
> > > to construct the type information. Given dtc already has all that
> > > knowledge in checks, it's easier to do it there rather than
> > > reimplement the same parsing in python.
> >
> > dtc only has parts of that knowledge in checks.  The checks have been
> > written with the assumption that in ambiguous cases we can just punt
> > and not run the check.  For the goal of truly parsing everything, the
> > current design of the checks subsystem really isn't adequate.
> 
> Yes, but handling 'foos' plus '#foo-cells' is a limited problem space

Every thing like this is a limited problem space, but there's an
unbounded number of possible things.  Like I say there's no clear
boundary to what dtc should be doing and what it shouldn't.  Given
what can be done with YAML, we're pretty much being deliberately
incomplete if dtc does anything short of reliably and correctly typing
*every* property, which in turn means knowing (part of) *every*
binding.  I'm not really willing for that to be in scope for dtc.

> compared to all bindings and not one that fits well with binding
> schemas.

Yeah.. the way what I've seen of json etc. schemas work doesn't really
mesh well with the sorts of constraints we have.  But I don't think a
messy split between "first stage" and "second stage" validation
particularly helps with that.

> dtc already knows how to parse these properties and we don't
> get new ones frequently. I'm just trying to use the knowledge that's
> already in dtc.

Again, there's a real difference betwen knowing about some of them in
order to catch the most common mistakes, and *having* to know about
all of them in order to produce correct output.

> I'm a bit worried about doing more in python too, because running
> validation on 1000+ DT files is already ~2 hours. And we're only a
> little over halfway converting bindings to schemas (though that's
> probably a long tail of older and less used bindings).

Heh.  Ok, but there's no reason you couldn't bundle a dtb->yaml
preprocessor written in C (or Rust, or Go) with the rest of the
validation tools.  Then it would be colocated with the rest of the
binding information and can be updated in lockstep.  Or better yet,
write a preprocessor that goes direct from dtb to Python native data
types, avoiding the problems with YAML.

[0] https://hitchdev.com/strictyaml/why/implicit-typing-removed/

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
  2021-11-03  4:42                   ` David Gibson
@ 2021-11-03 15:59                     ` Rob Herring
       [not found]                       ` <CAL_JsqLNyfe8Ou4RXeLm0ie1vvJ+z15J6EDgB96dPbCC5qvHVQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-11-03 15:59 UTC (permalink / raw)
  To: David Gibson, Simon Glass; +Cc: Devicetree Compiler

On Tue, Nov 2, 2021 at 11:42 PM David Gibson
<david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
>
> On Wed, Oct 13, 2021 at 08:29:53PM -0500, Rob Herring wrote:
> > On Wed, Oct 13, 2021 at 1:26 AM David Gibson
> > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > >
> > > On Mon, Oct 11, 2021 at 08:22:54AM -0500, Rob Herring wrote:
> > > > On Mon, Oct 11, 2021 at 2:19 AM David Gibson
> > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > >
> > > > > On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > > > > > YAML output was restricted to dts input as there are some dependencies
> > > > > > on source annotations which get lost with other input formats. With the
> > > > > > addition of markers by the checks, YAML output from dtb format becomes
> > > > > > more useful.
> > > > > >
> > > > > > Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> > > > >
> > > > > Urgh.  There's not really anything wrong with this patch of itself,
> > > > > but it really underlines my feeling that the whole yaml output thing
> > > > > is a bunch of hacks in pursuit of a bogus goal.
> > > >
> > > > Validating DTs is a bogus goal?
> > >
> > > Goal probably wasn't the right word.  Validating DTs is fine.  The
> > > bogosity comes from doing the conversion to YAML essentially without
> > > reference to the bindings / schemas.  Bindings describe how to
> > > interpret the DT's bytestrings into meaninful numbers or whatever, so
> > > using the bindings is the only reliable way of converting those
> > > bytestrings into some more semantically useful representation.
> >
> > That is exactly the direction I'm going.
>
> Ok, that's good to hear.
>
> > The YAML format can change if
> > we need it to (remember the plugin interface?).
>
> See, I find that worrying, not reassuring.  It feels like dtc is
> chasing a fuzzy moving target with the yaml output.

I meant either it goes away entirely or a 2.0 version rather than
continual incremental changes.

>  I can see no
> clear line between what parts of the decoding should be done by dtc
> (in making the yaml type choices) and what parts should be done by
> whatever consumes it.  Even if we could define a line, AFAICT it would
> necessarily require dtc to know about *every* binding.  Not every part
> of every binding, but at least part of every binding (enough to make
> those type choices).
>
> Encoding even part of every binding is an unbounded amount of work,
> and not something that was ever really intended to be in dtc's scope.
>
> Now, I realize I kind of started that fuzziness by introducing the
> checks.  But there's a real difference between having some checks for
> the most common errors and *requiring* annotation from the checks in
> order to consume the output.  I don't see any sensible place to stop
> with incorporating of this stuff into the checks, short of absorbing
> the entire validation effort, which I don't think either of us wants.

Only in the form of a plugin. A big part of that was to get source
line numbers for warnings.

>
> In the meantime the only real spec for what dtc needs to output in
> yaml mode is "what the current validation tools want", which means you
> have to watch for version synchronization between dtc and the
> validation tools which sounds like a real pain.

In practice, the format hasn't changed. The lack of spec was more to
avoid any explicit endorsement of the format (and well, laziness).

> On top of that even if we had a clear boundary between "first stage"
> and "second stage" validation, I think YAML has some pretty serious
> drawbacks as the format for the first to communicate to the second.
> The main one being that we can't safely communicate 64-bit ints across
> it (since YAML is JSON-derived, its "numbers" are actually floats,
> which can't safely carry integers above ~2^53).  It also can't
> naturally represent "blobs" which are sometimes in dtbs, if they're
> not valid Unicode.  Then there's the "Norway problem"[0].  I'm pretty
> sure we quote all our strings so we won't hit that one, but it
> definitely gives me the heebie-jeebies about trusting YAML parsers
> with anything requiring precision.

Fortunately, we've avoided problems there. Perhaps that's because we
generally don't care about the actual value of numbers in validation.
I did hit the Norway problem with booleans, but YAML 1.2 addresses
that.

> > > > > Yaml output wants to include information that simply isn't present in
> > > > > the flattened tree format (without considering bindings), so it relies
> > > > > on formatting conventions in the dts, hence this test in the first
> > > > > place.  This alleges it removes a restriction, but it only works if a
> > > > > bunch of extra heuristics are able to guess the types correctly.
> > > >
> > > > The goal here is to validate dtb files which I'd think you'd be in
> > > > favor of given your above opinions. For that to work, we have to
> > > > transform the data into the right types somewhere.
> > >
> > > Yes - and that should be done with reference to specific bindings, not
> > > using fragile heuristics.
> > >
> > > > We don't need any
> > > > heuristics for that. For the most part, it is done using the
> > > > definitive type information from the schemas themselves to format the
> > > > data.
> > >
> > > Exactly.  That type information should come *from the schemas*.  Not
> > > from separately maintained and fragile approximations to parts of the
> > > schemas embedded into dtc.
> >
> > The same can be said for every client program, too. But we're so far
> > away from all knowledge about a binding flowing from a single source.
> > I'd love it if we could just generate the parsing code out of the
> > schemas to populate typed C structs for the OS to consume. The reality
> > is that knowledge about bindings resides in multiple places and dtc is
> > one of them.
>
> That's really not true on the dtb client side.  No, we don't have
> automated tooling translating a machine readable binding into code.
> However, generally all the knowledge *is* in the (human readable)
> binding, and the client will have a (manual) translation of all that
> into code for the properties it cares about.
>
> Automated tooling would be great, but even absent that, dtb clients
> read and decode *bytestrings*, not structured data, and dtc generates
> bytestrings just fine.
>
> > > > The exception is #*-cells patterns which need to parse the tree
> > > > to construct the type information. Given dtc already has all that
> > > > knowledge in checks, it's easier to do it there rather than
> > > > reimplement the same parsing in python.
> > >
> > > dtc only has parts of that knowledge in checks.  The checks have been
> > > written with the assumption that in ambiguous cases we can just punt
> > > and not run the check.  For the goal of truly parsing everything, the
> > > current design of the checks subsystem really isn't adequate.
> >
> > Yes, but handling 'foos' plus '#foo-cells' is a limited problem space
>
> Every thing like this is a limited problem space, but there's an
> unbounded number of possible things.  Like I say there's no clear
> boundary to what dtc should be doing and what it shouldn't.  Given
> what can be done with YAML, we're pretty much being deliberately
> incomplete if dtc does anything short of reliably and correctly typing
> *every* property, which in turn means knowing (part of) *every*
> binding.  I'm not really willing for that to be in scope for dtc.
>
> > compared to all bindings and not one that fits well with binding
> > schemas.
>
> Yeah.. the way what I've seen of json etc. schemas work doesn't really
> mesh well with the sorts of constraints we have.  But I don't think a
> messy split between "first stage" and "second stage" validation
> particularly helps with that.
>
> > dtc already knows how to parse these properties and we don't
> > get new ones frequently. I'm just trying to use the knowledge that's
> > already in dtc.
>
> Again, there's a real difference betwen knowing about some of them in
> order to catch the most common mistakes, and *having* to know about
> all of them in order to produce correct output.
>
> > I'm a bit worried about doing more in python too, because running
> > validation on 1000+ DT files is already ~2 hours. And we're only a
> > little over halfway converting bindings to schemas (though that's
> > probably a long tail of older and less used bindings).
>
> Heh.  Ok, but there's no reason you couldn't bundle a dtb->yaml
> preprocessor written in C (or Rust, or Go) with the rest of the
> validation tools.  Then it would be colocated with the rest of the
> binding information and can be updated in lockstep.

That's a great idea. I found some code on the internet written in C
that already does dtb->yaml conversion, so I can use that. Do you
think it is any good[1]? ;)

>  Or better yet,
> write a preprocessor that goes direct from dtb to Python native data
> types, avoiding the problems with YAML.

That's exactly what the plugin did. Maybe the last patch should have
been removing YAML output. You seemed fairly lukewarm on the whole
thing, so it seemed like it was going to take more time than I had to
spend on it.

Maybe using pylibfdt could work here though it doesn't already
unflatten the tree into dictionaries. Maybe that already exists
somewhere. Simon?

Rob

[1] https://git.kernel.org/pub/scm/utils/dtc/dtc.git/

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
       [not found]                       ` <CAL_JsqLNyfe8Ou4RXeLm0ie1vvJ+z15J6EDgB96dPbCC5qvHVQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2021-11-03 19:16                         ` Rob Herring
       [not found]                           ` <CAL_JsqKeNne0Bf0ahG_h977snsBtsk3hbQOPO-6RiyFSyiOsfA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2021-11-09  4:41                         ` David Gibson
  1 sibling, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-11-03 19:16 UTC (permalink / raw)
  To: David Gibson, Simon Glass; +Cc: Devicetree Compiler

On Wed, Nov 3, 2021 at 10:59 AM Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>
> On Tue, Nov 2, 2021 at 11:42 PM David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> >
> > On Wed, Oct 13, 2021 at 08:29:53PM -0500, Rob Herring wrote:
> > > On Wed, Oct 13, 2021 at 1:26 AM David Gibson
> > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > >
> > > > On Mon, Oct 11, 2021 at 08:22:54AM -0500, Rob Herring wrote:
> > > > > On Mon, Oct 11, 2021 at 2:19 AM David Gibson
> > > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > > >
> > > > > > On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > > > > > > YAML output was restricted to dts input as there are some dependencies
> > > > > > > on source annotations which get lost with other input formats. With the
> > > > > > > addition of markers by the checks, YAML output from dtb format becomes
> > > > > > > more useful.

> > Heh.  Ok, but there's no reason you couldn't bundle a dtb->yaml
> > preprocessor written in C (or Rust, or Go) with the rest of the
> > validation tools.  Then it would be colocated with the rest of the
> > binding information and can be updated in lockstep.
>
> That's a great idea. I found some code on the internet written in C
> that already does dtb->yaml conversion, so I can use that. Do you
> think it is any good[1]? ;)
>
> >  Or better yet,
> > write a preprocessor that goes direct from dtb to Python native data
> > types, avoiding the problems with YAML.
>
> That's exactly what the plugin did. Maybe the last patch should have
> been removing YAML output. You seemed fairly lukewarm on the whole
> thing, so it seemed like it was going to take more time than I had to
> spend on it.
>
> Maybe using pylibfdt could work here though it doesn't already
> unflatten the tree into dictionaries. Maybe that already exists
> somewhere. Simon?

dtoc in u-boot is one though it unpacks into custom classes rather
than pure dict I need. But implementing unpacking was quite simple.
The harder part seems to be the lack of any packaging for pylibfdt.

Rob

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
       [not found]                           ` <CAL_JsqKeNne0Bf0ahG_h977snsBtsk3hbQOPO-6RiyFSyiOsfA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2021-11-03 19:56                             ` Simon Glass
       [not found]                               ` <CAPnjgZ3BNPW+L4BrhQ9W4nLovSgcchWPfbznYxtf20sv2ntYaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Simon Glass @ 2021-11-03 19:56 UTC (permalink / raw)
  To: Rob Herring; +Cc: David Gibson, Devicetree Compiler

Hi,

On Wed, 3 Nov 2021 at 13:17, Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>
> On Wed, Nov 3, 2021 at 10:59 AM Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> >
> > On Tue, Nov 2, 2021 at 11:42 PM David Gibson
> > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > >
> > > On Wed, Oct 13, 2021 at 08:29:53PM -0500, Rob Herring wrote:
> > > > On Wed, Oct 13, 2021 at 1:26 AM David Gibson
> > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > >
> > > > > On Mon, Oct 11, 2021 at 08:22:54AM -0500, Rob Herring wrote:
> > > > > > On Mon, Oct 11, 2021 at 2:19 AM David Gibson
> > > > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > > > >
> > > > > > > On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > > > > > > > YAML output was restricted to dts input as there are some dependencies
> > > > > > > > on source annotations which get lost with other input formats. With the
> > > > > > > > addition of markers by the checks, YAML output from dtb format becomes
> > > > > > > > more useful.
>
> > > Heh.  Ok, but there's no reason you couldn't bundle a dtb->yaml
> > > preprocessor written in C (or Rust, or Go) with the rest of the
> > > validation tools.  Then it would be colocated with the rest of the
> > > binding information and can be updated in lockstep.
> >
> > That's a great idea. I found some code on the internet written in C
> > that already does dtb->yaml conversion, so I can use that. Do you
> > think it is any good[1]? ;)
> >
> > >  Or better yet,
> > > write a preprocessor that goes direct from dtb to Python native data
> > > types, avoiding the problems with YAML.
> >
> > That's exactly what the plugin did. Maybe the last patch should have
> > been removing YAML output. You seemed fairly lukewarm on the whole
> > thing, so it seemed like it was going to take more time than I had to
> > spend on it.
> >
> > Maybe using pylibfdt could work here though it doesn't already
> > unflatten the tree into dictionaries. Maybe that already exists
> > somewhere. Simon?
>
> dtoc in u-boot is one though it unpacks into custom classes rather
> than pure dict I need. But implementing unpacking was quite simple.
> The harder part seems to be the lack of any packaging for pylibfdt.

Packaging?

Yes dtoc has some Python classes for reading the DT and updating it
(with a separate 'commit' step). I am not entirely happy with the
state of it though. If you want to tidy it up and upstream it, that
would be great. If you me some pointers as to what you need, I could
try it. I need to get back to the fdtgrep stuff in the next week or
do, so the timing is right.

Regards,
Simon

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
       [not found]                               ` <CAPnjgZ3BNPW+L4BrhQ9W4nLovSgcchWPfbznYxtf20sv2ntYaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2021-11-03 22:42                                 ` Rob Herring
       [not found]                                   ` <CAL_JsqLdKsVO9kzkZcqL160COrpEp6GsR10AyPvmSG3B=grAXg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 23+ messages in thread
From: Rob Herring @ 2021-11-03 22:42 UTC (permalink / raw)
  To: Simon Glass; +Cc: David Gibson, Devicetree Compiler

On Wed, Nov 3, 2021 at 2:56 PM Simon Glass <sjg-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> wrote:
>
> Hi,
>
> On Wed, 3 Nov 2021 at 13:17, Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> >
> > On Wed, Nov 3, 2021 at 10:59 AM Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> > >
> > > On Tue, Nov 2, 2021 at 11:42 PM David Gibson
> > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > >
> > > > On Wed, Oct 13, 2021 at 08:29:53PM -0500, Rob Herring wrote:
> > > > > On Wed, Oct 13, 2021 at 1:26 AM David Gibson
> > > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > > >
> > > > > > On Mon, Oct 11, 2021 at 08:22:54AM -0500, Rob Herring wrote:
> > > > > > > On Mon, Oct 11, 2021 at 2:19 AM David Gibson
> > > > > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > > > > >
> > > > > > > > On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > > > > > > > > YAML output was restricted to dts input as there are some dependencies
> > > > > > > > > on source annotations which get lost with other input formats. With the
> > > > > > > > > addition of markers by the checks, YAML output from dtb format becomes
> > > > > > > > > more useful.
> >
> > > > Heh.  Ok, but there's no reason you couldn't bundle a dtb->yaml
> > > > preprocessor written in C (or Rust, or Go) with the rest of the
> > > > validation tools.  Then it would be colocated with the rest of the
> > > > binding information and can be updated in lockstep.
> > >
> > > That's a great idea. I found some code on the internet written in C
> > > that already does dtb->yaml conversion, so I can use that. Do you
> > > think it is any good[1]? ;)
> > >
> > > >  Or better yet,
> > > > write a preprocessor that goes direct from dtb to Python native data
> > > > types, avoiding the problems with YAML.
> > >
> > > That's exactly what the plugin did. Maybe the last patch should have
> > > been removing YAML output. You seemed fairly lukewarm on the whole
> > > thing, so it seemed like it was going to take more time than I had to
> > > spend on it.
> > >
> > > Maybe using pylibfdt could work here though it doesn't already
> > > unflatten the tree into dictionaries. Maybe that already exists
> > > somewhere. Simon?
> >
> > dtoc in u-boot is one though it unpacks into custom classes rather
> > than pure dict I need. But implementing unpacking was quite simple.
> > The harder part seems to be the lack of any packaging for pylibfdt.
>
> Packaging?

In PyPi: pip install libfdt

Or packaged by the distro. Then I can just add it as a dependency to
dtschema and it all just works for users.

> Yes dtoc has some Python classes for reading the DT and updating it
> (with a separate 'commit' step). I am not entirely happy with the
> state of it though. If you want to tidy it up and upstream it, that
> would be great. If you me some pointers as to what you need, I could
> try it. I need to get back to the fdtgrep stuff in the next week or
> do, so the timing is right.

I've got something working. The Node and Prop classes didn't really
work for me as I need the DT represented as a dict. (And using it
would be a licensing issue too as dtschema is BSD).

Rob

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
       [not found]                                   ` <CAL_JsqLdKsVO9kzkZcqL160COrpEp6GsR10AyPvmSG3B=grAXg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2021-11-03 22:48                                     ` Simon Glass
  0 siblings, 0 replies; 23+ messages in thread
From: Simon Glass @ 2021-11-03 22:48 UTC (permalink / raw)
  To: Rob Herring; +Cc: David Gibson, Devicetree Compiler

Hi Rob,

On Wed, 3 Nov 2021 at 16:42, Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
>
> On Wed, Nov 3, 2021 at 2:56 PM Simon Glass <sjg-F7+t8E8rja9g9hUCZPvPmw@public.gmane.org> wrote:
> >
> > Hi,
> >
> > On Wed, 3 Nov 2021 at 13:17, Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> > >
> > > On Wed, Nov 3, 2021 at 10:59 AM Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> > > >
> > > > On Tue, Nov 2, 2021 at 11:42 PM David Gibson
> > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > >
> > > > > On Wed, Oct 13, 2021 at 08:29:53PM -0500, Rob Herring wrote:
> > > > > > On Wed, Oct 13, 2021 at 1:26 AM David Gibson
> > > > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > > > >
> > > > > > > On Mon, Oct 11, 2021 at 08:22:54AM -0500, Rob Herring wrote:
> > > > > > > > On Mon, Oct 11, 2021 at 2:19 AM David Gibson
> > > > > > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > > > > > >
> > > > > > > > > On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > > > > > > > > > YAML output was restricted to dts input as there are some dependencies
> > > > > > > > > > on source annotations which get lost with other input formats. With the
> > > > > > > > > > addition of markers by the checks, YAML output from dtb format becomes
> > > > > > > > > > more useful.
> > >
> > > > > Heh.  Ok, but there's no reason you couldn't bundle a dtb->yaml
> > > > > preprocessor written in C (or Rust, or Go) with the rest of the
> > > > > validation tools.  Then it would be colocated with the rest of the
> > > > > binding information and can be updated in lockstep.
> > > >
> > > > That's a great idea. I found some code on the internet written in C
> > > > that already does dtb->yaml conversion, so I can use that. Do you
> > > > think it is any good[1]? ;)
> > > >
> > > > >  Or better yet,
> > > > > write a preprocessor that goes direct from dtb to Python native data
> > > > > types, avoiding the problems with YAML.
> > > >
> > > > That's exactly what the plugin did. Maybe the last patch should have
> > > > been removing YAML output. You seemed fairly lukewarm on the whole
> > > > thing, so it seemed like it was going to take more time than I had to
> > > > spend on it.
> > > >
> > > > Maybe using pylibfdt could work here though it doesn't already
> > > > unflatten the tree into dictionaries. Maybe that already exists
> > > > somewhere. Simon?
> > >
> > > dtoc in u-boot is one though it unpacks into custom classes rather
> > > than pure dict I need. But implementing unpacking was quite simple.
> > > The harder part seems to be the lack of any packaging for pylibfdt.
> >
> > Packaging?
>
> In PyPi: pip install libfdt
>
> Or packaged by the distro. Then I can just add it as a dependency to
> dtschema and it all just works for users.
>
> > Yes dtoc has some Python classes for reading the DT and updating it
> > (with a separate 'commit' step). I am not entirely happy with the
> > state of it though. If you want to tidy it up and upstream it, that
> > would be great. If you me some pointers as to what you need, I could
> > try it. I need to get back to the fdtgrep stuff in the next week or
> > do, so the timing is right.
>
> I've got something working. The Node and Prop classes didn't really
> work for me as I need the DT represented as a dict. (And using it
> would be a licensing issue too as dtschema is BSD).

OK, sounds good.

Regards,
Simon

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output
       [not found]                       ` <CAL_JsqLNyfe8Ou4RXeLm0ie1vvJ+z15J6EDgB96dPbCC5qvHVQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2021-11-03 19:16                         ` Rob Herring
@ 2021-11-09  4:41                         ` David Gibson
  1 sibling, 0 replies; 23+ messages in thread
From: David Gibson @ 2021-11-09  4:41 UTC (permalink / raw)
  To: Rob Herring; +Cc: Simon Glass, Devicetree Compiler

[-- Attachment #1: Type: text/plain, Size: 12144 bytes --]

On Wed, Nov 03, 2021 at 10:59:39AM -0500, Rob Herring wrote:
> On Tue, Nov 2, 2021 at 11:42 PM David Gibson
> <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> >
> > On Wed, Oct 13, 2021 at 08:29:53PM -0500, Rob Herring wrote:
> > > On Wed, Oct 13, 2021 at 1:26 AM David Gibson
> > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > >
> > > > On Mon, Oct 11, 2021 at 08:22:54AM -0500, Rob Herring wrote:
> > > > > On Mon, Oct 11, 2021 at 2:19 AM David Gibson
> > > > > <david-xT8FGy+AXnRB3Ne2BGzF6laj5H9X9Tb+@public.gmane.org> wrote:
> > > > > >
> > > > > > On Tue, Jul 27, 2021 at 12:30:22PM -0600, Rob Herring wrote:
> > > > > > > YAML output was restricted to dts input as there are some dependencies
> > > > > > > on source annotations which get lost with other input formats. With the
> > > > > > > addition of markers by the checks, YAML output from dtb format becomes
> > > > > > > more useful.
> > > > > > >
> > > > > > > Signed-off-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
> > > > > >
> > > > > > Urgh.  There's not really anything wrong with this patch of itself,
> > > > > > but it really underlines my feeling that the whole yaml output thing
> > > > > > is a bunch of hacks in pursuit of a bogus goal.
> > > > >
> > > > > Validating DTs is a bogus goal?
> > > >
> > > > Goal probably wasn't the right word.  Validating DTs is fine.  The
> > > > bogosity comes from doing the conversion to YAML essentially without
> > > > reference to the bindings / schemas.  Bindings describe how to
> > > > interpret the DT's bytestrings into meaninful numbers or whatever, so
> > > > using the bindings is the only reliable way of converting those
> > > > bytestrings into some more semantically useful representation.
> > >
> > > That is exactly the direction I'm going.
> >
> > Ok, that's good to hear.
> >
> > > The YAML format can change if
> > > we need it to (remember the plugin interface?).
> >
> > See, I find that worrying, not reassuring.  It feels like dtc is
> > chasing a fuzzy moving target with the yaml output.
> 
> I meant either it goes away entirely or a 2.0 version rather than
> continual incremental changes.

That makes sense... but every change to the type tagging implicitly
changes the yaml output.  So there have already been a number of
incremental changes, and here you're proposing more.

Again, the problem is I can't see any natural scope to what dtc does
in terms of type tagging in between nothing and being fully integrated
with schema validation (which implies knowing at least part of every
binding).

> >  I can see no
> > clear line between what parts of the decoding should be done by dtc
> > (in making the yaml type choices) and what parts should be done by
> > whatever consumes it.  Even if we could define a line, AFAICT it would
> > necessarily require dtc to know about *every* binding.  Not every part
> > of every binding, but at least part of every binding (enough to make
> > those type choices).
> >
> > Encoding even part of every binding is an unbounded amount of work,
> > and not something that was ever really intended to be in dtc's scope.
> >
> > Now, I realize I kind of started that fuzziness by introducing the
> > checks.  But there's a real difference between having some checks for
> > the most common errors and *requiring* annotation from the checks in
> > order to consume the output.  I don't see any sensible place to stop
> > with incorporating of this stuff into the checks, short of absorbing
> > the entire validation effort, which I don't think either of us wants.
> 
> Only in the form of a plugin. A big part of that was to get source
> line numbers for warnings.

Again, that's not the case in the actual patches I've seen so far.

> > In the meantime the only real spec for what dtc needs to output in
> > yaml mode is "what the current validation tools want", which means you
> > have to watch for version synchronization between dtc and the
> > validation tools which sounds like a real pain.
> 
> In practice, the format hasn't changed. The lack of spec was more to
> avoid any explicit endorsement of the format (and well, laziness).

As I've said, every change to type tagging changes the YAML format, so
that really doesn't seem true to me.

> > On top of that even if we had a clear boundary between "first stage"
> > and "second stage" validation, I think YAML has some pretty serious
> > drawbacks as the format for the first to communicate to the second.
> > The main one being that we can't safely communicate 64-bit ints across
> > it (since YAML is JSON-derived, its "numbers" are actually floats,
> > which can't safely carry integers above ~2^53).  It also can't
> > naturally represent "blobs" which are sometimes in dtbs, if they're
> > not valid Unicode.  Then there's the "Norway problem"[0].  I'm pretty
> > sure we quote all our strings so we won't hit that one, but it
> > definitely gives me the heebie-jeebies about trusting YAML parsers
> > with anything requiring precision.
> 
> Fortunately, we've avoided problems there. Perhaps that's because we
> generally don't care about the actual value of numbers in validation.
> I did hit the Norway problem with booleans, but YAML 1.2 addresses
> that.
> 
> > > > > > Yaml output wants to include information that simply isn't present in
> > > > > > the flattened tree format (without considering bindings), so it relies
> > > > > > on formatting conventions in the dts, hence this test in the first
> > > > > > place.  This alleges it removes a restriction, but it only works if a
> > > > > > bunch of extra heuristics are able to guess the types correctly.
> > > > >
> > > > > The goal here is to validate dtb files which I'd think you'd be in
> > > > > favor of given your above opinions. For that to work, we have to
> > > > > transform the data into the right types somewhere.
> > > >
> > > > Yes - and that should be done with reference to specific bindings, not
> > > > using fragile heuristics.
> > > >
> > > > > We don't need any
> > > > > heuristics for that. For the most part, it is done using the
> > > > > definitive type information from the schemas themselves to format the
> > > > > data.
> > > >
> > > > Exactly.  That type information should come *from the schemas*.  Not
> > > > from separately maintained and fragile approximations to parts of the
> > > > schemas embedded into dtc.
> > >
> > > The same can be said for every client program, too. But we're so far
> > > away from all knowledge about a binding flowing from a single source.
> > > I'd love it if we could just generate the parsing code out of the
> > > schemas to populate typed C structs for the OS to consume. The reality
> > > is that knowledge about bindings resides in multiple places and dtc is
> > > one of them.
> >
> > That's really not true on the dtb client side.  No, we don't have
> > automated tooling translating a machine readable binding into code.
> > However, generally all the knowledge *is* in the (human readable)
> > binding, and the client will have a (manual) translation of all that
> > into code for the properties it cares about.
> >
> > Automated tooling would be great, but even absent that, dtb clients
> > read and decode *bytestrings*, not structured data, and dtc generates
> > bytestrings just fine.
> >
> > > > > The exception is #*-cells patterns which need to parse the tree
> > > > > to construct the type information. Given dtc already has all that
> > > > > knowledge in checks, it's easier to do it there rather than
> > > > > reimplement the same parsing in python.
> > > >
> > > > dtc only has parts of that knowledge in checks.  The checks have been
> > > > written with the assumption that in ambiguous cases we can just punt
> > > > and not run the check.  For the goal of truly parsing everything, the
> > > > current design of the checks subsystem really isn't adequate.
> > >
> > > Yes, but handling 'foos' plus '#foo-cells' is a limited problem space
> >
> > Every thing like this is a limited problem space, but there's an
> > unbounded number of possible things.  Like I say there's no clear
> > boundary to what dtc should be doing and what it shouldn't.  Given
> > what can be done with YAML, we're pretty much being deliberately
> > incomplete if dtc does anything short of reliably and correctly typing
> > *every* property, which in turn means knowing (part of) *every*
> > binding.  I'm not really willing for that to be in scope for dtc.
> >
> > > compared to all bindings and not one that fits well with binding
> > > schemas.
> >
> > Yeah.. the way what I've seen of json etc. schemas work doesn't really
> > mesh well with the sorts of constraints we have.  But I don't think a
> > messy split between "first stage" and "second stage" validation
> > particularly helps with that.
> >
> > > dtc already knows how to parse these properties and we don't
> > > get new ones frequently. I'm just trying to use the knowledge that's
> > > already in dtc.
> >
> > Again, there's a real difference betwen knowing about some of them in
> > order to catch the most common mistakes, and *having* to know about
> > all of them in order to produce correct output.
> >
> > > I'm a bit worried about doing more in python too, because running
> > > validation on 1000+ DT files is already ~2 hours. And we're only a
> > > little over halfway converting bindings to schemas (though that's
> > > probably a long tail of older and less used bindings).
> >
> > Heh.  Ok, but there's no reason you couldn't bundle a dtb->yaml
> > preprocessor written in C (or Rust, or Go) with the rest of the
> > validation tools.  Then it would be colocated with the rest of the
> > binding information and can be updated in lockstep.
> 
> That's a great idea. I found some code on the internet written in C
> that already does dtb->yaml conversion, so I can use that. Do you
> think it is any good[1]? ;)

Hardy har har.  But more seriously: clearly dtc *doesn't* suit your
needs for this right now, since you keep sending patches to change its
behaviour here.  I can't see where those changes can converge sensibly
short of knowing every schema.  The difference with a tool inside the
validation repo is that it can be updated in lockstep with the schemas
here, so "provide just enough info that the schema checker needs"
becomes a workable goal where it's not for dtc as an independent
project.

If you want to fork dtc as a first step to making such a tool, then by
all means go ahead.  What I'm not comfortable doing is merging and
maintaining a bunch of things for type tagging without a clear picture
of what the end goal is (and maybe not even then, depending on how
much work is involved in that end goal).

> >  Or better yet,
> > write a preprocessor that goes direct from dtb to Python native data
> > types, avoiding the problems with YAML.
> 
> That's exactly what the plugin did. Maybe the last patch should have
> been removing YAML output.

Well, maybe.

> You seemed fairly lukewarm on the whole
> thing, so it seemed like it was going to take more time than I had to
> spend on it.

It's been long enough that I don't clearly remember why.  I think part
of it was that the interchange format between dtc and the plugin
seemed very ad-hoc, and therefore hard to keep stable.  That's
kind of the same problem I see with YAML as a typed output format
going into something that cares about the types.

> Maybe using pylibfdt could work here though it doesn't already
> unflatten the tree into dictionaries. Maybe that already exists
> somewhere. Simon?

You could certainly accept dtb input using pylibfdt.  It probably will
be pretty slow to do that, if that's a concern.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2021-11-09  4:41 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-27 18:30 [PATCH v3 0/5] Improve output type formatting Rob Herring
     [not found] ` <20210727183023.3212077-1-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2021-07-27 18:30   ` [PATCH v3 1/5] Move marker functions to dtc.h Rob Herring
     [not found]     ` <20210727183023.3212077-2-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2021-08-26  4:00       ` David Gibson
2021-09-27 20:56         ` Rob Herring
     [not found]           ` <CAL_Jsq+QAnyu=Fj_RtRu-dS4Ta0bZJWFUABsK1uKbnAKpXi1mA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2021-10-11  5:29             ` David Gibson
2021-07-27 18:30   ` [PATCH v3 2/5] Add has_type_markers() helper Rob Herring
2021-07-27 18:30   ` [PATCH v3 3/5] checks: Add markers on known properties Rob Herring
     [not found]     ` <20210727183023.3212077-4-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2021-10-11  5:09       ` David Gibson
2021-10-11 13:57         ` Rob Herring
2021-07-27 18:30   ` [PATCH v3 4/5] dtc: Drop dts source restriction for yaml output Rob Herring
     [not found]     ` <20210727183023.3212077-5-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2021-10-11  5:15       ` David Gibson
2021-10-11 13:22         ` Rob Herring
     [not found]           ` <CAL_Jsq+sMrRWqqPgcvoaiY0rLSp_s+gXOqJ9OuFLQ-3piUHSVA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2021-10-13  3:12             ` David Gibson
2021-10-14  1:29               ` Rob Herring
     [not found]                 ` <CAL_Jsq+iv7eM+LZ1O8d3V18dHraEAyfgdT8ucFKKVXZc9jEp4g-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2021-11-03  4:42                   ` David Gibson
2021-11-03 15:59                     ` Rob Herring
     [not found]                       ` <CAL_JsqLNyfe8Ou4RXeLm0ie1vvJ+z15J6EDgB96dPbCC5qvHVQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2021-11-03 19:16                         ` Rob Herring
     [not found]                           ` <CAL_JsqKeNne0Bf0ahG_h977snsBtsk3hbQOPO-6RiyFSyiOsfA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2021-11-03 19:56                             ` Simon Glass
     [not found]                               ` <CAPnjgZ3BNPW+L4BrhQ9W4nLovSgcchWPfbznYxtf20sv2ntYaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2021-11-03 22:42                                 ` Rob Herring
     [not found]                                   ` <CAL_JsqLdKsVO9kzkZcqL160COrpEp6GsR10AyPvmSG3B=grAXg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2021-11-03 22:48                                     ` Simon Glass
2021-11-09  4:41                         ` David Gibson
2021-07-27 18:30   ` [PATCH v3 5/5] treesource: Maintain phandle label/path on output Rob Herring
     [not found]     ` <20210727183023.3212077-6-robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
2021-10-11  5:21       ` David Gibson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.