All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] proper handling of #pragma
@ 2018-01-03 16:14 foobar
  2018-01-04 17:42 ` Christopher Li
  2018-01-20 15:12 ` Luc Van Oostenryck
  0 siblings, 2 replies; 3+ messages in thread
From: foobar @ 2018-01-03 16:14 UTC (permalink / raw)
  To: linux-sparse; +Cc: foobar

#pragma directives other than #pragma once are not really meant for the preprocessor to consume.

in preprocessor_only mode, apart from pragma once, they should all be printed to stdout (unless they are under #ifdef that's not taken).

i've produced a patch that does that, however i'm not really happy about it, since i'm unclear about how to deal
with the pragma in non-preprocessor mode.

i suppose the token-stream of the pragma-directive should be converted into some kind of special symbol, which can then be either consumed or ignored when doing the usual iteration over the symbol list.
but i didn't find an obvious symbol type to assign...

(my plan is to create a build tool based on sparse that gets directives as a special pragma).

thanks!

testcode:
#pragma once
#if 1
#pragma omp wanna "see this" in cpp output
#else
#pragma omp nope "this one not"
#endif

here's the initial version of my patch

Subject: [PATCH] preprocessor: properly emit #pragma directives

3 months after the first commit, handling of pragma directives was
implemented, with the idea of turning the token into __pragma__ for
later internal consumption. however never anything came out of this idea,
and #pragma was basically left broken. apart from #pragma once, the
directive needs to persist in the preprocessor output, it is for later
consumption of the compiler (apparently the assumption was that sparse
would never be used as a stand-alone preprocessor).

however even the emission of the internal symbol name __pragma__ was
broken.
---
 ident-list.h  |  1 -
 pre-process.c | 71 ++++++++++++++++++++++++++++++++---------------------------
 2 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/ident-list.h b/ident-list.h
index 1308757..e2188bc 100644
--- a/ident-list.h
+++ b/ident-list.h
@@ -59,7 +59,6 @@ IDENT_RESERVED(__label__);
  * sparse. */
 IDENT(defined);
 IDENT(once);
-__IDENT(pragma_ident, "__pragma__", 0);
 __IDENT(__VA_ARGS___ident, "__VA_ARGS__", 0);
 __IDENT(__LINE___ident, "__LINE__", 0);
 __IDENT(__FILE___ident, "__FILE__", 0);
diff --git a/pre-process.c b/pre-process.c
index 8800dce..751d9ea 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -211,7 +211,7 @@ static void expand_list(struct token **list)
 	}
 }
 
-static void preprocessor_line(struct stream *stream, struct token **line);
+static int preprocessor_line(struct stream *stream, struct token **line);
 
 static struct token *collect_arg(struct token *prev, int vararg, struct position *pos, int count)
 {
@@ -225,9 +225,10 @@ static struct token *collect_arg(struct token *prev, int vararg, struct position
 			if (!next->pos.noexpand) {
 				sparse_error(next->pos,
 					     "directive in argument list");
-				preprocessor_line(stream, p);
-				__free_token(next);	/* Free the '#' token */
-				continue;
+				if(preprocessor_line(stream, p)) {
+					__free_token(next);	/* Free the '#' token */
+					continue;
+				}
 			}
 		}
 		switch (token_type(next)) {
@@ -1808,31 +1809,25 @@ static int handle_split_include(struct stream *stream, struct token **line, stru
 }
 
 /*
- * We replace "#pragma xxx" with "__pragma__" in the token
- * stream. Just as an example.
- *
- * We'll just #define that away for now, but the theory here
- * is that we can use this to insert arbitrary token sequences
- * to turn the pragmas into internal front-end sequences for
- * when we actually start caring about them.
- *
- * So eventually this will turn into some kind of extended
- * __attribute__() like thing, except called __pragma__(xxx).
+ * pragma, unlike all other directives starting with '#' is not
+ * meant for the preprocessor to consume, but it should be
+ * echoed back exactly as in the source code.
+ * the only exception is pragma once, which is targetting the
+ * cpp. preprocessor_line() assumes that any line starting with '#'
+ * can be dropped and manipulates the list to point to the token after
+ * the next newline. however, we need to undo this here if what we get
+ * is not "pragma once. thus we pass the start token pointing to '#'
+ * unlike all other handler function, so we can safely restore the list.
  */
-static int handle_pragma(struct stream *stream, struct token **line, struct token *token)
+static int handle_pragma(struct stream *stream, struct token **line, struct token *start)
 {
-	struct token *next = *line;
-
+	struct token *token = start->next;
 	if (match_ident(token->next, &once_ident) && eof_token(token->next->next)) {
 		stream->once = 1;
 		return 1;
 	}
-	token->ident = &pragma_ident;
-	token->pos.newline = 1;
-	token->pos.whitespace = 1;
-	token->pos.pos = 1;
-	*line = token;
-	token->next = next;
+
+	*line = start;
 	return 0;
 }
 
@@ -1904,14 +1899,15 @@ static void init_preprocessor(void)
 	counter_macro = 0;
 }
 
-static void handle_preprocessor_line(struct stream *stream, struct token **line, struct token *start)
+/* return 1 if the entire preprocessor line was consumed, 0 if it needs to be preserved */
+static int handle_preprocessor_line(struct stream *stream, struct token **line, struct token *start)
 {
 	int (*handler)(struct stream *, struct token **, struct token *);
 	struct token *token = start->next;
-	int is_normal = 1;
+	int is_normal = 1, ret;
 
 	if (eof_token(token))
-		return;
+		return 1;
 
 	if (token_type(token) == TOKEN_IDENT) {
 		struct symbol *sym = lookup_symbol(token->ident, NS_PREPROCESSOR);
@@ -1932,14 +1928,24 @@ static void handle_preprocessor_line(struct stream *stream, struct token **line,
 		if (false_nesting)
 			goto out;
 	}
+
+	ret = 1; /* was the entire line consumed ? */
+
+	/* pragma handler is special */
+	if(handler == handle_pragma) {
+		token = start;
+		ret = 0;
+	}
+
 	if (!handler(stream, line, token))	/* all set */
-		return;
+		return ret;
 
 out:
 	free_preprocessor_line(token);
+	return 1;
 }
 
-static void preprocessor_line(struct stream *stream, struct token **line)
+static int preprocessor_line(struct stream *stream, struct token **line)
 {
 	struct token *start = *line, *next;
 	struct token **tp = &start->next;
@@ -1952,7 +1958,7 @@ static void preprocessor_line(struct stream *stream, struct token **line)
 	}
 	*line = next;
 	*tp = &eof_token_entry;
-	handle_preprocessor_line(stream, line, start);
+	return handle_preprocessor_line(stream, line, start);
 }
 
 static void do_preprocess(struct token **list)
@@ -1964,9 +1970,10 @@ static void do_preprocess(struct token **list)
 
 		if (next->pos.newline && match_op(next, '#')) {
 			if (!next->pos.noexpand) {
-				preprocessor_line(stream, list);
-				__free_token(next);	/* Free the '#' token */
-				continue;
+				if(preprocessor_line(stream, list)) {
+					__free_token(next);	/* Free the '#' token */
+					continue;
+				}
 			}
 		}
 
-- 
2.13.3


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [RFC] proper handling of #pragma
  2018-01-03 16:14 [RFC] proper handling of #pragma foobar
@ 2018-01-04 17:42 ` Christopher Li
  2018-01-20 15:12 ` Luc Van Oostenryck
  1 sibling, 0 replies; 3+ messages in thread
From: Christopher Li @ 2018-01-04 17:42 UTC (permalink / raw)
  To: foobar; +Cc: Linux-Sparse

On Thu, Jan 4, 2018 at 12:14 AM, foobar <foobar@redchan.it> wrote:
> #pragma directives other than #pragma once are not really meant for the preprocessor to consume.

I am not sure I understand this. #pragma is not part of the normal C
language grammar.
It should be consumed at the preprocessor stage, it has some side effect
for the compiler.

>
> in preprocessor_only mode, apart from pragma once, they should all be printed to stdout (unless they are under #ifdef that's not taken).

I wonder what does the C spec say about this.

> i've produced a patch that does that, however i'm not really happy about it, since i'm unclear about how to deal
> with the pragma in non-preprocessor mode.
>
> i suppose the token-stream of the pragma-directive should be converted into some kind of special symbol, which can then be either consumed or ignored when doing the usual iteration over the symbol list.
> but i didn't find an obvious symbol type to assign...

If it is preprocessor symbol. It does not need to have C type. It can
have the name space
set to preprocessor. In theory you can also have a symbol_list keep track of
all the pragma you see in the source file. Not sure that will serve
you any purpose.

> (my plan is to create a build tool based on sparse that gets directives as a special pragma).
>
> thanks!
>
> testcode:
> #pragma once
> #if 1
> #pragma omp wanna "see this" in cpp output
> #else
> #pragma omp nope "this one not"
> #endif
>
> here's the initial version of my patch
>
> Subject: [PATCH] preprocessor: properly emit #pragma directives
>
> 3 months after the first commit, handling of pragma directives was
> implemented, with the idea of turning the token into __pragma__ for
> later internal consumption. however never anything came out of this idea,
> and #pragma was basically left broken. apart from #pragma once, the
> directive needs to persist in the preprocessor output, it is for later
> consumption of the compiler (apparently the assumption was that sparse
> would never be used as a stand-alone preprocessor).

If __pragma__ exist pass the preprocessor stage.
It can appear in any part of the C source file. The C parser need
to consume it at almost any context. That will really complicate stuff.
I think that is a bad idea.

There might be other ways to do what you want.

If it is pre-processor only, then that might be fine. Because there is
no C parser to consume the output.

Chris

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [RFC] proper handling of #pragma
  2018-01-03 16:14 [RFC] proper handling of #pragma foobar
  2018-01-04 17:42 ` Christopher Li
@ 2018-01-20 15:12 ` Luc Van Oostenryck
  1 sibling, 0 replies; 3+ messages in thread
From: Luc Van Oostenryck @ 2018-01-20 15:12 UTC (permalink / raw)
  To: foobar; +Cc: linux-sparse

On Wed, Jan 03, 2018 at 04:14:19PM +0000, foobar wrote:
> #pragma directives other than #pragma once are not really meant for the preprocessor to consume.
> 
> in preprocessor_only mode, apart from pragma once, they should all be printed to stdout (unless they are under #ifdef that's not taken).
> 
> i've produced a patch that does that, however i'm not really happy about it, since i'm unclear about how to deal
> with the pragma in non-preprocessor mode.
> 
> i suppose the token-stream of the pragma-directive should be converted into some kind of special symbol, which can then be either consumed or ignored when doing the usual iteration over the symbol list.
> but i didn't find an obvious symbol type to assign...

Here under, I've added a draft patch for handling the top level pragmas.
Something must also be added when parsing statements (especially for omp pragmas),
and probably also at declaration level.

From d0d375a35b0803e0ac43eac09123e949a7c890c6 Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Sat, 6 Jan 2018 05:24:43 +0100
Subject: [PATCH 1/3] pragma: allow to parse pragmas as toplevel

Just get all the pragma's tokens but for now ignore them
as well as the pragma token itself.

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
---
 parse.c      | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/parse.c b/parse.c
index e255345fd..5c52ce727 100644
--- a/parse.c
+++ b/parse.c
@@ -76,6 +76,7 @@ static struct token *parse_range_statement(struct token *token, struct statement
 static struct token *parse_asm_statement(struct token *token, struct statement *stmt);
 static struct token *toplevel_asm_declaration(struct token *token, struct symbol_list **list);
 static struct token *parse_static_assert(struct token *token, struct symbol_list **unused);
+static struct token *toplevel_pragma(struct token *token, struct symbol_list *     *list);
 
 typedef struct token *attr_t(struct token *, struct symbol *,
 			     struct decl_state *);
@@ -341,6 +342,10 @@ static struct symbol_op static_assert_op = {
 	.toplevel = parse_static_assert,
 };
 
+static struct symbol_op pragma_op = {
+	.toplevel = toplevel_pragma,
+};
+
 static struct symbol_op packed_op = {
 	.attribute = attribute_packed,
 };
@@ -482,6 +487,8 @@ static struct init_keyword {
 	/* Static assertion */
 	{ "_Static_assert", NS_KEYWORD, .op = &static_assert_op },
 
+	{ "#pragma",	NS_KEYWORD, .op = &pragma_op },
+
 	/* Storage class */
 	{ "auto",	NS_TYPEDEF, .op = &auto_op },
 	{ "register",	NS_TYPEDEF, .op = &register_op },
@@ -2777,6 +2784,16 @@ static struct token *toplevel_asm_declaration(struct token *token, struct symbol
 	return token;
 }
 
+static struct token *toplevel_pragma(struct token *token, struct symbol_list **list)
+{
+	// ignore for now
+	// exact handling here depends on what is reinserted
+	do
+		token = token->next;
+	while (!token->pos.newline);
+
+	return token;
+}
+
 struct token *external_declaration(struct token *token, struct symbol_list **list,
 		validate_decl_t validate_decl)
 {
-- 

For the special symbol, it's not absolutely needed but it may make things a bit neater.

> diff --git a/ident-list.h b/ident-list.h
> index 1308757..e2188bc 100644
> --- a/ident-list.h
> +++ b/ident-list.h
> @@ -59,7 +59,6 @@ IDENT_RESERVED(__label__);
>   * sparse. */
>  IDENT(defined);
>  IDENT(once);
> -__IDENT(pragma_ident, "__pragma__", 0);

You'll want to keep this one or something very similar.

>  __IDENT(__VA_ARGS___ident, "__VA_ARGS__", 0);
>  __IDENT(__LINE___ident, "__LINE__", 0);
>  __IDENT(__FILE___ident, "__FILE__", 0);
> diff --git a/pre-process.c b/pre-process.c
> index 8800dce..751d9ea 100644
> --- a/pre-process.c
> +++ b/pre-process.c
> @@ -211,7 +211,7 @@ static void expand_list(struct token **list)
>  	}
>  }

As I understand it, all your remaining changes are there to go around the fact
that the current code seems to be designed to simply drop everything after
a preprocessor directive.
It's true, but it's also quite easy to simply reinsert what is needed
in the next lines. Something like:

From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Sat, 6 Jan 2018 04:34:49 +0100
Subject: [PATCH 2/2] pragma: insert back the pragma's args into the stream

Currently, the pragma directives are replaced with an internal
identifier and all the corresponding arguments are simply dropped.
These identifier are then also dropped by the following call to
expand_one_symbol().

Change this to:
- mark the identifier as noexpand so it's not dropped anymore
- insert back all the arguments into the stream.
---
 pre-process.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/pre-process.c b/pre-process.c
index 8800dce53..577f6d0c2 100644
--- a/pre-process.c
+++ b/pre-process.c
@@ -1808,16 +1808,11 @@ static int handle_split_include(struct stream *stream, struct token **line, stru
 }
 
 /*
- * We replace "#pragma xxx" with "__pragma__" in the token
- * stream. Just as an example.
- *
- * We'll just #define that away for now, but the theory here
- * is that we can use this to insert arbitrary token sequences
- * to turn the pragmas into internal front-end sequences for
- * when we actually start caring about them.
- *
- * So eventually this will turn into some kind of extended
+ * Eventually this will turn into some kind of extended
  * __attribute__() like thing, except called __pragma__(xxx).
+ *
+ * For now we just replace '#' 'pragma' xxx... by an internal
+ * identifier.
  */
 static int handle_pragma(struct stream *stream, struct token **line, struct token *token)
 {
@@ -1831,7 +1826,13 @@ static int handle_pragma(struct stream *stream, struct token **line, struct toke
 	token->pos.newline = 1;
 	token->pos.whitespace = 1;
 	token->pos.pos = 1;
+	token->pos.noexpand = 1;
 	*line = token;
+
+	// search for the end-of-line
+	while (!eof_token(token->next))
+		token = token->next;
+	// a special symbol could make this loop unneeded.
+	// and insert the whole line in the stream
 	token->next = next;
 	return 0;
 }
-- 

Cheers,
-- Luc Van Oostenryck

^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2018-01-20 15:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-03 16:14 [RFC] proper handling of #pragma foobar
2018-01-04 17:42 ` Christopher Li
2018-01-20 15:12 ` Luc Van Oostenryck

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.