From: Jaskaran Singh <jaskaran.singh@collabora.com>
To: cocci@systeme.lip6.fr
Subject: [Cocci] [PATCH v2 1/3] parsing_c: includes_cache: Implement a name cache
Date: Thu, 10 Sep 2020 11:47:01 +0530 [thread overview]
Message-ID: <20200910061703.2397-2-jaskaran.singh@collabora.com> (raw)
In-Reply-To: <20200910061703.2397-1-jaskaran.singh@collabora.com>
Implement a name cache and includes dependency graph to optimize
performance for recursive parsing of header files.
The following is a high-level description of what has been implemented:
- As header files are recursively parsed, they are scanned for the
following:
- fields of structs/unions/enums
- typedefs
- function prototypes
- global variables
The names of the above are stored in a "name cache", i.e. a hashtable
to map the name to the files it is declared in.
- A dependency graph is built to determine dependencies between all the
files in the codebase.
- In the type annotation phase of the C subsystem, if a function call,
struct/union field or identifier is encountered, the type of which is
not known to the annoter, the name cache is checked for the name.
- The name cache gives a list of files that the name is declared/defined
in. These files are cross checked with the dependency graph to
determine if any of these are reachable by the file that the annoter is
working on.
- If a reachable header file is found, that file is parsed and all of
the above listed constructs are extracted from it.
Suggested-by: Julia Lawall <julia.lawall@inria.fr>
Signed-off-by: Jaskaran Singh <jaskaran.singh@collabora.com>
---
Makefile | 2 +-
parsing_c/includes_cache.ml | 290 +++++++++++++++++++++++++++++++++++
parsing_c/includes_cache.mli | 47 ++++++
3 files changed, 338 insertions(+), 1 deletion(-)
create mode 100644 parsing_c/includes_cache.ml
create mode 100644 parsing_c/includes_cache.mli
diff --git a/Makefile b/Makefile
index e25174413..f8d3424c0 100644
--- a/Makefile
+++ b/Makefile
@@ -50,7 +50,7 @@ SOURCES_parsing_cocci := \
SOURCES_parsing_c := \
token_annot.ml flag_parsing_c.ml parsing_stat.ml \
token_c.ml ast_c.ml includes.ml control_flow_c.ml \
- visitor_c.ml lib_parsing_c.ml control_flow_c_build.ml \
+ visitor_c.ml lib_parsing_c.ml includes_cache.ml control_flow_c_build.ml \
pretty_print_c.ml semantic_c.ml lexer_parser.ml parser_c.mly \
lexer_c.mll parse_string_c.ml token_helpers.ml token_views_c.ml \
cpp_token_c.ml parsing_hacks.ml cpp_analysis_c.ml \
diff --git a/parsing_c/includes_cache.ml b/parsing_c/includes_cache.ml
new file mode 100644
index 000000000..ca5c91822
--- /dev/null
+++ b/parsing_c/includes_cache.ml
@@ -0,0 +1,290 @@
+(*
+ * This file is part of Coccinelle, licensed under the terms of the GPL v2.
+ * See copyright.txt in the Coccinelle source code for more information.
+ * The Coccinelle source code can be obtained at http://coccinelle.lip6.fr
+ *)
+
+open Common
+module Lib = Lib_parsing_c
+
+(*****************************************************************************)
+(* Wrappers *)
+(*****************************************************************************)
+let pr_inc s =
+ if !Flag_parsing_c.verbose_includes
+ then Common.pr2 s
+
+(*****************************************************************************)
+(* Graph types/modules *)
+(*****************************************************************************)
+
+(* Filenames as keys to check paths from file A to file B. *)
+module Key : Set.OrderedType with type t = Common.filename = struct
+ type t = Common.filename
+ let compare = String.compare
+end
+
+module KeySet = Set.Make (Key)
+
+module KeyMap = Map.Make (Key)
+
+module Node : Set.OrderedType with type t = unit = struct
+ type t = unit
+ let compare = compare
+end
+
+module Edge : Set.OrderedType with type t = unit = struct
+ type t = unit
+ let compare = compare
+end
+
+module KeyEdgePair : Set.OrderedType with type t = Key.t * Edge.t =
+struct
+ type t = Key.t * Edge.t
+ let compare = compare
+end
+
+module KeyEdgeSet = Set.Make (KeyEdgePair)
+
+module G = Ograph_simple.Make
+ (Key) (KeySet) (KeyMap) (Node) (Edge) (KeyEdgePair) (KeyEdgeSet)
+
+
+(*****************************************************************************)
+(* Includes dependency graph *)
+(*****************************************************************************)
+
+(* Header file includes dependency graph *)
+let dependency_graph = ref (new G.ograph_mutable)
+
+(* Check if a path exists between one node to another.
+ * Almost a copy of dfs_iter in commons/ograph_extended.ml with minor changes.
+ * Return true if g satisfies predicate f else false
+ *)
+let dfs_exists xi f g =
+ let already = Hashtbl.create 101 in
+ let rec aux_dfs xs =
+ let h xi =
+ if Hashtbl.mem already xi
+ then false
+ else
+ begin
+ Hashtbl.add already xi true;
+ if f xi
+ then true
+ else
+ begin
+ let f' (key, _) keyset = KeySet.add key keyset in
+ let newset =
+ try KeyEdgeSet.fold f' (g#successors xi) KeySet.empty
+ with Not_found -> KeySet.empty in
+ aux_dfs newset
+ end
+ end in
+ KeySet.exists h xs in
+ aux_dfs (KeySet.singleton xi)
+
+let add_to_dependency_graph parent file =
+ let add_node a =
+ if not (KeyMap.mem a !dependency_graph#nodes)
+ then !dependency_graph#add_node a () in
+ let add_arc (a, b) =
+ add_node a;
+ add_node b;
+ !dependency_graph#add_arc (a, b) () in
+ add_arc (parent, file)
+
+let path_exists_dependency_graph filea fileb =
+ dfs_exists filea (fun a -> a = fileb) !dependency_graph
+
+let print_dependency_graph _ =
+ G.print_ograph_generic
+ ~str_of_key:(fun k -> "\"" ^ k ^ "\"")
+ ~str_of_node:(fun k _ -> k)
+ "/tmp/headers.dot"
+ !dependency_graph
+
+
+(*****************************************************************************)
+(* Name cache *)
+(*****************************************************************************)
+
+type cache_exp =
+ | CacheField of string (* Name of the struct/union it is defined in *)
+ | CacheEnumConst
+ | CacheVarFunc
+ | CacheTypedef
+
+(* Not very elegant. Basically a copy-paste of namedef in type_annoter_c. *)
+type cache_return =
+ | RetVarOrFunc of string * Ast_c.exp_type
+ | RetEnumConstant of string * string option
+ | RetTypeDef of string * Ast_c.fullType
+ | RetStructUnionNameDef of string * (Ast_c.structUnion * Ast_c.structType)
+ Ast_c.wrap
+
+(* Map name to list of files it is defined in *)
+let name_cache : (string, (filename * cache_exp) list) Hashtbl.t ref =
+ ref (Hashtbl.create 101)
+
+let add_to_name_cache name (file, exp) =
+ let l =
+ try Hashtbl.find !name_cache name
+ with Not_found -> [] in
+ Hashtbl.replace !name_cache name ([(file, exp)] @ l)
+
+(* Visitor to cache names in a given file. *)
+let cache_name_visitor file =
+
+ let cache_struct_fields sname def =
+ let _cache_field field =
+ match (Ast_c.unwrap field) with
+ Ast_c.Simple (name, _)
+ | Ast_c.BitField (name, _, _, _) ->
+ name +>
+ do_option
+ (fun n ->
+ add_to_name_cache (Ast_c.str_of_name n)
+ (file, CacheField(sname))) in
+ let _cache_struct_fields field =
+ match field with
+ Ast_c.DeclarationField(Ast_c.FieldDeclList(l)) ->
+ List.iter _cache_field (Ast_c.unwrap l)
+ | _ -> () in
+ List.iter _cache_struct_fields def in
+
+ let cache_enum_constants def =
+ def +>
+ List.iter
+ (fun ec ->
+ add_to_name_cache
+ ((Ast_c.unwrap ec) +> fst +> Ast_c.str_of_name)
+ (file, CacheEnumConst)) in
+
+ { Visitor_c.default_visitor_c with
+ Visitor_c.ktoplevel = fun (k, bigf) p ->
+ match p with
+ Ast_c.Declaration
+ (Ast_c.DeclList (defs, _)) ->
+ let cache_name x =
+ (match (Ast_c.unwrap x) with
+ {Ast_c.v_namei = Some (name, _);
+ Ast_c.v_type = typ;
+ Ast_c.v_storage = strg} ->
+ (* Cache typedefs/variables/functions *)
+ let exp_type =
+ match (fst strg) with
+ Ast_c.StoTypedef -> CacheTypedef
+ | _ -> CacheVarFunc in
+ add_to_name_cache
+ (Ast_c.str_of_name name) (file, exp_type)
+ | {Ast_c.v_namei = None; Ast_c.v_type = typ} ->
+ (match (Ast_c.unwrap (snd typ)) with
+ Ast_c.StructUnion (_, Some n, def) ->
+ (* Cache field names *)
+ cache_struct_fields n def
+ | Ast_c.Enum(_, def) ->
+ (* Cache enumeration constants *)
+ cache_enum_constants def
+ | _ -> k p)) in
+ List.iter cache_name defs
+ | _ -> k p
+ }
+
+
+let get_type_visitor file l =
+ let add_to_ret ret =
+ l := [ret] @ !l in
+ let get_enum_constants sopt def =
+ def +>
+ List.iter
+ (fun ec ->
+ let s = (Ast_c.unwrap ec) +> fst +> Ast_c.str_of_name in
+ add_to_ret (RetEnumConstant(s, sopt))) in
+ { Visitor_c.default_visitor_c with
+ Visitor_c.ktoplevel = fun (k, bigf) p ->
+ match p with
+ Ast_c.Declaration
+ (Ast_c.DeclList (defs, _)) ->
+ let get_name x =
+ (match (Ast_c.unwrap x) with
+ {Ast_c.v_namei = Some (n, _);
+ Ast_c.v_type = typ;
+ Ast_c.v_storage = strg} ->
+ (* Cache typedefs/variables/functions *)
+ let f _ =
+ let s = Ast_c.str_of_name n in
+ match (fst strg) with
+ Ast_c.StoTypedef ->
+ add_to_ret (RetTypeDef(s, typ))
+ | Ast_c.NoSto
+ | Ast_c.Sto _ ->
+ add_to_ret
+ (RetVarOrFunc(s, (typ, Ast_c.NotLocalVar))) in
+ f ()
+ | {Ast_c.v_namei = None; Ast_c.v_type = typ} ->
+ (match (Ast_c.unwrap (snd typ)) with
+ Ast_c.StructUnion (su, snameopt, def) ->
+ (match snameopt with
+ Some s ->
+ let def' = Lib.al_fields def in
+ let ii = Ast_c.get_ii_typeC_take_care (snd typ) in
+ let ii' = Lib.al_ii ii in
+ add_to_ret
+ (RetStructUnionNameDef (s, ((su, def'),ii')))
+ | None -> k p)
+ | Ast_c.Enum(sopt, def) ->
+ get_enum_constants sopt def
+ | _ -> k p)) in
+ List.iter get_name defs
+ | _ -> k p
+ }
+
+let extract_names file ast =
+ ignore (Visitor_c.vk_program (cache_name_visitor file) ast)
+
+(* Use the visitor to get all the types from a file. *)
+let get_types_from_name_cache file name exp_types parse_fn =
+ if !Includes.include_headers_for_types && Includes.is_header file
+ then []
+ else
+ let rec find_file l =
+ match l with
+ [] -> None
+ | (x, y)::xs ->
+ if List.mem y exp_types
+ then
+ begin
+ if path_exists_dependency_graph file x
+ then Some (x, y)
+ else find_file xs
+ end
+ else find_file xs in
+ let file_list =
+ try Hashtbl.find !name_cache name
+ with Not_found -> [] in
+ match (find_file file_list) with
+ None -> []
+ | Some (x, t) ->
+ let ast = parse_fn x in
+ let env = ref [] in
+ ignore
+ (Visitor_c.vk_program
+ (get_type_visitor file env) ast);
+ !env
+
+
+(*****************************************************************************)
+(* Set of parsed files *)
+(*****************************************************************************)
+
+module StringSet = Set.Make (String)
+
+(* Set of files parsed *)
+let seen_files = ref (StringSet.empty)
+
+let has_been_parsed file =
+ StringSet.mem file !seen_files
+
+let add_to_parsed_files file =
+ seen_files := StringSet.add file !seen_files
diff --git a/parsing_c/includes_cache.mli b/parsing_c/includes_cache.mli
new file mode 100644
index 000000000..2384cb2f7
--- /dev/null
+++ b/parsing_c/includes_cache.mli
@@ -0,0 +1,47 @@
+(*
+ * This file is part of Coccinelle, licensed under the terms of the GPL v2.
+ * See copyright.txt in the Coccinelle source code for more information.
+ * The Coccinelle source code can be obtained at http://coccinelle.lip6.fr
+ *)
+
+(*****************************************************************************)
+(* Includes dependency graph *)
+(*****************************************************************************)
+
+val add_to_dependency_graph : Common.filename -> Common.filename -> unit
+
+val print_dependency_graph : unit -> unit
+
+(*****************************************************************************)
+(* Name cache *)
+(*****************************************************************************)
+
+type cache_exp =
+ | CacheField of string (* Name of the struct/union it is defined in *)
+ | CacheEnumConst
+ | CacheVarFunc
+ | CacheTypedef
+
+type cache_return =
+ | RetVarOrFunc of string * Ast_c.exp_type
+ | RetEnumConstant of string * string option
+ | RetTypeDef of string * Ast_c.fullType
+ | RetStructUnionNameDef of string * (Ast_c.structUnion * Ast_c.structType)
+ Ast_c.wrap
+
+val extract_names : Common.filename -> Ast_c.program -> unit
+
+val get_types_from_name_cache :
+ Common.filename ->
+ string ->
+ cache_exp list ->
+ (Common.filename -> Ast_c.program) ->
+ cache_return list
+
+(*****************************************************************************)
+(* Set of parsed files *)
+(*****************************************************************************)
+
+val has_been_parsed : Common.filename -> bool
+
+val add_to_parsed_files : Common.filename -> unit
--
2.21.3
_______________________________________________
Cocci mailing list
Cocci@systeme.lip6.fr
https://systeme.lip6.fr/mailman/listinfo/cocci
next prev parent reply other threads:[~2020-09-10 6:18 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-10 6:17 [Cocci] [PATCH v2 0/3] parsing_c: Optimize recursive header file parsing Jaskaran Singh
2020-09-10 6:17 ` Jaskaran Singh [this message]
2020-09-10 6:17 ` [Cocci] [PATCH v2 2/3] parsing_c: parse_c: Build name cache and includes dependency graph Jaskaran Singh
2020-09-10 6:17 ` [Cocci] [PATCH v2 3/3] parsing_c: type_annoter_c: Use name cache for type annotation Jaskaran Singh
2020-09-10 6:19 ` [Cocci] [PATCH v2 0/3] parsing_c: Optimize recursive header file parsing Jaskaran Singh
2020-09-15 19:23 ` Julia Lawall
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200910061703.2397-2-jaskaran.singh@collabora.com \
--to=jaskaran.singh@collabora.com \
--cc=cocci@systeme.lip6.fr \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).