git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] git-cget: prints elements of C code in the git repository
@ 2009-03-24 10:09 Roel Kluin
  2009-03-24 12:05 ` Johannes Schindelin
  0 siblings, 1 reply; 16+ messages in thread
From: Roel Kluin @ 2009-03-24 10:09 UTC (permalink / raw)
  To: git

Maybe something like this is useful?
------------------------------>8-------------8<---------------------------------
Add git-cget.sh: prints elements of C code in the git repository.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
---
diff --git a/Makefile b/Makefile
index 1087884..c21ba91 100644
--- a/Makefile
+++ b/Makefile
@@ -260,6 +260,7 @@ BASIC_LDFLAGS =
 SCRIPT_SH += git-am.sh
 SCRIPT_SH += git-bisect.sh
 SCRIPT_SH += git-filter-branch.sh
+SCRIPT_SH += git-cget.sh
 SCRIPT_SH += git-lost-found.sh
 SCRIPT_SH += git-merge-octopus.sh
 SCRIPT_SH += git-merge-one-file.sh
diff --git a/git-cget.sh b/git-cget.sh
new file mode 100755
index 0000000..08ea65f
--- /dev/null
+++ b/git-cget.sh
@@ -0,0 +1,409 @@
+#!/bin/bash
+# FIXME: make C++ style members
+
+int="[0-9]"		# decimal digit
+hex="[a-f0-9]"		# hexadecimal digit, lowercase only
+hEx="[A-Fa-f0-9]"	# hexadecimal digit, either case
+HEX="[A-F0-9]"		# hexadecimal digit, uppercase only
+upp="[A-Z]"		# uppercase letter
+up_="[A-Z_]"		# uppercase letter or underscore
+low="[a-z0-9]"		# lowercase letter or digit
+lo_="[a-z0-9_]"	# lowercase letter, digit or underscore
+alp="[A-Za-z]"		# letter
+al_="[A-Za-z_]"	# letter or underscore
+ALN="[A-Z0-9]"		# uppercase letter or digit
+AN_="[A-Z0-9_]"	# uppercase letter, digit or underscore
+aln="[A-Za-z0-9]"	# letter or digit
+an_="[A-Za-z0-9_]"	# letter, digit or underscore
+
+em='!'			# a literal '!', spliced into sed scripts as ${em} because of bash banging (history expansion)
+
+D="$int*\.?$int+x?$hex*[uUlL]{0,3}[fF]?"		# a number, float or hex
+# SD: a stricter number pattern than D (costs one backreference for (git )grep)
+SD="($int+[uUlLfF]?|$int+[uU]?[lL][lL]?|0x$hex+|0x$HEX+|$int+[lL][lL][uU]|$int*\.$int+[fF]?)"
+
+V="$al_+$an_*"		# variable/function name (or definition)
+K="$up_+$AN_*"		# definition (in capitals)
+
+# to catch variables that are members or arrays:
+W="[a-zA-Z0-9_>.-]*"
+SW="$V(\[[^][]*\]|\[[^][]*\[[^][]*\][^][]*\]|\.$V|->$V)*"	 # more strict, 1 backref
+
+s="[[:space:]]*"
+S="[[:space:]]+"
+
+# useful to ensure the end of a variable name:
+Q="[^[:alnum:]_]"
+Q2="[^[:alnum:]_>.]" # the '>' is tricky, it's an operator as well
+
+# match C comments: comm1 is a /* ... */ block comment, comm2 a // comment
+comm1="\/\*([^*]+|\**[^*/])*\*+\/"				# 1 backref
+comm2="\/\/([^\n]+|[n\\]+)*"				# 1 backref
+
+# match the end of the line, including trailing whitespace and comments:
+cendl="$s($comm1|$comm2|$s)*($|\n)"			 # 4 backrefs
+
+# strings and characters can contain things we want to match, so they get their own patterns
+str="\"([^\\\"]+|\\\\.)*\""						 # 1 backref
+ch1="'[^\\']'"
+ch2="'\\\\.[^']*'"
+ch="$ch1|$ch2"
+
+# in grep a bracket expression such as [^\/] also excludes '\', so we correct for that
+# match something that is not comment, string or character (c-code): 3 backrefs
+ccode="([^\"'/]+|\/[^*\"'/]|\/?$comm1|\/?$ch1|\/?$ch2|\/?$str|\/?\\\\|\/?$comm2)*"
+
+# Balanced parentheses / braces, nested up to five levels deep. TODO: use backref cheaper versions
+nps="[^()]*"		# text without parentheses
+nstdps="(\($nps(\($nps(\($nps(\($nps(\($nps\)$nps)*\)$nps)*\)$nps)*\)$nps)*\)$nps)*"
+npz="$nps$nstdps"	# text in which every parenthesis is balanced
+nnps="\($npz\)"		# one parenthesised group
+
+ncs="[^}{]*"		# text without braces
+nstdcs="(\{$ncs(\{$ncs(\{$ncs(\{$ncs(\{$ncs\}$ncs)*\}$ncs)*\}$ncs)*\}$ncs)*\}$ncs)*"
+ncz="$ncs$nstdcs"	# text in which every brace is balanced
+nncs="\{$ncz\}"		# one braced group
+
+delimitstr="s/([][{}(|)+*?\\/.^])/\\\\\1/g"	# sed command: put a backslash before each regex metacharacter
+delimit()	# filter: copy stdin to stdout with regex metacharacters escaped
+{
+	sed -r "$delimitstr"
+}
+
+# unmatch: match anything except the given.
+#
+# If given is a string, then it also matches longer
+# or shorter strings.
+#
+# otherwise it delimits characters and then creates a match
+#
+unmatch()
+{
+	local sedstr=
+	# TODO: unmatch numbers
+	# if [ -n "`echo "$1" | grep -E "^$SD$"`" ]; then
+	if [ -n "`echo "$1" | grep -E "^$V$"`" ]; then
+		sedstr=":loop
+			s/^($an_+)?($an_)(\|.*)?$/\1\|\1[^\2]\3/
+			t loop
+			s/^[|](.*)$/($al_$an_{${#1}}|\1)$an_*/"
+	fi
+	# FIXME: unmatch non alnum characters:
+	# with the below "\*" results in ([^\\]|\\[^*])*,
+	# but it should be ([^\\]|\\+[^*\\])*
+	#else
+	#	local a="[^][{}(|)+*?\\/.^]"
+	#	local b="[][{}(|)+*?\\/.^]"
+	#	sedstr="$delimitstr
+	#		:loop
+	#		s/^(($a|[\\]$b)+)?(($a)|[\\]($b))(\|.*)?$/\1\|\1[^\4\5]\6/
+	#		t loop
+	#		s/^[|](.*)$/(\1)*/"
+	#fi
+	echo $1 | sed -r "$sedstr"
+}
+
+# excl_code CHARS [MORE]: print a regex for C text free of CHARS/MORE, not testing inside strings, chars and comments
+excl_code()	# $1 and $2 are inserted verbatim into negated bracket expressions
+{
+	echo "([^$1\"'/$2]*|\/[^$1\"*'/$2]*|\/?$comm1|\/?$ch1|\/?$ch2|\/?$str|\/?$comm2)*"
+}
+
+# usage: nestc "(" ")" [depth [extra]] -- print a regex for text whose OPEN/CLOSE pairs nest
+# up to depth (default 5); extra goes to excl_code. Single chars only. (TODO: multiple)
+nestc()
+{
+	# Default the depth whenever it is omitted: the former "$# -eq 1" test left
+	# it empty for the documented two-argument call and broke the loop test.
+	local i="${3:-5}"
+	# first and 2nd are flipped to enable matching square brackets "]["
+	local p="$(excl_code "$2$1" "$4")"
+	local ret="$p"
+	while [ "$i" -gt 0 ]; do
+		ret="${p}([$1]${ret}[$2]${p})*"
+		i=$((i-1))
+	done
+	echo "$ret"
+}
+
+blank_it()
+{
+	echo "$1" | sed -r "
+		:a
+		s/([[:alnum:]])[[:space:]]+([[:alnum:]])/\1[[:space:]]+\2/g
+		s/[[:space:]]+/[[:space:]]*/g
+		$!{
+			N; ba
+		}"
+}
+
+# usage: grep_12 "struct list_head {" "include/linux/slub_def.h"
+# prints the file if it matches the regex; when not found, its #include'd files are grepped as well
+grep_12()
+{
+	flist=			# global, reset per search: '|'-separated list of files already tried
+	_grep_12 "$1" "$2"
+}
+
+_grep_12()	# $1: extended regex, $2: file; recursive worker behind grep_12
+{	# prints the first file found to match: $2 itself, else a file it #includes
+	grep -qE "$1" "$2";
+	if [ $? -eq 0 ]; then
+		echo "$2";
+	else
+		[ -n "$flist" ] && flist="$flist|"
+		flist="$flist$2"	# remember $2 so that it is not searched again
+		local b="${2%\/*}"	# directory part of $2, used to resolve "quoted" includes
+		for f in `sed -rn "/^$s#${s}include${s}[\"<]/{
+				$delimitstr
+				s/^$s#${s}include$s<([^>]+)>$cendl/include\/\1/p
+				s/^$s#${s}include$s\"([^\"]+)\"$cendl/${b//\//\/}\/\1/p
+				}" "$2" | grep -vE "($flist)" `; do
+			[ -f "$f" ] && _grep_12 "$1" "$f" && return 0;	# stop at the first include that yields a match
+			b="${f%\/*}"	# NOTE(review): f is not local, so the recursive call above may have changed it -- confirm
+		done
+	fi
+}
+
+get_matching_files()	# $1: extended regex, $2...: optional files; prints the names of files to search
+{
+	local files="${@:2}"
+	if [ -z "$files" ]; then
+		grep -lE "$1" $(git-ls-files "*.[ch]");	# no files given: every *.c / *.h file git lists
+		return;
+	fi
+	for f in $files; do	# word-split on purpose; names containing whitespace are not supported
+		[ -f "$f" ] && echo -n "$(grep_12 "$1" "$f") "	# the file itself or one of its includes
+		[ -d "$f" ] && grep -lE "$1" $(git-ls-files "*.[ch]")	# NOTE(review): lists all *.[ch] files, not only those under $f -- confirm
+	done
+}
+
+
+_sedit()	# $1 $2 $3: stage regexes; $4 $5: sed commands run once $2 resp. $3 matches; $6...: files
+{	# a line matching $1 starts a candidate; lines are appended (N) until $2, then until $3, matches
+	sed -r -n "
+		:a
+		/$1/${em}b
+		/$2/${em}{
+			H; N; ba
+		}
+		$4
+		:b
+		/$3/${em}{
+			H; N; bb
+		}
+		$5
+	" "${@:6}"
+}
+
+sedit()	# $1: print flags (bits 1, 2, 4); $2 $3 $4: stage regexes; $5...: files
+{
+	local print2="p"	# final sed command: print the collected text ...
+	[ $(($1 & 4 )) -eq 4 ] && print2="="	# ... or, with bit 4 set, a line number
+	# the three regexes are blanked in one go, joined by '@', then split again into three words
+	local stages="` blank_it "$2@$3@$4" | tr "@" " "`"
+	local sed2str="/ \+@$/{N; s/@\n//}";	# joins a "// vi FILE +@" header with the line number after it
+	if [ $(($1 & 1 )) -eq 1 ]; then
+		_sedit $stages "#" "$print2" "${@:5}" | sed -r "$sed2str";	# bit 1: no header, all files in one sed run
+	else
+		local print1=
+		[ $(($1 & 2 )) -eq 0 ] && print1="`echo -e "+@\n="`"	# bit 2 clear: add the line number to the header
+		[ $1 -eq 4 ] && sed2str="/ \+@$/{N; N; s/\/\/ vi ([^+]*) \+@\n([0-9]*)\n([0-9]*)/\1:\2-\3/}"
+		# one sed run per file, so that each "// vi FILE" header can name its file
+		for f in "${@:5}"; do
+			_sedit $stages "i// vi $f $print1" "$print2" "$f"
+		done | sed -r "$sed2str";
+	fi
+}
+
+get_func()	# $1: print flags for sedit, $2: function name regex, $3...: optional files
+{
+	# TODO: C++ member stuff
+	local fret="( $V( |\*|$comm1)*$S( |\*|$comm1)*)+"	# return type: names, '*' and comments
+	local func_args="$(nestc "(" ")" 10 "+%/?#&|<>.^-")"
+	local func_body="$(nestc "{" "}" 10)"
+	# without files, search every *.c / *.h file git lists for a line that may start the function
+	local fl="${@:3}"
+	if [ -z "$fl" ]; then
+		local match="^($fret)? $2 ((\($func_args)+(\) ($comm1 )*((\{$func_body)+\}?)?)?)?$";
+		match="`blank_it "$match"`";
+		fl="`git-ls-files "*.[ch]"`"
+		fl="`grep -lE "$match" $fl`";
+		[ -z "$fl" ] && return;
+	fi
+	# stage1: a line that may begin the definition; stage2: up to the opening brace; stage3: whole body
+	local stage1="^$fret ($2 ((\($func_args)+(\) ($comm1 )*((\{$func_body)+\}?)?)?)?)?$";
+	local stage2="^$fret $2 \($func_args\) \{";
+	local stage3="^$fret $2 \($func_args\) \{$func_body\}";
+	sedit $1 "$stage1" "$stage2" "$stage3" $fl
+}
+
+
+# get struct/enum/union. $1: print flags for sedit, $2: keyword, $3: name regex, $4...: optional files
+get_elem()
+{
+	local sA="($S|$comm1)"
+	local sB="($V$S|$V \(\([^()]+\)\) |$comm1 )*"
+	local struct_body="$(nestc "{" "}" 10)"
+	local stage1=;
+	local stage2=;
+	local stage3=;
+	# first pass: the name precedes the body, as in "struct NAME { ... }"
+	local match="^ (($sB$2)?$sA$sB)?$3($sA$sB)?((\{$struct_body)+\}?)?$cendl"
+	match="`blank_it "$match"`"
+	local fl="`get_matching_files "$match" ${@:4}`"
+	if [ -n "$fl" ]; then
+		stage1="^ ($V|$V \(\([^()]*\)\)|$sB$2($sA$sB($3($sA$sB)?((\{$struct_body)+\}?)?)?)?)$cendl"
+		stage2="^ $sB$2$sA$sB$3($sA$sB)? \{"
+		stage3="^ $sB$2$sA$sB$3($sA$sB)? \{$struct_body\}"
+		sedit $1 "$stage1" "$stage2" "$stage3" $fl
+	fi
+	# second pass: the name follows the body, as in "struct { ... } NAME;"
+	match="^( (($sB$2)?($sA$sB| )\{)?($struct_body\})+)? $sB$3($sA$sB| );$cendl"
+	match="`blank_it "$match"`"
+	fl="`get_matching_files "$match" ${@:4}`"
+	if [ -n "$fl" ]; then
+		stage1="^ ($V|$V \(\([^()]*\)\)|$sB$2(($sA$sB| )((\{$struct_body)+(\} $sB($3($sA$sB| );)?)?)?)?)$cendl"
+		stage2="^ $sB$2($sA$sB| )\{"
+		stage3="^ $sB$2($sA$sB| )\{$struct_body\} $sB$3($sA$sB| );$cendl"
+		# there can be false positives due to stage1; the sed filter below is meant to remove them.
+		# NOTE(review): inside double quotes bash expands "$!" (PID of the last background job, usually empty) -- confirm what sed really runs
+		sedit $1 "$stage1" "$stage2" "$stage3" $fl |
+			sed -r "/^\/\/ vi .* \+[0-9]*$/{
+				:a
+				$!{
+					d; b
+				}
+				N
+				/\/\/ vi .* \+[0-9]*\n\/\/ vi /${em}b
+				D; ba
+			}";
+		# (the filter only looks at "// vi FILE +LINE" header lines)
+	fi
+}
+
+get_def()	# $1: print flags for sedit, $2: name regex of the #define, $3...: optional files
+{
+	local stage1="^ # define$S$2$Q"	# the "#define NAME" line itself
+	local match="`blank_it "$stage1"`";
+	local stage2="^"	# matches at once: no separate header stage
+	local stage3="[^\\]$"	# a line not ending in a backslash, i.e. no further continuation
+	sedit $1 "$stage1" "$stage2" "$stage3" `get_matching_files "$match" ${@:3}`
+}
+
+get_any()	# $1: print flags for sedit, $2: name regex; prints every kind of element with that name
+{
+	local defm=" # define$S$2$Q"
+	local sA="($S|$comm1)"
+	local sB="($V$S|$V \(\([^()]+\)\) |$comm1 )*"
+	local body="$(nestc "{" "}" 10)"
+	# struct/enum/union: optional keyword part, then the name with an optional body
+	local elemsl="(($sB(struct|enum|union))?$sA$sB)?"
+	local elemsr1="$2($sA$sB)?((\{$body)+\}?)?"
+	# function: optional return type, the name, then arguments and body as far as the line goes
+	local fret="( $V( |\*|$comm1)*$S( |\*|$comm1)*)+"
+	local func_args="$(nestc "(" ")" 10 "+%/?#^&|<>.-")"
+	local funcm="($fret)? $2 ((\($func_args)+(\) ($comm1 )*((\{$body)+\}?)?)?)?";
+	# one git-grep finds all candidate lines; each hit is then classified and handed to its getter
+	local match="`blank_it "^($defm| $elemsl$elemsr1$| (($elemsl\{)?($body\})+)? $sB$2$sA$sB;$|$funcm$)"`";
+	defm="`blank_it "^$defm"`"
+	funcm="`blank_it "^$funcm$"`"
+	git-grep -E "$match" | while read -r l; do	# -r: keep backslashes, do not join lines ending in '\'
+		f="${l%%:*}"	# file name ends at the FIRST colon; the matched line may contain colons too
+		l="${l#*:}"
+		if [ "`echo "$l" | grep -E "^$defm"`" ]; then
+			get_def $1 "$2" $f
+		elif [ "`echo "$l" | grep -E "^$funcm$"`" ]; then
+			get_func $1 "$2" $f
+		else
+			for e in struct enum union; do
+				z="(($sB$e)?$sA$sB)?"
+				echo "$l" | grep -Eq "`blank_it "^ ( $z$elemsr1| (($z\{)?($body\})+)? $sB$2$sA$sB;)$cendl"`"
+				[ $? -eq 0 ] && get_elem $1 "$e" "$2" $f
+			done
+		fi
+	done
+}
+
+usage()	# print the command-line help on stdout
+{
+cat << EOF
+USAGE: git-cget.sh [OPTION]... PATTERN [FILE]...
+print elements of C code with name PATTERN in the git repository,
+where PATTERN is an extended regular expression
+
+Options to specify which element(s) should be printed:
+	-f	function
+	-s	struct
+	-d	definition
+	-m	macro
+	-e	enum
+	-u	union
+
+Options to alter the output:
+	-b	only print body
+	-n	only print file and lineranges
+	-?	print this help
+
+EOF
+}
+
+parseopts()	# parse the command line ("$@") and run the requested get_* functions
+{
+	local fl= name= dom= optname	# file list, PATTERN, define/macro suffix regex, current option
+	local getflag=0			# bits: 1 function, 2 definition, 4 macro, 8 struct, 16 enum, 32 union
+	local printflag=0		# 0 default, 3 body only (-b), 4 file and line ranges only (-n)
+	# options may be mixed with operands: getopts is restarted after every operand
+	while [ $# -ne 0 ]; do
+		while getopts "fdmseubn" optname; do
+			case "$optname" in
+				"f") getflag=$((getflag|1)) ;;
+				"d") getflag=$((getflag|2)) ;;
+				"m") getflag=$((getflag|4)) ;;
+				"s") getflag=$((getflag|8)) ;;
+				"e") getflag=$((getflag|16)) ;;
+				"u") getflag=$((getflag|32)) ;;
+				"b") printflag=3 ;;
+				"n") printflag=4 ;;
+				"?") usage; exit 0; ;;
+			esac
+		done
+		shift $((OPTIND-1))
+		[ $# -eq 0 ] && break;
+		OPTIND=0
+		if [ -f "$1" ]; then	# an existing file is an input file, anything else the PATTERN
+			fl="$fl $1";
+		else
+			name="$1";
+		fi
+		shift
+	done
+	[ -z "$name" ] && { usage; exit 1; }	# no PATTERN: stop here instead of searching for an empty name
+	if [ $getflag -eq 0 ]; then
+		get_any $printflag "$name" $fl
+		return;
+	fi
+	[ $((getflag&1)) -eq 1 ] && get_func $printflag "$name" $fl
+	if [ $((getflag&6)) -ne 0 ]; then
+		if [ $((getflag&6)) -eq 2 ]; then
+			dom="($S.*)?";		# print only definitions
+		elif [ $((getflag&6)) -eq 4 ]; then
+			dom="\(.*";		# print only macros
+		else
+			dom="(\(.*|$S.*)?";	# catch definition or macro
+		fi
+		get_def $printflag "$name$dom" $fl
+	fi
+	if [ $((getflag&8)) -eq 8 ]; then
+		get_elem $printflag "struct" "$name" $fl
+	fi
+	if [ $((getflag&16)) -eq 16 ]; then
+		get_elem $printflag "enum" "$name" $fl
+	fi
+	if [ $((getflag&32)) -eq 32 ]; then
+		get_elem $printflag "union" "$name" $fl
+	fi
+}
+
+# main: hand all command-line arguments to the option parser
+parseopts "$@"

^ permalink raw reply related	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2009-03-27 11:28 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-24 10:09 [PATCH] git-cget: prints elements of C code in the git repository Roel Kluin
2009-03-24 12:05 ` Johannes Schindelin
2009-03-24 13:30   ` Roel Kluin
2009-03-24 13:52     ` Johannes Schindelin
2009-03-24 15:08       ` Ping Yin
2009-03-24 13:59     ` Jeff King
2009-03-24 14:04       ` Stefan Karpinski
2009-03-24 14:05       ` Johannes Schindelin
2009-03-24 14:38       ` Mike Ralphson
2009-03-24 14:56         ` Johannes Schindelin
2009-03-24 16:37           ` Mike Ralphson
2009-03-24 16:33         ` Steven Tweed
2009-03-25  4:23           ` Jeff King
2009-03-24 16:17       ` roel kluin
2009-03-27  9:22   ` roel kluin
2009-03-27 11:26     ` Johannes Schindelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).