git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] git-cget: prints elements of C code in the git repository
@ 2009-03-24 10:09 Roel Kluin
  2009-03-24 12:05 ` Johannes Schindelin
  0 siblings, 1 reply; 16+ messages in thread
From: Roel Kluin @ 2009-03-24 10:09 UTC (permalink / raw)
  To: git

Maybe something like this is useful?
------------------------------>8-------------8<---------------------------------
Add git-cget.sh: prints elements of C code in the git repository.

Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
---
diff --git a/Makefile b/Makefile
index 1087884..c21ba91 100644
--- a/Makefile
+++ b/Makefile
@@ -260,6 +260,7 @@ BASIC_LDFLAGS =
 SCRIPT_SH += git-am.sh
 SCRIPT_SH += git-bisect.sh
 SCRIPT_SH += git-filter-branch.sh
+SCRIPT_SH += git-cget.sh
 SCRIPT_SH += git-lost-found.sh
 SCRIPT_SH += git-merge-octopus.sh
 SCRIPT_SH += git-merge-one-file.sh
diff --git a/git-cget.sh b/git-cget.sh
new file mode 100755
index 0000000..08ea65f
--- /dev/null
+++ b/git-cget.sh
@@ -0,0 +1,409 @@
+#!/bin/bash
+# FIXME: make C++ style members
+
+int="[0-9]"
+hex="[a-f0-9]"
+hEx="[A-Fa-f0-9]"
+HEX="[A-F0-9]"
+upp="[A-Z]"
+up_="[A-Z_]"
+low="[a-z0-9]"
+lo_="[a-z0-9_]"
+alp="[A-Za-z]"
+al_="[A-Za-z_]"
+ALN="[A-Z0-9]"
+AN_="[A-Z0-9_]"
+aln="[A-Za-z0-9]"
+an_="[A-Za-z0-9_]"
+
+em='!'			# because of bash banging
+
+D="$int*\.?$int+x?$hex*[uUlL]{0,3}[fF]?"		# a number, float or hex
+# more strict and catches it (costs one backreference for (git )grep)
+SD="($int+[uUlLfF]?|$int+[uU]?[lL][lL]?|0x$hex+|0x$HEX+|$int+[lL][lL][uU]|$int*\.$int+[fF]?)"
+
+V="$al_+$an_*"		# variable/function name (or definition)
+K="$up_+$AN_*"		# definition (in capitals)
+
+# to catch variables that are members or arrays:
+W="[a-zA-Z0-9_>.-]*"
+SW="$V(\[[^][]*\]|\[[^][]*\[[^][]*\][^][]*\]|\.$V|->$V)*"	 # more strict, 1 backref
+
+s="[[:space:]]*"
+S="[[:space:]]+"
+
+# useful to ensure the end of a variable name:
+Q="[^[:alnum:]_]"
+Q2="[^[:alnum:]_>.]" # the '>' is tricky, it's an operator as well
+
+# match comments
+comm1="\/\*([^*]+|\**[^*/])*\*+\/"				# 1 backref
+comm2="\/\/([^\n]+|[n\\]+)*"				# 1 backref
+
+# match the end of the line, including comments:
+cendl="$s($comm1|$comm2|$s)*($|\n)"			 # 4 backrefs
+
+# strings and characters can contain things we want to match
+str="\"([^\\\"]+|\\\\.)*\""						 # 1 backref
+ch1="'[^\\']'"
+ch2="'\\\\.[^']*'"
+ch="$ch1|$ch2"
+
+# when using grep [^\/] also cases excludes '\', so we correct for that
+# match something that is not comment, string or character (c-code): 3 backrefs
+ccode="([^\"'/]+|\/[^*\"'/]|\/?$comm1|\/?$ch1|\/?$ch2|\/?$str|\/?\\\\|\/?$comm2)*"
+
+# TODO: use backref cheaper versions
+nps="[^()]*"
+nstdps="(\($nps(\($nps(\($nps(\($nps(\($nps\)$nps)*\)$nps)*\)$nps)*\)$nps)*\)$nps)*"
+npz="$nps$nstdps"
+nnps="\($npz\)"
+
+ncs="[^}{]*"
+nstdcs="(\{$ncs(\{$ncs(\{$ncs(\{$ncs(\{$ncs\}$ncs)*\}$ncs)*\}$ncs)*\}$ncs)*\}$ncs)*"
+ncz="$ncs$nstdcs"
+nncs="\{$ncz\}"
+
+delimitstr="s/([][{}(|)+*?\\/.^])/\\\\\1/g"
+delimit()
+{
+	sed -r "$delimitstr"
+}
+
+# unmatch: match anything except the given.
+#
+# If given is a string, then it also matches longer
+# or shorter strings.
+#
+# otherwise it delimits characters and then creates a match
+#
+unmatch()
+{
+	local sedstr=
+	# TODO: unmatch numbers
+	# if [ -n "`echo "$1" | grep -E "^$SD$"`" ]; then
+	if [ -n "`echo "$1" | grep -E "^$V$"`" ]; then
+		sedstr=":loop
+			s/^($an_+)?($an_)(\|.*)?$/\1\|\1[^\2]\3/
+			t loop
+			s/^[|](.*)$/($al_$an_{${#1}}|\1)$an_*/"
+	fi
+	# FIXME: unmatch non alnum characters:
+	# with the below "\*" results in ([^\\]|\\[^*])*,
+	# but it should be ([^\\]|\\+[^*\\])*
+	#else
+	#	local a="[^][{}(|)+*?\\/.^]"
+	#	local b="[][{}(|)+*?\\/.^]"
+	#	sedstr="$delimitstr
+	#		:loop
+	#		s/^(($a|[\\]$b)+)?(($a)|[\\]($b))(\|.*)?$/\1\|\1[^\4\5]\6/
+	#		t loop
+	#		s/^[|](.*)$/(\1)*/"
+	#fi
+	echo $1 | sed -r "$sedstr"
+}
+
+# excludes testing in strings, chars and comment
+excl_code()
+{
+	echo "([^$1\"'/$2]*|\/[^$1\"*'/$2]*|\/?$comm1|\/?$ch1|\/?$ch2|\/?$str|\/?$comm2)*"
+}
+
+# usage: nestc "(" ")" [number]
+# only works for nesting single chars. (TODO: multiple)
+nestc()
+{
+	# $1/$2: opening/closing char, $3: nesting depth (optional), $4: extra chars handed to excl_code
+	local i="${3:-5}";	# depth falls back to 5 whenever it is omitted, as the usage line promises
+	# first and 2nd are flipped to enable matching
+	# square brackets "]["
+	local p="$(excl_code "$2$1" "$4")"
+	local ret="$p"
+	while [ $i -gt 0 ]; do	# wrap one more nesting level around the pattern per iteration
+		ret="${p}([$1]${ret}[$2]${p})*"
+		i=$(($i-1));
+	done
+	echo "$ret"
+}
+
+blank_it()
+{
+	echo "$1" | sed -r "
+		:a
+		s/([[:alnum:]])[[:space:]]+([[:alnum:]])/\1[[:space:]]+\2/g
+		s/[[:space:]]+/[[:space:]]*/g
+		$!{
+			N; ba
+		}"
+}
+
+# usage: grep_12 "struct list_head {" "include/linux/slub_def.h"
+# greps included as well, when not found
+grep_12()
+{
+	flist=
+	_grep_12 "$1" "$2"
+}
+
+_grep_12()
+{
+	grep -qE "$1" "$2";
+	if [ $? -eq 0 ]; then
+		echo "$2";
+	else
+		[ -n "$flist" ] && flist="$flist|"
+		flist="$flist$2"
+		local b="${2%\/*}"
+		for f in `sed -rn "/^$s#${s}include${s}[\"<]/{
+				$delimitstr
+				s/^$s#${s}include$s<([^>]+)>$cendl/include\/\1/p
+				s/^$s#${s}include$s\"([^\"]+)\"$cendl/${b//\//\/}\/\1/p
+				}" "$2" | grep -vE "($flist)" `; do
+			[ -f "$f" ] && _grep_12 "$1" "$f" && return 0;
+			b="${f%\/*}"
+		done
+	fi
+}
+
+get_matching_files()
+{
+	local files="${@:2}"
+	if [ -z "$files" ]; then
+		grep -lE "$1" $(git-ls-files "*.[ch]");
+		return;
+	fi
+	for f in $files; do
+		[ -f "$f" ] && echo -n "$(grep_12 "$1" "$f") "
+		[ -d "$f" ] && grep -lE "$1" $(git-ls-files "*.[ch]")
+	done
+}
+
+
+_sedit()
+{
+	sed -r -n "
+		:a
+		/$1/${em}b
+		/$2/${em}{
+			H; N; ba
+		}
+		$4
+		:b
+		/$3/${em}{
+			H; N; bb
+		}
+		$5
+	" "${@:6}"
+}
+
+sedit()
+{
+	local print2="p"
+	[ $(($1 & 4 )) -eq 4 ] && print2="="
+
+	local stages="` blank_it "$2@$3@$4" | tr "@" " "`"
+	local sed2str="/ \+@$/{N; s/@\n//}";
+	if [ $(($1 & 1 )) -eq 1 ]; then
+		_sedit $stages "#" "$print2" "${@:5}" | sed -r "$sed2str";
+	else
+		local print1=
+		[ $(($1 & 2 )) -eq 0 ] && print1="`echo -e "+@\n="`"
+		[ $1 -eq 4 ] && sed2str="/ \+@$/{N; N; s/\/\/ vi ([^+]*) \+@\n([0-9]*)\n([0-9]*)/\1:\2-\3/}"
+
+		for f in "${@:5}"; do
+			_sedit $stages "i// vi $f $print1" "$print2" "$f"
+		done | sed -r "$sed2str";
+	fi
+}
+
+get_func()
+{
+	# TODO: C++ member stuff
+	local fret="( $V( |\*|$comm1)*$S( |\*|$comm1)*)+"
+	local func_args="$(nestc "(" ")" 10 "+%/?#&|<>.^-")"
+	local func_body="$(nestc "{" "}" 10)"
+
+	local fl="${@:3}"
+	if [ -z "$fl" ]; then
+		local match="^($fret)? $2 ((\($func_args)+(\) ($comm1 )*((\{$func_body)+\}?)?)?)?$";
+		match="`blank_it "$match"`";
+		fl="`git-ls-files "*.[ch]"`"
+		fl="`grep -lE "$match" $fl`";
+		[ -z "$fl" ] && return;
+	fi
+
+	local stage1="^$fret ($2 ((\($func_args)+(\) ($comm1 )*((\{$func_body)+\}?)?)?)?)?$";
+	local stage2="^$fret $2 \($func_args\) \{";
+	local stage3="^$fret $2 \($func_args\) \{$func_body\}";
+	sedit $1 "$stage1" "$stage2" "$stage3" $fl
+}
+
+
+# get struct/enum/union
+get_elem()
+{
+	local sA="($S|$comm1)"
+	local sB="($V$S|$V \(\([^()]+\)\) |$comm1 )*"
+	local struct_body="$(nestc "{" "}" 10)"
+	local stage1=;
+	local stage2=;
+	local stage3=;
+
+	local match="^ (($sB$2)?$sA$sB)?$3($sA$sB)?((\{$struct_body)+\}?)?$cendl"
+	match="`blank_it "$match"`"
+	local fl="`get_matching_files "$match" ${@:4}`"
+	if [ -n "$fl" ]; then
+		stage1="^ ($V|$V \(\([^()]*\)\)|$sB$2($sA$sB($3($sA$sB)?((\{$struct_body)+\}?)?)?)?)$cendl"
+		stage2="^ $sB$2$sA$sB$3($sA$sB)? \{"
+		stage3="^ $sB$2$sA$sB$3($sA$sB)? \{$struct_body\}"
+		sedit $1 "$stage1" "$stage2" "$stage3" $fl
+	fi
+
+	match="^( (($sB$2)?($sA$sB| )\{)?($struct_body\})+)? $sB$3($sA$sB| );$cendl"
+	match="`blank_it "$match"`"
+	fl="`get_matching_files "$match" ${@:4}`"
+	if [ -n "$fl" ]; then
+		stage1="^ ($V|$V \(\([^()]*\)\)|$sB$2(($sA$sB| )((\{$struct_body)+(\} $sB($3($sA$sB| );)?)?)?)?)$cendl"
+		stage2="^ $sB$2($sA$sB| )\{"
+		stage3="^ $sB$2($sA$sB| )\{$struct_body\} $sB$3($sA$sB| );$cendl"
+		# there can be false positives due to stage1.
+		# here we remove them
+		sedit $1 "$stage1" "$stage2" "$stage3" $fl |
+			sed -r "/^\/\/ vi .* \+[0-9]*$/{
+				:a
+				$!{
+					d; b
+				}
+				N
+				/\/\/ vi .* \+[0-9]*\n\/\/ vi /${em}b
+				D; ba
+			}";
+
+	fi
+}
+
+get_def()
+{
+	local stage1="^ # define$S$2$Q"
+	local match="`blank_it "$stage1"`";
+	local stage2="^"
+	local stage3="[^\\]$"
+	sedit $1 "$stage1" "$stage2" "$stage3" `get_matching_files "$match" ${@:3}`
+}
+
+get_any()
+{
+	local defm=" # define$S$2$Q"
+	local sA="($S|$comm1)"
+	local sB="($V$S|$V \(\([^()]+\)\) |$comm1 )*"
+	local body="$(nestc "{" "}" 10)"
+
+	local elemsl="(($sB(struct|enum|union))?$sA$sB)?"
+	local elemsr1="$2($sA$sB)?((\{$body)+\}?)?"
+
+	local fret="( $V( |\*|$comm1)*$S( |\*|$comm1)*)+"
+	local func_args="$(nestc "(" ")" 10 "+%/?#^&|<>.-")"
+	local funcm="($fret)? $2 ((\($func_args)+(\) ($comm1 )*((\{$body)+\}?)?)?)?";
+
+	local match="`blank_it "^($defm| $elemsl$elemsr1$| (($elemsl\{)?($body\})+)? $sB$2$sA$sB;$|$funcm$)"`";
+	defm="`blank_it "^$defm"`"
+	funcm="`blank_it "^$funcm$"`"
+	git-grep -E "$match" | while read l; do
+		f="${l%:*}"
+		l="${l#*:}"
+		if [ "`echo "$l" | grep -E "^$defm"`" ]; then
+			get_def $1 "$2" $f
+		elif [ "`echo "$l" | grep -E "^$funcm$"`" ]; then
+			get_func $1 "$2" $f
+		else
+			for e in struct enum union; do
+				z="(($sB$e)?$sA$sB)?"
+				echo "$l" | grep -Eq "`blank_it "^ ( $z$elemsr1| (($z\{)?($body\})+)? $sB$2$sA$sB;)$cendl"`"
+				[ $? -eq 0 ] && get_elem $1 "$e" "$2" $f
+			done
+		fi
+	done
+}
+
+usage()
+{	# print the help text on stdout
+cat << EOF
+USAGE: git-cget.sh [OPTION]... PATTERN [FILE]...
+print elements of C code with name PATTERN in the git repository,
+where PATTERN is an extended regular expression
+
+Options to specify which element(s) should be printed:
+	-f	function
+	-s	struct
+	-d	definition
+	-m	macro
+	-e	enum
+	-u	union
+
+Options to alter the output:
+	-b	only print body
+	-n	only print file and lineranges
+	-?	print this help
+
+EOF
+}
+
+parseopts()
+{
+	local fl=			# file list
+	local getflag=0		# bitmask of the element kinds requested with -f/-d/-m/-s/-e/-u
+	local printflag=0	# output mode, passed as first argument to the get_* helpers
+
+	while [ $# -ne 0 ]; do	# options and operands may be interleaved
+		while getopts "fdmseubn" optname; do
+			case "$optname" in
+				"f") getflag=$((getflag|1)) ;;
+				"d") getflag=$((getflag|2)) ;;
+				"m") getflag=$((getflag|4)) ;;
+				"s") getflag=$((getflag|8)) ;;
+				"e") getflag=$((getflag|16)) ;;
+				"u") getflag=$((getflag|32)) ;;
+				"b") printflag=3 ;;
+				"n") printflag=4 ;;
+				"?") usage; exit 0; ;;
+			esac
+		done
+		shift $((OPTIND-1))
+		[ $# -eq 0 ] && break;
+		OPTIND=0	# restart getopts for the arguments that follow this operand
+		if [ -f "$1" ]; then
+			fl="$fl $1";
+		else
+			name="$1";
+		fi
+		shift
+	done
+	[ -z "$name" ] && { usage; exit 1; }	# PATTERN is required: stop instead of searching for an empty name
+	if [ $getflag -eq 0 ]; then
+		get_any $printflag "$name" $fl
+		return;
+	fi
+	[ $((getflag&1)) -eq 1 ] && get_func $printflag "$name" $fl
+	if [ $((getflag&6)) -ne 0 ]; then
+		if [ $((getflag&6)) -eq 2 ]; then
+			dom="($S.*)?";		# print only definitions
+		elif [ $((getflag&6)) -eq 4 ]; then
+			dom="\(.*";		# print only macros
+		else
+			dom="(\(.*|$S.*)?";	# catch definition or macro
+		fi
+		get_def $printflag "$name$dom" $fl
+	fi
+	if [ $((getflag&8)) -eq 8 ]; then
+		get_elem $printflag "struct" "$name" $fl
+	fi
+	if [ $((getflag&16)) -eq 16 ]; then
+		get_elem $printflag "enum" "$name" $fl
+	fi
+	if [ $((getflag&32)) -eq 32 ]; then
+		get_elem $printflag "union" "$name" $fl
+	fi
+}
+
+#main
+parseopts "$@"

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 10:09 [PATCH] git-cget: prints elements of C code in the git repository Roel Kluin
@ 2009-03-24 12:05 ` Johannes Schindelin
  2009-03-24 13:30   ` Roel Kluin
  2009-03-27  9:22   ` roel kluin
  0 siblings, 2 replies; 16+ messages in thread
From: Johannes Schindelin @ 2009-03-24 12:05 UTC (permalink / raw)
  To: Roel Kluin; +Cc: git

Hi,

Disclaimer: if you are offended by constructive criticism, or likely to
answer with insults to the comments I offer, please stop reading this mail
now (and please do not answer my mail, either). :-)

Still with me?  Good.  Nice to meet you.

Just for the record: responding to a patch is my strongest way of saying
that I appreciate your work.

On Tue, 24 Mar 2009, Roel Kluin wrote:

> Maybe something like this is useful?
> ------------------------------>8-------------8<---------------------------------
> Add git-cget.sh: prints elements of C code in the git repository.
> 
> Signed-off-by: Roel Kluin <roel.kluin@gmail.com>

I really like what this tries to do.

But there are some problems:

- the name is not expressive enough,

- the commit message is severely lacking (I had to apply it and run it to 
  find out what problem it tries to solve),

- it still uses dashed git commands,

- it might be better to put it into contrib/, otherwise it needs 
  documentation with examples, and tests,

- it misdetects functions: trying

	$ ./git-cget -f get_sha1.*

  shows this, amongst others:

-- snip --
// vi sha1_name.c +59
                return get_sha1_hex(hex, sha1) == 0;
        return found;
}

static int match_sha(unsigned len, const unsigned char *a, const unsigned 
char *b)
{
        do {
                if (*a != *b)
                        return 0;
                a++;
                b++;
                len -= 2;
        } while (len > 1);
        if (len)
                if ((*a ^ *b) & 0xf0)
                        return 0;
        return 1;
}
-- snap --

- maybe it would be nicer to integrate this into git grep itself?  Maybe 
  by allowing multi-line matches and default patterns, or pattern 
  templates?

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 12:05 ` Johannes Schindelin
@ 2009-03-24 13:30   ` Roel Kluin
  2009-03-24 13:52     ` Johannes Schindelin
  2009-03-24 13:59     ` Jeff King
  2009-03-27  9:22   ` roel kluin
  1 sibling, 2 replies; 16+ messages in thread
From: Roel Kluin @ 2009-03-24 13:30 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git

Hi Johannes,

thanks for your pointers.

> - the name is not expressive enough,

How about get-def? If it's too long it will become tiresome to type.

> - the commit message is severely lacking (I had to apply it and run it to 
>   find out what problem it tries to solve),

This is probably better:

This script shows where elements of C code are defined in the git-repository.
It is still experimental, although it should work in most cases.

As invoking it with -? will tell:

USAGE: git get-def [OPTION]... PATTERN [FILE]...
print elements of C code with name PATTERN in the git repository,
where PATTERN is an extended regular expression

Options to specify which element(s) should be printed:
        -f      function
        -s      struct
        -d      definition
        -m      macro
        -e      enum
        -u      union

Options to alter the output:
        -b      only print body
        -n      only print file and lineranges

The pattern is required.

If no files are specified it will search the entire repository for
the specified elements with name PATTERN.

If no element is specified, it will search for any of the above
elements and print these when found.

> - it still uses dashed git commands,

Should be fixed in my next version

> - it might be better to put it into contrib/, otherwise it needs 
>   documentation with examples, and tests,

Ok.

> - it misdetects functions: trying
> 
> 	$ ./git-cget -f get_sha1.*
> 
>   shows this, amongst others:
> 
> -- snip --
> // vi sha1_name.c +59
>                 return get_sha1_hex(hex, sha1) == 0;
>         return found;
> }
> 
> static int match_sha(unsigned len, const unsigned char *a, const unsigned 
> char *b)
> {
>         do {
>                 if (*a != *b)
>                         return 0;
>                 a++;
>                 b++;
>                 len -= 2;
>         } while (len > 1);
>         if (len)
>                 if ((*a ^ *b) & 0xf0)
>                         return 0;
>         return 1;
> }
> -- snap --

Yes, it doesn't always work yet, I'll try to fix this in my next version.

> - maybe it would be nicer to integrate this into git grep itself?  Maybe 
>   by allowing multi-line matches and default patterns, or pattern 
>   templates?

I did attempt to program it in C, but I am more comfortable with bash. Also,
what would be more appropriate for this purpose, I think, is a git sed.

However, allowing multi-line matches may be an option, I'll look into it.

> Ciao,
> Dscho

Thanks,

Roel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 13:30   ` Roel Kluin
@ 2009-03-24 13:52     ` Johannes Schindelin
  2009-03-24 15:08       ` Ping Yin
  2009-03-24 13:59     ` Jeff King
  1 sibling, 1 reply; 16+ messages in thread
From: Johannes Schindelin @ 2009-03-24 13:52 UTC (permalink / raw)
  To: Roel Kluin; +Cc: git

Hi,

On Tue, 24 Mar 2009, Roel Kluin wrote:

> > - the name is not expressive enough,
> 
> How about get-def? If it's too long it will become tiresome to type.

IMHO "git grep-c" would be more to the point...

> > - the commit message is severely lacking (I had to apply it and run it 
> >   to find out what problem it tries to solve),
> 
> This is probably better:
> [...]

Yes, I like it!

> > - maybe it would be nicer to integrate this into git grep itself?  
> >   Maybe by allowing multi-line matches and default patterns, or 
> >   pattern templates?
> 
> I did attempt to program it in C, but I am more comfortable with bash.

Did I misunderstand your code?  I thought all it does is provide a way to 
match multiline patterns... but then, I could not do more than just 
quickly glance over the code.

> Also, what would be more appropriate for this purpose, I think, is a git 
> sed.

Just as it happens, this is a toy I am working on and off these days... 
strbuf_regsub() is already done, and half the needed code is in my version 
of git grep.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 13:30   ` Roel Kluin
  2009-03-24 13:52     ` Johannes Schindelin
@ 2009-03-24 13:59     ` Jeff King
  2009-03-24 14:04       ` Stefan Karpinski
                         ` (3 more replies)
  1 sibling, 4 replies; 16+ messages in thread
From: Jeff King @ 2009-03-24 13:59 UTC (permalink / raw)
  To: Roel Kluin; +Cc: Johannes Schindelin, git

On Tue, Mar 24, 2009 at 02:30:28PM +0100, Roel Kluin wrote:

> > - the commit message is severely lacking (I had to apply it and run it to 
> >   find out what problem it tries to solve),
> 
> This is probably better:
> 
> This script shows where elements of C code are defined in the git-repository.
> It is still experimental, although it should work in most cases.

Wouldn't it be a lot easier to base this on something like ctags, which
already knows how to do all the parsing of quirky C?

I.e.:

  git ls-files | xargs ctags
  vi -t get_sha1

Where you can replace "vi" with your editor of choice. And if you really
like getting the definition dumped to stdout, writing a command-line
viewer for the ctags format would be quite simple (and I would be
surprised if one hasn't been written already).

-Peff

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 13:59     ` Jeff King
@ 2009-03-24 14:04       ` Stefan Karpinski
  2009-03-24 14:05       ` Johannes Schindelin
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 16+ messages in thread
From: Stefan Karpinski @ 2009-03-24 14:04 UTC (permalink / raw)
  To: Jeff King; +Cc: Roel Kluin, Johannes Schindelin, git

On Tue, Mar 24, 2009 at 9:59 AM, Jeff King <peff@peff.net> wrote:
>
> Wouldn't it be a lot easier to base this on something like ctags, which
> already knows how to do all the parsing of quirky C?

Ctags also has the advantage of handling other languages. According to
the man page: C, Pascal, Fortran, YACC, lex, and lisp.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 13:59     ` Jeff King
  2009-03-24 14:04       ` Stefan Karpinski
@ 2009-03-24 14:05       ` Johannes Schindelin
  2009-03-24 14:38       ` Mike Ralphson
  2009-03-24 16:17       ` roel kluin
  3 siblings, 0 replies; 16+ messages in thread
From: Johannes Schindelin @ 2009-03-24 14:05 UTC (permalink / raw)
  To: Jeff King; +Cc: Roel Kluin, git

Hi,

On Tue, 24 Mar 2009, Jeff King wrote:

>   git ls-files | xargs ctags

Or 'git ls-files -z | xargs -0r ctags', which fails with Solaris' default 
xargs, though.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 13:59     ` Jeff King
  2009-03-24 14:04       ` Stefan Karpinski
  2009-03-24 14:05       ` Johannes Schindelin
@ 2009-03-24 14:38       ` Mike Ralphson
  2009-03-24 14:56         ` Johannes Schindelin
  2009-03-24 16:33         ` Steven Tweed
  2009-03-24 16:17       ` roel kluin
  3 siblings, 2 replies; 16+ messages in thread
From: Mike Ralphson @ 2009-03-24 14:38 UTC (permalink / raw)
  To: Jeff King, Roel Kluin; +Cc: Johannes Schindelin, git

2009/3/24 Jeff King <peff@peff.net>:
> Wouldn't it be a lot easier to base this on something like ctags, which
> already knows how to do all the parsing of quirky C?
>
> I.e.:
>
>  git ls-files | xargs ctags
>  vi -t get_sha1
>
> Where you can replace "vi" with your editor of choice.

This kind of pipeline has the benefit that it can actually work on the
*repository*, and not just the working copy (as per the posted
script).

Although this kind of tool might be useful (I might even use it myself
for working on git, rather than as *part of git* on my other projects,
which are not written in C), it's a bit tangential even for contrib/
isn't it?

> And if you really like getting the definition dumped to stdout,
> writing a command-line viewer for the ctags format would be
> quite simple (and I would be surprised if one hasn't been
> written already).

http://users.telenet.be/bdr/pub/cshow/  ?

Mike

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 14:38       ` Mike Ralphson
@ 2009-03-24 14:56         ` Johannes Schindelin
  2009-03-24 16:37           ` Mike Ralphson
  2009-03-24 16:33         ` Steven Tweed
  1 sibling, 1 reply; 16+ messages in thread
From: Johannes Schindelin @ 2009-03-24 14:56 UTC (permalink / raw)
  To: Mike Ralphson; +Cc: Jeff King, Roel Kluin, git

[-- Attachment #1: Type: TEXT/PLAIN, Size: 620 bytes --]

Hi,

On Tue, 24 Mar 2009, Mike Ralphson wrote:

> 2009/3/24 Jeff King <peff@peff.net>:
> > Wouldn't it be a lot easier to base this on something like ctags, which
> > already knows how to do all the parsing of quirky C?
> >
> > I.e.:
> >
> >  git ls-files | xargs ctags
> >  vi -t get_sha1
> >
> > Where you can replace "vi" with your editor of choice.
> 
> This kind of pipeline has the benefit that it can actually work on the 
> *repository*, and not just the working copy (as per the posted script).

Did I miss something?  git ls-files pipes only the names to xargs, not the 
full contents, right?

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 13:52     ` Johannes Schindelin
@ 2009-03-24 15:08       ` Ping Yin
  0 siblings, 0 replies; 16+ messages in thread
From: Ping Yin @ 2009-03-24 15:08 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Roel Kluin, git

On Tue, Mar 24, 2009 at 9:52 PM, Johannes Schindelin
<Johannes.Schindelin@gmx.de> wrote:

>> Also, what would be more appropriate for this purpose, I think, is a git
>> sed.
>
> Just as it happens, this is a toy I am working on and off these days...
> strbuf_regsub() is already done, and half the needed code is in my version
> of git grep.
>

this is my version

[alias]
sed = !git ls-files --stage | grep ^100 | awk '{print $4}' | xargs sed

Then i can do

git sed -i 's/A/B/g'

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 13:59     ` Jeff King
                         ` (2 preceding siblings ...)
  2009-03-24 14:38       ` Mike Ralphson
@ 2009-03-24 16:17       ` roel kluin
  3 siblings, 0 replies; 16+ messages in thread
From: roel kluin @ 2009-03-24 16:17 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Schindelin, git

> Wouldn't it be a lot easier to base this on something like ctags, which
>  already knows how to do all the parsing of quirky C?
>
>  I.e.:
>
>   git ls-files | xargs ctags
>   vi -t get_sha1

You are right, I'll see what I can do with it.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 14:38       ` Mike Ralphson
  2009-03-24 14:56         ` Johannes Schindelin
@ 2009-03-24 16:33         ` Steven Tweed
  2009-03-25  4:23           ` Jeff King
  1 sibling, 1 reply; 16+ messages in thread
From: Steven Tweed @ 2009-03-24 16:33 UTC (permalink / raw)
  To: Mike Ralphson; +Cc: Jeff King, Roel Kluin, Johannes Schindelin, git

On Tue, Mar 24, 2009 at 2:38 PM, Mike Ralphson <mike.ralphson@gmail.com> wrote:
> 2009/3/24 Jeff King <peff@peff.net>:
> This kind of pipeline has the benefit that it can actually work on the
> *repository*, and not just the working copy (as per the posted
> script).

Speaking of wanting things to work with the actual repository, one
thing that I've been meaning to continue work on if I get the time is
basically a 'show me any commit diffs that involve string s' (i.e., the
locations in which a change involving s occurs, rather than just
'current file contains s' in exactly the same ways the previous
version did). I'm extremely unlikely to actually produce anything
based on that in the near future. But one thing that struck me that
might be reasonably useful is some way of filtering the "context" that
a string occurs in (is it in generic code context, in a string or in a
comment). These are things that tend to be reasonably parametrisable
by regular expressions (although escaping string delimiters within
strings makes completely correct behaviour tricky), so if git-grep
itself were to be extended I'd prefer something that can be specified
per search in a more generally usable way than something very C
specific. Of course this is a quite difficult task as the actual diff
may not contain enough lines to distinguish, eg, that it occurs within
a longer comment. (As already disclaimed, I'm unlikely to actually get
around to doing any of this work).

cheers, dave tweed

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 14:56         ` Johannes Schindelin
@ 2009-03-24 16:37           ` Mike Ralphson
  0 siblings, 0 replies; 16+ messages in thread
From: Mike Ralphson @ 2009-03-24 16:37 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Jeff King, Roel Kluin, git, Ping Yin, Steven Tweed

2009/3/24 Johannes Schindelin <Johannes.Schindelin@gmx.de>:
>> This kind of pipeline has the benefit that it can actually work on the
>> *repository*, and not just the working copy (as per the posted script).
>
> Did I miss something?  git ls-files pipes only the names to xargs, not the
> full contents, right?

See where I wrote "this kind of pipeline" vs "git ls-files" which was
just quoting Peff? 8-)

I dunno, maybe C folk using git would only be interested in the
index/working copy, but that tells me this script is even more
tenuously linked to git. We don't bundle a compiler just because it
can use git ls-files to work out what to compile...

The script might be best as a standalone tool which just happened to
take advantage of the VCS in use by reacting to its surroundings. Then
it would also be useful by hg, svn, cvs people etc.

If it actually used the information in the repository to be able to
run git blame on the definitions, or show a git log -p style list of
changes in the definition, that might be different.

Mike

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 16:33         ` Steven Tweed
@ 2009-03-25  4:23           ` Jeff King
  0 siblings, 0 replies; 16+ messages in thread
From: Jeff King @ 2009-03-25  4:23 UTC (permalink / raw)
  To: Steven Tweed; +Cc: Mike Ralphson, Roel Kluin, Johannes Schindelin, git

On Tue, Mar 24, 2009 at 04:33:13PM +0000, Steven Tweed wrote:

> Speaking of wanting things to work with the actual repository , one
> thing that I've been meaning to continue work on if I get the time is
> basically a 'show me any commit diff's that involve string s' (ie, the
> locations in which a change involving s occurs rather than just
> 'current file contains s (in exactly the same ways the previous
> version did). I'm extremely unlikely to actually produce anything

How about "git log -S", or does that somehow not meet your needs (and if
not, how)?

-Peff

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-24 12:05 ` Johannes Schindelin
  2009-03-24 13:30   ` Roel Kluin
@ 2009-03-27  9:22   ` roel kluin
  2009-03-27 11:26     ` Johannes Schindelin
  1 sibling, 1 reply; 16+ messages in thread
From: roel kluin @ 2009-03-27  9:22 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git

Hi Johannes,

> - it misdetects functions: trying
>
>        $ ./git-cget -f get_sha1.*

The .* is not strict enough. What you want to do instead is:

$ ./git-cget -f "get_sha1[A-Za-z0-9_]*"

and that will give correct matches.

Roel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] git-cget: prints elements of C code in the git repository
  2009-03-27  9:22   ` roel kluin
@ 2009-03-27 11:26     ` Johannes Schindelin
  0 siblings, 0 replies; 16+ messages in thread
From: Johannes Schindelin @ 2009-03-27 11:26 UTC (permalink / raw)
  To: roel kluin; +Cc: git

Hi,

On Fri, 27 Mar 2009, roel kluin wrote:

> Hi Johannes,
> 
> > - it misdetects functions: trying
> >
> >        $ ./git-cget -f get_sha1.*
> 
> The .* is not strict enough. What you want to do instead is:
> 
> $ ./git-cget -f "get_sha1[A-Za-z0-9_]*"
> 
> and that will give correct matches.

No, the problem was that it did not show a function _definition_, but 
mistook a function _call_ for a definition.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2009-03-27 11:28 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-03-24 10:09 [PATCH] git-cget: prints elements of C code in the git repository Roel Kluin
2009-03-24 12:05 ` Johannes Schindelin
2009-03-24 13:30   ` Roel Kluin
2009-03-24 13:52     ` Johannes Schindelin
2009-03-24 15:08       ` Ping Yin
2009-03-24 13:59     ` Jeff King
2009-03-24 14:04       ` Stefan Karpinski
2009-03-24 14:05       ` Johannes Schindelin
2009-03-24 14:38       ` Mike Ralphson
2009-03-24 14:56         ` Johannes Schindelin
2009-03-24 16:37           ` Mike Ralphson
2009-03-24 16:33         ` Steven Tweed
2009-03-25  4:23           ` Jeff King
2009-03-24 16:17       ` roel kluin
2009-03-27  9:22   ` roel kluin
2009-03-27 11:26     ` Johannes Schindelin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).