git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH v2] prints elements of C code in the git repository
@ 2009-04-06 22:09 Roel Kluin
  0 siblings, 0 replies; only message in thread
From: Roel Kluin @ 2009-04-06 22:09 UTC (permalink / raw)
  To: Johannes Schindelin, peff, mike.ralphson; +Cc: git

This script searches for the definitions of C elements (functions, structures,
defines, ...) and prints them to stdout. It now uses the ctags tags file.
Thanks for the previous comments; is this better?

Usage examples:

In your kernel source directory, run the following to create your tags file:
scripts/tags.sh tags

Say the get-def.sh script lives in `../git/contrib/'.
To print the function definitions of kmalloc, do:
../git/contrib/get-def.sh -f kmalloc

You can search for a function with a specific multiline pattern:
../git/contrib/get-def.sh -f kmalloc "/size.*SLUB_DMA/p"

Maybe you want to search every function within a directory for a
pattern. To do so you can source get-def.sh and reuse its definitions:
source ../git/contrib/get-def.sh

bad_unsigned="`simple_sed \
	"unsigned ([^;{}()]*, )*($V)[#...#]\2 (<|>=) 0[^x]"`"
bad_unsigned_test="/$bad_unsigned/{
p
s/^.*$bad_unsigned.*$/Due to variable \2/p
}"

../git/contrib/get-def.sh -f "$V" kernel/ "$bad_unsigned_test" |
less

Since repeatedly searching for functions is slow, you may want
to convert all functions to single lines and write them to a file
that can subsequently be parsed more quickly.

warning: this command may take a long time (once):
../git/contrib/get-def.sh -f "$V" -- git-ls-files "*.[ch]" \
"s/($S|$comm1|$comm2)*([\\]?\n|$comm1|$S$S)($S|$comm1)*/ /g;
p;" > ../allfuncs

And then for instance do:
sed -n -r "/$bad_unsigned/{
p
s/^.*$bad_unsigned.*$/Due to variable \2/p
}" ../allfuncs | less
------------------------------>8-------------8<---------------------------------
commit e61ffb20dffa95025e61e2706603a7c72fcad37f
Author: Roel Kluin <roel.kluin@gmail.com>
Date:   Mon Apr 6 23:58:28 2009 +0200

    This script searches the definition of elements of C and dumps them to
    stdout. It requires ctags and bash.
    
    As invoking it with -? will tell:
    
    USAGE: git get-def [OPTION]... PATTERN [FILE|DIRECTORY]... [SEDSCRIPT]
    print elements of C code with name PATTERN, an extended regular expression. If
    SEDSCRIPT is provided, it puts the entire element in the hold space before the
    execution of SEDSCRIPT.
    
    
    Options to specify which element(s) should be printed:
            -c      class name
            -d      define
            -f      function or method name
            -g      enumeration
            -m      member (of structure or class data)
            -s      structure
            -t      typedef
            -u      union
            -v      variable
            -D      only non-macro defines
            -M      only macro defines
    
    Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
---
diff --git a/contrib/get-def.sh b/contrib/get-def.sh
new file mode 100755
index 0000000..9428003
--- /dev/null
+++ b/contrib/get-def.sh
@@ -0,0 +1,254 @@
+#!/bin/bash
+# FIXME: make C++ style members
+
+int="[0-9]"		# one decimal digit (despite the name)
+hex="[a-f0-9]"		# one lowercase hexadecimal digit
+hEx="[A-Fa-f0-9]"	# one hexadecimal digit, either case
+HEX="[A-F0-9]"		# one uppercase hexadecimal digit
+upp="[A-Z]"		# uppercase letter
+up_="[A-Z_]"		# uppercase letter or underscore
+low="[a-z]"		# lowercase letter
+lo_="[a-z_]"		# lowercase letter or underscore
+alp="[A-Za-z]"		# any letter
+al_="[A-Za-z_]"	# any letter or underscore
+ALN="[A-Z0-9]"		# uppercase letter or digit
+AN_="[A-Z0-9_]"	# uppercase letter, digit or underscore
+aln="[A-Za-z0-9]"	# letter or digit
+an_="[A-Za-z0-9_]"	# letter, digit or underscore
+
+em='!'			# because of bash bang; used as sed's "!" negation in sedstr_matches
+
+D="$int*\.?$int+x?$hex*[uUlL]{0,3}[fF]?"		# a number, float or hex (loose match)
+# stricter alternative to $D (costs one backreference for (git )grep)
+SD="($int+[uUlLfF]?|$int+[uU]?[lL][lL]?|0x$hex+|0x$HEX+|$int+[lL][lL][uU]|$int*\.$int+[fF]?)"
+
+V="$al_+$an_*"		# variable/function name (or definition)
+K="$up_+$AN_*"		# definition (in capitals)
+
+# to catch variables that are members or arrays:
+W="[a-zA-Z0-9_>.-]*"
+SW="$V(\[[^][]*\]|\[[^][]*\[[^][]*\][^][]*\]|\.$V|->$V)*"	 # more strict: $V plus [..] (one nested level), .member or ->member; 1 backref
+
+s="[[:space:]]*"	# optional whitespace
+S="[[:space:]]+"	# mandatory whitespace
+
+# useful to ensure the end of a variable name:
+Q="[^[:alnum:]_]"
+Q2="[^[:alnum:]_>.]" # the '>' is tricky, it's an operator as well
+
+# match comments ("N backrefs" = number of capture groups the fragment contains)
+comm1="\/\*([^*]+|\**[^*/])*\*+\/"			# /* ... */ comment; 1 backref
+comm2="\/\/([^\n]+|[n\\]+)*"				# // comment; 1 backref
+
+# match the end of the line, including comments:
+cendl="$s($comm1|$comm2|$s)*$"			 	# 3 backrefs
+cendln="$s($comm1|$comm2|$s)*($|\n)"			# 4 backrefs; also stops at an embedded newline
+
+# strings and characters can contain things we want to match
+str="\"([^\\\"]+|\\\\.)*\""				# double-quoted string with backslash escapes; 1 backref
+ch1="'[^\\']'"		# plain character literal
+ch2="'\\\\.[^']*'"	# character literal that starts with a backslash escape
+ch="$ch1|$ch2"
+
+# match a run of C code in which comments, strings and chars are consumed whole:
+ccode="([^\"'/]+|\/[^*\"'/]|\/?$comm1|\/?$ch1|\/?$ch2|\/?$str)*"		# 3 backrefs
+ccoden="([^\"'/]+|\/[^*\"'/]|\/?$comm1|\/?$ch1|\/?$ch2|\/?$str|\/?$comm2)*"	# 4 backrefs; also accepts // comments
+
+nps="[^()]*"		# text without parentheses
+nstdps="(\($nps(\($nps(\($nps(\($nps(\($nps\)$nps)*\)$nps)*\)$nps)*\)$nps)*\)$nps)*"
+npz="$nps$nstdps"	# text with balanced parentheses, up to 5 levels deep
+nnps="\($npz\)"	# the same, enclosed in one outer pair of parentheses
+
+ncs="[^}{]*"		# text without curly braces
+nstdcs="(\{$ncs(\{$ncs(\{$ncs(\{$ncs(\{$ncs\}$ncs)*\}$ncs)*\}$ncs)*\}$ncs)*\}$ncs)*"
+ncz="$ncs$nstdcs"	# text with balanced braces, up to 5 levels deep
+nncs="\{$ncz\}"	# the same, enclosed in one outer pair of braces
+
+delimitstr="s/([][{}(|)+*?\\/.^])/\\\\\1/g"	# sed program: put a backslash before regex metacharacters and '/' ('$' is not in the set)
+delimit()	# filter: apply $delimitstr to stdin, result on stdout
+{
+	sed -r "$delimitstr"
+}
+
+# print an ERE for code without the chars in $1/$2; strings, chars and comments are skipped whole
+excl_code()	# arguments from the third on are added as extra alternatives
+{
+	local incl="" f		# f is local so a sourcing shell's own $f is not clobbered
+	for f in "${@:3}"; do
+		incl="$incl|\/?$f";
+	done
+	echo "([^$1\"'/$2]*$incl|\/[^$1\"*'/$2]*|\/?$comm1|\/?$ch1|\/?$ch2|\/?$str|\/?$comm2)*"
+}
+
+# usage: nestc "(" ")" [number]  -- print an ERE for code with balanced $1/$2 pairs
+nestc()	# nested up to [number] levels; further arguments are passed on to excl_code
+{
+	local i="${3:-5}"	# nesting depth, 5 when omitted or empty
+	# (the former "[ $# -eq 1 ]" test left i empty when called with two arguments)
+	# first and 2nd are flipped to enable matching
+	# square brackets "]["
+	local p="$(excl_code "$2$1" "${@:4}")"
+	local ret="$p"
+	while [ "$i" -gt 0 ]; do
+		ret="${p}([$1]${ret}[$2]${p})*"	# wrap one more nesting level around ret
+		i=$(($i-1));
+	done
+	echo "$ret"
+}
+
+simple_sed()	# expand the "..." shorthands in pattern $1 and relax its whitespace; sed -r regex on stdout
+{	# the sed address below is written \$! so bash does not expand $! (last background PID)
+	local l="${1//\(...\)/\($(nestc "(" ")" 8 | delimit)\)}"
+	l="${l//\{...\}/\($(nestc "{" "}" 12 | delimit)\)}"	# continue from $l: restarting from $1 dropped the "(...)" expansion
+	l="${l//[[]#...#[]]/$Q$ccode$Q2}"
+	l="${l//[[]...[&|][&|][]]/$s($(nestc "(" ")" 5 | delimit)[&|][&|])?$s}"
+	l="${l//[[][&|][&|]...[]]/$s([&|][&|]$(nestc "(" ")" 5 | delimit))?$s}"
+	l="${l//[[]...[]]/$ccode}"
+	l="${l//[[](\{...)\*[]]/(\{$(nestc "{" "}" 12))*}"
+	l="${l//[[](...\})\*[]]/($(nestc "{" "}" 12)\})*}"
+	printf '%s\n' "$l" | sed -r "
+		:a
+		s/([[:alnum:]])[[:space:]]+([[:alnum:]])/\1[[:space:]]+\2/g
+		s/[[:space:]]+/[[:space:]]*/g
+		\$!{
+			N; ba
+		}"
+}
+
+usage()	# print the help text to stdout; callers decide whether to exit afterwards
+{
+cat << EOF
+USAGE: git get-def [OPTION]... PATTERN [FILE|DIRECTORY]... [SEDSCRIPT]
+
+print elements of C code with name PATTERN, an extended regular expression. If
+SEDSCRIPT is provided, it puts the entire element in the hold space before the
+execution of SEDSCRIPT.
+
+Options to specify which element(s) should be printed:
+	-c	class name
+	-d	define
+	-f	function or method name
+	-g	enumeration
+	-m	member (of structure or class data)
+	-s	structure
+	-t	typedef
+	-u	union
+	-v	variable
+	-D	only non-macro defines
+	-M	only macro defines
+	-?	print this help
+
+EOF
+}
+
+sedstr_matches()	# print a sed program: $1 = '|'-separated type letters, $2 = name ERE, $3 = sed script to run per element
+{
+	# TODO: distinction between simple defines and macros
+	local shead="$(nestc "(" ")" 5)"	# balanced parentheses, 5 levels
+	local head="$(nestc "(" ")" 10)"	# not referenced again in this function
+	local body="$(nestc "{" "}" 10)"	# balanced braces, 10 levels
+	local SP="$S|$comm1|[\\]"		# separator: whitespace, /* */ comment or a backslash
+
+	local wrd="$V(($SP)?\($shead\))?"	# identifier, optionally followed by a parenthesised part
+	local fret="($SP)*($wrd($SP|\*)+)+"	# words (and '*') preceding the name
+	local A="($SP)*($wrd($SP)+)*"
+	local B="($SP)+($wrd($SP))*"
+	local C="(($SP)+$wrd)*($SP)*"
+	local t=
+	local match=
+	local pr=":__print;${3}"	# label every stage branches to once its regex matches; $3 runs there
+	for t in ${1//|/ }; do		# relies on word splitting to walk the type letters
+		case "$t" in
+			"c") match="^${A}class($B$2($C)?\{$body\}|($C)?\{$body\}$A$2)($C)?;";;
+			"d") match="^$s#(${SP})*define(${SP})+$2(($S|\().*[^\\])?$" ;;
+			"D") match="^$s#(${SP})*define(${SP})+$2($S.*[^\\])?$" ;;
+			"M") match="^$s#(${SP})*define(${SP})+$2\(.*[^\\]$" ;;
+			"f") match="^$fret$2($SP)*\($shead\)($SP)*\{$body\}" ;;
+			"g") match="^${A}enum($B$2($C)?\{$body\}|($C)?\{$body\}$A$2)($C)?;";;
+			"m") match="^$fret$2($SP)*[;=]";;
+			"s") match="^${A}struct($B$2($C)?\{$body\}|($C)?\{$body\}$A$2)($C)?;";;
+			"t") match="^($SP)*typedef(($B)?\{$body\}|($SP)+)$A$2($C)?;$cendl" ;;
+			"u") match="^${A}union($B$2($C)?\{$body\}|($C)?\{$body\}$A$2)($C)?;";;
+			"v") match="^$fret$2($SP)*[;=]";;
+		esac
+		t="${t//[DM]/d}"	# D and M share the "__d" label with d
+		pr=":__$t;/$match/${em}{H;N;b__$t};b__print;
+$pr"
+	done
+	echo "$pr"
+}
+
+parse_tags()	# $1 name ERE, $2 file ERE, $3 type letters, $4 sed program; reads ./tags, runs sed once per matching file
+{
+	# TODO: other limits, e.g. limit="\tfile:" for local functions
+	local limit="(	.*)?"
+	local l= f t oldf= sedstr=	# all declared up front so stale globals cannot leak into the first pass
+	#		  name	file	query		type
+	for l in $(grep -E "^$1	($2)	[^	]+	(${3//[DM]/d})$limit$" "tags" |
+			sort -k2 | tr "\t " "@\`"); do
+		# tabs became '@' and spaces '`' above, so each tags line is one word; drop the name field
+		l="${l#*@}"
+		f="${l%%@*}"
+		t="${l#*;\"@}"
+		if [ "$f" != "$oldf" ]; then
+			if [ -n "$oldf" ]; then
+				sed -r -n "${sedstr}b;$4" "$oldf"
+				[ $? -ne 0 ] && echo "Error in $oldf" >&2
+			fi
+			sedstr=
+			oldf="$f"
+		fi
+		l="${l%;\"*}"
+		l="${l#*@}"
+		[ "${l:0:2}" = "/^" ] && l="/^`echo "${l:2:$((${#l}-3))}" | delimit`/"
+		sedstr="${l//\`/ }b__${t:0:1};$sedstr"
+	done
+	[ -n "$sedstr" ] && sed -r -n "${sedstr}b;$4" "$oldf"
+}
+
+parse_opts()	# parse "[OPTION]... PATTERN [FILE|DIRECTORY]... [SEDSCRIPT]" and run the search via parse_tags
+{
+	local fl=			# file list
+	local name=
+	local getstr=
+	local script= optname OPTIND=1	# private getopts state, so repeated calls after sourcing start clean
+
+	while [ $# -ne 0 ]; do
+		while getopts "cdDMfgmstuv" optname; do
+			case "$optname" in
+				c|d|D|M|f|g|m|s|t|u|v) getstr="${getstr:+$getstr|}$optname" ;;
+				"?") usage; exit 0; ;;
+			esac
+		done
+		shift $((OPTIND-1))
+		[ $# -eq 0 ] && break;
+		OPTIND=1	# restart option parsing for whatever follows this operand
+		if [ -f "$1" ]; then
+			fl="${fl:+$fl|}$1";
+		elif [ -d "$1" ]; then
+			fl="${fl:+$fl|}${1}[^	]+";
+		else
+			if [ -z "$name" ]; then
+				name="$1";
+			else
+				script="$1"
+			fi
+		fi
+		shift
+	done
+	[ -z "$name" ] && { usage; return 1; }	# no PATTERN given: stop instead of searching for an empty name
+	getstr="${getstr:=c|d|f|g|m|s|t|u|v}"
+	local pr="`sedstr_matches "$getstr" "$name" "${script:=p;}"`"
+	fl="${fl:=[^	]+}"
+	parse_tags "$name" "${fl//\//\\/}" "$getstr" "$pr"
+}
+
+
+#main: run only when executed; when the file is sourced, BASH_SOURCE[0] differs from $0
+if [ "${BASH_SOURCE[0]}" = "$0" ]; then
+	if [ -f "tags" ]; then
+		parse_opts "$@"
+	else
+		echo "No tags file found" >&2; exit 1
+	fi
+fi

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2009-04-06 22:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-04-06 22:09 [PATCH v2] prints elements of C code in the git repository Roel Kluin

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).