linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] scipts/tags.sh: Add custom sort order
@ 2020-08-05 10:25 peterz
  2020-08-06 12:04 ` [PATCH -v2] " peterz
  0 siblings, 1 reply; 9+ messages in thread
From: peterz @ 2020-08-05 10:25 UTC (permalink / raw)
  To: yamada.masahiro; +Cc: linux-kernel, linux-kbuild


One long-standing annoyance I have with using vim-tags is that our tags
file is not properly sorted. That is, the sorting Exuberant Ctags does
is only on the tag itself.

The problem with that is that, for example, the tag 'mutex' appears a
mere 505 times, 492 of those are structure members. However it is _far_
more likely that someone wants the struct definition when looking for
the mutex tag than any of those members. However, due to the nature of
the sorting, the struct definition will not be first.

So add a script that does a custom sort of the tags file, taking the tag
kind into account.

The kind ordering is roughly: 'type', 'function', 'macro', 'enum', rest.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 scripts/sort-tags.awk | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++
 scripts/tags.sh       |  8 +++++-
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/scripts/sort-tags.awk b/scripts/sort-tags.awk
new file mode 100755
index 000000000000..1eb50406c9d3
--- /dev/null
+++ b/scripts/sort-tags.awk
@@ -0,0 +1,79 @@
+#!/usr/bin/awk -f
+
+# $ ctags --list-kinds
+# C
+#   c  classes
+#   s  structure names
+#   t  typedefs
+#   g  enumeration names
+#   u  union names
+#   n  namespaces
+
+#   f  function definitions
+#   p  function prototypes [off]
+#   d  macro definitions
+
+#   e  enumerators (values inside an enumeration)
+#   m  class, struct, and union members
+#   v  variable definitions
+
+#   l  local variables [off]
+#   x  external and forward variable declarations [off]
+
+BEGIN {
+	FS = "\t"
+
+	sort = "LC_ALL=C sort"
+
+	# our sort order for C kinds:
+	order["c"] = "A"
+	order["s"] = "B"
+	order["t"] = "C"
+	order["g"] = "D"
+	order["u"] = "E"
+	order["n"] = "F"
+	order["f"] = "G"
+	order["p"] = "H"
+	order["d"] = "I"
+	order["e"] = "J"
+	order["m"] = "K"
+	order["v"] = "L"
+	order["l"] = "M"
+	order["x"] = "N"
+}
+
+# pass through header
+/^!_TAG/ {
+	print $0
+	next
+}
+
+{
+	# find 'kinds'
+	for (i = 1; i <= NF; i++) {
+		if ($i ~ /;"$/) {
+			kind = $(i+1)
+			break;
+		}
+	}
+
+	# create sort key
+	if (order[kind])
+		key = $1 order[kind];
+	else
+		key = $1 "Z";
+
+	# get it sorted
+	print key "\t" $0 |& sort
+}
+
+END {
+	close(sort, "to")
+	while ((sort |& getline) > 0) {
+		# strip key
+		sub(/[^[:space:]]*[[:space:]]*/, "")
+		print $0
+	}
+	close(sort)
+}
+
diff --git a/scripts/tags.sh b/scripts/tags.sh
index 4e18ae5282a6..93d729392a7b 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -251,6 +251,8 @@ setup_regex()
 
 exuberant()
 {
+	rm -f tags.unsorted
+
 	setup_regex exuberant asm c
 	all_target_sources | xargs $1 -a                        \
 	-I __initdata,__exitdata,__initconst,__ro_after_init	\
@@ -266,12 +268,16 @@ exuberant()
 	-I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
 	-I static,const						\
 	--extra=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \
+	--sort=no -o tags.unsorted				\
 	"${regex[@]}"
 
 	setup_regex exuberant kconfig
 	all_kconfigs | xargs $1 -a                              \
-	--langdef=kconfig --language-force=kconfig "${regex[@]}"
+	--langdef=kconfig --language-force=kconfig --sort=no	\
+	-o tags.unsorted "${regex[@]}"
 
+	scripts/sort-tags.awk tags.unsorted > tags
+	rm -f tags.unsorted
 }
 
 emacs()


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH -v2] scipts/tags.sh: Add custom sort order
  2020-08-05 10:25 [PATCH] scipts/tags.sh: Add custom sort order peterz
@ 2020-08-06 12:04 ` peterz
  2020-08-26 10:20   ` peterz
  2020-09-02 15:58   ` Masahiro Yamada
  0 siblings, 2 replies; 9+ messages in thread
From: peterz @ 2020-08-06 12:04 UTC (permalink / raw)
  To: yamada.masahiro; +Cc: linux-kernel, linux-kbuild


One long-standing annoyance I have with using vim-tags is that our tags
file is not properly sorted. That is, the sorting Exuberant Ctags does
is only on the tag itself.

The problem with that is that, for example, the tag 'mutex' appears a
mere 505 times, 492 of those are structure members. However it is _far_
more likely that someone wants the struct definition when looking for
the mutex tag than any of those members. However, due to the nature of
the sorting, the struct definition will not be first.

So add a script that does a custom sort of the tags file, taking the tag
kind into account.

The kind ordering is roughly: 'type', 'function', 'macro', 'enum', rest.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
Changes since v1:
 - removed the need for tags.unsorted by using a pipe

Due to this change 'make tags' is now actually faster than it was before
due to less sorting.

 scripts/sort-tags.awk | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++
 scripts/tags.sh       | 11 +++++--
 2 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/scripts/sort-tags.awk b/scripts/sort-tags.awk
new file mode 100755
index 000000000000..1eb50406c9d3
--- /dev/null
+++ b/scripts/sort-tags.awk
@@ -0,0 +1,79 @@
+#!/usr/bin/awk -f
+
+# $ ctags --list-kinds
+# C
+#   c  classes
+#   s  structure names
+#   t  typedefs
+#   g  enumeration names
+#   u  union names
+#   n  namespaces
+
+#   f  function definitions
+#   p  function prototypes [off]
+#   d  macro definitions
+
+#   e  enumerators (values inside an enumeration)
+#   m  class, struct, and union members
+#   v  variable definitions
+
+#   l  local variables [off]
+#   x  external and forward variable declarations [off]
+
+BEGIN {
+	FS = "\t"
+
+	sort = "LC_ALL=C sort"
+
+	# our sort order for C kinds:
+	order["c"] = "A"
+	order["s"] = "B"
+	order["t"] = "C"
+	order["g"] = "D"
+	order["u"] = "E"
+	order["n"] = "F"
+	order["f"] = "G"
+	order["p"] = "H"
+	order["d"] = "I"
+	order["e"] = "J"
+	order["m"] = "K"
+	order["v"] = "L"
+	order["l"] = "M"
+	order["x"] = "N"
+}
+
+# pass through header
+/^!_TAG/ {
+	print $0
+	next
+}
+
+{
+	# find 'kinds'
+	for (i = 1; i <= NF; i++) {
+		if ($i ~ /;"$/) {
+			kind = $(i+1)
+			break;
+		}
+	}
+
+	# create sort key
+	if (order[kind])
+		key = $1 order[kind];
+	else
+		key = $1 "Z";
+
+	# get it sorted
+	print key "\t" $0 |& sort
+}
+
+END {
+	close(sort, "to")
+	while ((sort |& getline) > 0) {
+		# strip key
+		sub(/[^[:space:]]*[[:space:]]*/, "")
+		print $0
+	}
+	close(sort)
+}
+
diff --git a/scripts/tags.sh b/scripts/tags.sh
index 4e18ae5282a6..51087c3d8b1e 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -251,8 +251,10 @@ setup_regex()
 
 exuberant()
 {
+	(
+
 	setup_regex exuberant asm c
-	all_target_sources | xargs $1 -a                        \
+	all_target_sources | xargs $1				\
 	-I __initdata,__exitdata,__initconst,__ro_after_init	\
 	-I __initdata_memblock					\
 	-I __refdata,__attribute,__maybe_unused,__always_unused \
@@ -266,12 +268,15 @@ exuberant()
 	-I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
 	-I static,const						\
 	--extra=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \
+	--sort=no -o -						\
 	"${regex[@]}"
 
 	setup_regex exuberant kconfig
-	all_kconfigs | xargs $1 -a                              \
-	--langdef=kconfig --language-force=kconfig "${regex[@]}"
+	all_kconfigs | xargs $1					\
+	--langdef=kconfig --language-force=kconfig --sort=no	\
+	-o - "${regex[@]}"
 
+	) | scripts/sort-tags.awk > tags
 }
 
 emacs()

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH -v2] scipts/tags.sh: Add custom sort order
  2020-08-06 12:04 ` [PATCH -v2] " peterz
@ 2020-08-26 10:20   ` peterz
  2020-09-02 15:58   ` Masahiro Yamada
  1 sibling, 0 replies; 9+ messages in thread
From: peterz @ 2020-08-26 10:20 UTC (permalink / raw)
  To: yamada.masahiro; +Cc: linux-kernel, linux-kbuild

On Thu, Aug 06, 2020 at 02:04:38PM +0200, peterz@infradead.org wrote:
> 
> One long standing annoyance I have with using vim-tags is that our tags
> file is not properly sorted. That is, the sorting exhuberant Ctags does
> is only on the tag itself.
> 
> The problem with that is that, for example, the tag 'mutex' appears a
> mere 505 times, 492 of those are structure members. However it is _far_
> more likely that someone wants the struct definition when looking for
> the mutex tag than any of those members. However, due to the nature of
> the sorting, the struct definition will not be first.
> 
> So add a script that does a custom sort of the tags file, taking the tag
> kind into account.
> 
> The kind ordering is roughly: 'type', 'function', 'macro', 'enum', rest.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---

ping?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH -v2] scipts/tags.sh: Add custom sort order
  2020-08-06 12:04 ` [PATCH -v2] " peterz
  2020-08-26 10:20   ` peterz
@ 2020-09-02 15:58   ` Masahiro Yamada
  2020-09-02 16:10     ` Masahiro Yamada
  2020-09-02 16:26     ` peterz
  1 sibling, 2 replies; 9+ messages in thread
From: Masahiro Yamada @ 2020-09-02 15:58 UTC (permalink / raw)
  To: Peter Zijlstra (Intel)
  Cc: Linux Kernel Mailing List, Linux Kbuild mailing list

[-- Attachment #1: Type: text/plain, Size: 5895 bytes --]

On Fri, Aug 7, 2020 at 2:28 AM <peterz@infradead.org> wrote:
>
>
> One long standing annoyance I have with using vim-tags is that our tags
> file is not properly sorted. That is, the sorting exhuberant Ctags does
> is only on the tag itself.
>
> The problem with that is that, for example, the tag 'mutex' appears a
> mere 505 times, 492 of those are structure members. However it is _far_
> more likely that someone wants the struct definition when looking for
> the mutex tag than any of those members. However, due to the nature of
> the sorting, the struct definition will not be first.
>
> So add a script that does a custom sort of the tags file, taking the tag
> kind into account.
>
> The kind ordering is roughly: 'type', 'function', 'macro', 'enum', rest.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> Changes since v1:
>  - removed the need for tags.unsorted by using a pipe
>
> Due to this change 'make tags' is now actually faster than it was before
> due to less sorting.
>
>  scripts/sort-tags.awk | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  scripts/tags.sh       | 11 +++++--
>  2 files changed, 87 insertions(+), 3 deletions(-)
>
> diff --git a/scripts/sort-tags.awk b/scripts/sort-tags.awk
> new file mode 100755
> index 000000000000..1eb50406c9d3
> --- /dev/null
> +++ b/scripts/sort-tags.awk
> @@ -0,0 +1,79 @@
> +#!/usr/bin/awk -f
> +
> +# $ ctags --list-kinds
> +# C
> +#   c  classes
> +#   s  structure names
> +#   t  typedefs
> +#   g  enumeration names
> +#   u  union names
> +#   n  namespaces
> +
> +#   f  function definitions
> +#   p  function prototypes [off]
> +#   d  macro definitions
> +
> +#   e  enumerators (values inside an enumeration)
> +#   m  class, struct, and union members
> +#   v  variable definitions
> +
> +#   l  local variables [off]
> +#   x  external and forward variable declarations [off]
> +
> +BEGIN {
> +       FS = "\t"
> +
> +       sort = "LC_ALL=C sort"
> +
> +       # our sort order for C kinds:
> +       order["c"] = "A"
> +       order["s"] = "B"
> +       order["t"] = "C"
> +       order["g"] = "D"
> +       order["u"] = "E"
> +       order["n"] = "F"
> +       order["f"] = "G"
> +       order["p"] = "H"
> +       order["d"] = "I"
> +       order["e"] = "J"
> +       order["m"] = "K"
> +       order["v"] = "L"
> +       order["l"] = "M"
> +       order["x"] = "N"
> +}
> +
> +# pass through header
> +/^!_TAG/ {
> +       print $0
> +       next
> +}
> +
> +{
> +       # find 'kinds'
> +       for (i = 1; i <= NF; i++) {
> +               if ($i ~ /;"$/) {
> +                       kind = $(i+1)
> +                       break;
> +               }
> +       }
> +
> +       # create sort key
> +       if (order[kind])
> +               key = $1 order[kind];
> +       else
> +               key = $1 "Z";
> +
> +       # get it sorted
> +       print key "\t" $0 |& sort
> +}
> +
> +END {
> +       close(sort, "to")
> +       while ((sort |& getline) > 0) {
> +               # strip key
> +               sub(/[^[:space:]]*[[:space:]]*/, "")
> +               print $0
> +       }
> +       close(sort)
> +}
> +
> diff --git a/scripts/tags.sh b/scripts/tags.sh
> index 4e18ae5282a6..51087c3d8b1e 100755
> --- a/scripts/tags.sh
> +++ b/scripts/tags.sh
> @@ -251,8 +251,10 @@ setup_regex()
>
>  exuberant()
>  {
> +       (
> +
>         setup_regex exuberant asm c
> -       all_target_sources | xargs $1 -a                        \
> +       all_target_sources | xargs $1                           \
>         -I __initdata,__exitdata,__initconst,__ro_after_init    \
>         -I __initdata_memblock                                  \
>         -I __refdata,__attribute,__maybe_unused,__always_unused \
> @@ -266,12 +268,15 @@ exuberant()
>         -I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
>         -I static,const                                         \
>         --extra=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \
> +       --sort=no -o -                                          \
>         "${regex[@]}"
>
>         setup_regex exuberant kconfig
> -       all_kconfigs | xargs $1 -a                              \
> -       --langdef=kconfig --language-force=kconfig "${regex[@]}"
> +       all_kconfigs | xargs $1                                 \
> +       --langdef=kconfig --language-force=kconfig --sort=no    \
> +       -o - "${regex[@]}"
>
> +       ) | scripts/sort-tags.awk > tags
>  }
>
>  emacs()


Sorry for the long delay.

First, this patch breaks 'make TAGS'
if 'etags' is a symlink to exuberant ctags.


masahiro@oscar:~/ref/linux$ etags --version
Exuberant Ctags 5.9~svn20110310, Copyright (C) 1996-2009 Darren Hiebert
  Addresses: <dhiebert@users.sourceforge.net>, http://ctags.sourceforge.net
  Optional compiled features: +wildcards, +regex

masahiro@oscar:~/ref/linux$ make TAGS
  GEN     TAGS
etags: Warning: include/linux/seqlock.h:738: null expansion of name pattern "\2"
sed: can't read TAGS: No such file or directory
make: *** [Makefile:1820: TAGS] Error 2




The reason is the hard-coded ' > tags',
which is easy to fix.



But, honestly, I am not super happy about this patch.

Reason 1
  In my understanding, sorting by the tag kind only works
  for ctags. My favorite editor is emacs.
  (Do not get me wrong. I do not intend to start an emacs vs vi war.)
  So, I would rather run 'make TAGS' than 'make tags',
  but this solution would not work for etags because
  etags has a different format.
  So, I'd rather see a more general solution.

Reason 2
  We would have messier code, mixing two files/languages.



When is it useful to tag structure members?

If they are really annoying, why don't we delete them
instead of moving them to the bottom of the tag file?



I attached an alternative solution,
and wrote up my thoughts in the log.

What do you think?


-- 
Best Regards
Masahiro Yamada

[-- Attachment #2: 0001-scripts-tags.sh-remove-m-v-x-tag-kinds-from-exuberan.patch --]
[-- Type: text/x-patch, Size: 3966 bytes --]

From 1a003fce7e4f8460ef3256fb5d958fb5c6cc631e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 2 Sep 2020 21:49:59 +0900
Subject: [PATCH] scripts/tags.sh: remove m, v, x tag kinds from exuberant tags

Exuberant Ctags supports the following kinds of tags:

  $ ctags --list-kinds=c
  c  classes
  d  macro definitions
  e  enumerators (values inside an enumeration)
  f  function definitions
  g  enumeration names
  l  local variables [off]
  m  class, struct, and union members
  n  namespaces
  p  function prototypes [off]
  s  structure names
  t  typedefs
  u  union names
  v  variable definitions
  x  external and forward variable declarations [off]

This commit excludes 'm', 'v', and 'x'.

Peter Zijlstra states:
"The problem with that is that, for example, the tag 'mutex' appears a
mere 505 times, 492 of those are structure members. However it is _far_
more likely that someone wants the struct definition when looking for
the mutex tag than any of those members."
(https://lkml.org/lkml/2020/8/6/512)

So, 'm' is more annoying than useful. For the same reason, it seems
better to turn off 'v'.

You may argue about the criteria, but we need to draw a line somewhere
to make it reasonable for the majority of people.

We flipped 'p' and 'x' in the past:

[1] commit f6333eb4e788 ("kbuild: Add ctags support for function
    prototypes and external variable declarations") added 'p' and 'x',
    but did not explain when they are actually useful.

[2] commit 7db86dc97fb0 ("ctags: usability fix") removed 'p' and 'x',
    stating both of them make no real sense.

[3] commit 0a18a9386c05 ("tags: put function prototypes back!")
    re-added 'p' and 'x', but the commit log only mentioned 'p'.

OK, [3] clearly explained why 'p' is useful, but turned --c-kinds=+px
into --c-kinds=-px. So, 'x' was also (accidentally?) disabled. I think
it should have been --c-kinds=+p-x, or more simply --c-kinds=+p since
'x' is off by default.

It seems a bug of [3], so I disabled 'x' to get back the pre-[3]
behavior.

'make tags' and 'make TAGS' will run faster and create much smaller tags
files if the ctags in use is Exuberant Ctags.

Reviewed-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/tags.sh | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/scripts/tags.sh b/scripts/tags.sh
index 32d3f53af10b..440f4ecad43b 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -179,19 +179,6 @@ regex_c=(
 	'/^DEF_PCI_AC_\(\|NO\)RET(\([[:alnum:]_]*\).*/\2/'
 	'/^PCI_OP_READ(\(\w*\).*[1-4])/pci_bus_read_config_\1/'
 	'/^PCI_OP_WRITE(\(\w*\).*[1-4])/pci_bus_write_config_\1/'
-	'/\<DEFINE_\(RT_MUTEX\|MUTEX\|SEMAPHORE\|SPINLOCK\)(\([[:alnum:]_]*\)/\2/v/'
-	'/\<DEFINE_\(RAW_SPINLOCK\|RWLOCK\|SEQLOCK\)(\([[:alnum:]_]*\)/\2/v/'
-	'/\<DECLARE_\(RWSEM\|COMPLETION\)(\([[:alnum:]_]\+\)/\2/v/'
-	'/\<DECLARE_BITMAP(\([[:alnum:]_]*\)/\1/v/'
-	'/\(^\|\s\)\(\|L\|H\)LIST_HEAD(\([[:alnum:]_]*\)/\3/v/'
-	'/\(^\|\s\)RADIX_TREE(\([[:alnum:]_]*\)/\2/v/'
-	'/\<DEFINE_PER_CPU([^,]*, *\([[:alnum:]_]*\)/\1/v/'
-	'/\<DEFINE_PER_CPU_SHARED_ALIGNED([^,]*, *\([[:alnum:]_]*\)/\1/v/'
-	'/\<DECLARE_WAIT_QUEUE_HEAD(\([[:alnum:]_]*\)/\1/v/'
-	'/\<DECLARE_\(TASKLET\|WORK\|DELAYED_WORK\)(\([[:alnum:]_]*\)/\2/v/'
-	'/\(^\s\)OFFSET(\([[:alnum:]_]*\)/\2/v/'
-	'/\(^\s\)DEFINE(\([[:alnum:]_]*\)/\2/v/'
-	'/\<\(DEFINE\|DECLARE\)_HASHTABLE(\([[:alnum:]_]*\)/\2/v/'
 	'/\<DEFINE_ID\(R\|A\)(\([[:alnum:]_]\+\)/\2/'
 	'/\<DEFINE_WD_CLASS(\([[:alnum:]_]\+\)/\1/'
 	'/\<ATOMIC_NOTIFIER_HEAD(\([[:alnum:]_]\+\)/\1/'
@@ -255,7 +242,7 @@ exuberant()
 	-I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL,ACPI_EXPORT_SYMBOL   \
 	-I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
 	-I static,const						\
-	--extra=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \
+	--extra=+fq --c-kinds=+p-mv --fields=+iaS --langmap=c:+.h \
 	"${regex[@]}"
 
 	setup_regex exuberant kconfig
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH -v2] scipts/tags.sh: Add custom sort order
  2020-09-02 15:58   ` Masahiro Yamada
@ 2020-09-02 16:10     ` Masahiro Yamada
  2020-09-02 16:26     ` peterz
  1 sibling, 0 replies; 9+ messages in thread
From: Masahiro Yamada @ 2020-09-02 16:10 UTC (permalink / raw)
  To: Peter Zijlstra (Intel)
  Cc: Linux Kernel Mailing List, Linux Kbuild mailing list

On Thu, Sep 3, 2020 at 12:58 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> On Fri, Aug 7, 2020 at 2:28 AM <peterz@infradead.org> wrote:
> >
> >
> > One long standing annoyance I have with using vim-tags is that our tags
> > file is not properly sorted. That is, the sorting exhuberant Ctags does
> > is only on the tag itself.
> >
> > The problem with that is that, for example, the tag 'mutex' appears a
> > mere 505 times, 492 of those are structure members. However it is _far_
> > more likely that someone wants the struct definition when looking for
> > the mutex tag than any of those members. However, due to the nature of
> > the sorting, the struct definition will not be first.
> >
> > So add a script that does a custom sort of the tags file, taking the tag
> > kind into account.
> >
> > The kind ordering is roughly: 'type', 'function', 'macro', 'enum', rest.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> > Changes since v1:
> >  - removed the need for tags.unsorted by using a pipe
> >
> > Due to this change 'make tags' is now actually faster than it was before
> > due to less sorting.
> >
> >  scripts/sort-tags.awk | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  scripts/tags.sh       | 11 +++++--
> >  2 files changed, 87 insertions(+), 3 deletions(-)
> >
> > diff --git a/scripts/sort-tags.awk b/scripts/sort-tags.awk
> > new file mode 100755
> > index 000000000000..1eb50406c9d3
> > --- /dev/null
> > +++ b/scripts/sort-tags.awk
> > @@ -0,0 +1,79 @@
> > +#!/usr/bin/awk -f
> > +
> > +# $ ctags --list-kinds
> > +# C
> > +#   c  classes
> > +#   s  structure names
> > +#   t  typedefs
> > +#   g  enumeration names
> > +#   u  union names
> > +#   n  namespaces
> > +
> > +#   f  function definitions
> > +#   p  function prototypes [off]
> > +#   d  macro definitions
> > +
> > +#   e  enumerators (values inside an enumeration)
> > +#   m  class, struct, and union members
> > +#   v  variable definitions
> > +
> > +#   l  local variables [off]
> > +#   x  external and forward variable declarations [off]
> > +
> > +BEGIN {
> > +       FS = "\t"
> > +
> > +       sort = "LC_ALL=C sort"
> > +
> > +       # our sort order for C kinds:
> > +       order["c"] = "A"
> > +       order["s"] = "B"
> > +       order["t"] = "C"
> > +       order["g"] = "D"
> > +       order["u"] = "E"
> > +       order["n"] = "F"
> > +       order["f"] = "G"
> > +       order["p"] = "H"
> > +       order["d"] = "I"
> > +       order["e"] = "J"
> > +       order["m"] = "K"
> > +       order["v"] = "L"
> > +       order["l"] = "M"
> > +       order["x"] = "N"
> > +}
> > +
> > +# pass through header
> > +/^!_TAG/ {
> > +       print $0
> > +       next
> > +}
> > +
> > +{
> > +       # find 'kinds'
> > +       for (i = 1; i <= NF; i++) {
> > +               if ($i ~ /;"$/) {
> > +                       kind = $(i+1)
> > +                       break;
> > +               }
> > +       }
> > +
> > +       # create sort key
> > +       if (order[kind])
> > +               key = $1 order[kind];
> > +       else
> > +               key = $1 "Z";
> > +
> > +       # get it sorted
> > +       print key "\t" $0 |& sort
> > +}
> > +
> > +END {
> > +       close(sort, "to")
> > +       while ((sort |& getline) > 0) {
> > +               # strip key
> > +               sub(/[^[:space:]]*[[:space:]]*/, "")
> > +               print $0
> > +       }
> > +       close(sort)
> > +}
> > +
> > diff --git a/scripts/tags.sh b/scripts/tags.sh
> > index 4e18ae5282a6..51087c3d8b1e 100755
> > --- a/scripts/tags.sh
> > +++ b/scripts/tags.sh
> > @@ -251,8 +251,10 @@ setup_regex()
> >
> >  exuberant()
> >  {
> > +       (
> > +
> >         setup_regex exuberant asm c
> > -       all_target_sources | xargs $1 -a                        \
> > +       all_target_sources | xargs $1                           \
> >         -I __initdata,__exitdata,__initconst,__ro_after_init    \
> >         -I __initdata_memblock                                  \
> >         -I __refdata,__attribute,__maybe_unused,__always_unused \
> > @@ -266,12 +268,15 @@ exuberant()
> >         -I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
> >         -I static,const                                         \
> >         --extra=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \
> > +       --sort=no -o -                                          \
> >         "${regex[@]}"
> >
> >         setup_regex exuberant kconfig
> > -       all_kconfigs | xargs $1 -a                              \
> > -       --langdef=kconfig --language-force=kconfig "${regex[@]}"
> > +       all_kconfigs | xargs $1                                 \
> > +       --langdef=kconfig --language-force=kconfig --sort=no    \
> > +       -o - "${regex[@]}"
> >
> > +       ) | scripts/sort-tags.awk > tags
> >  }
> >
> >  emacs()
>
>
> Sorry for the long delay.
>
> First, this patch breaks 'make TAGS'
> if 'etags' is a symlink to exuberant ctags.
>
>
> masahiro@oscar:~/ref/linux$ etags --version
> Exuberant Ctags 5.9~svn20110310, Copyright (C) 1996-2009 Darren Hiebert
>   Addresses: <dhiebert@users.sourceforge.net>, http://ctags.sourceforge.net
>   Optional compiled features: +wildcards, +regex
>
> masahiro@oscar:~/ref/linux$ make TAGS
>   GEN     TAGS
> etags: Warning: include/linux/seqlock.h:738: null expansion of name pattern "\2"
> sed: can't read TAGS: No such file or directory
> make: *** [Makefile:1820: TAGS] Error 2
>
>
>
>
> The reason is the hard-coded ' > tags',
> and easy to fix.
>
>
>
> But, honestly, I am not super happy about this patch.
>
> Reason 1
>   In my understanding, sorting by the tag kind only works
>   for ctags. My favorite editor is emacs.
>   (Do not get me wrong. I do not intend emacs vs vi war).
>   So, I rather do 'make TAGS' instead of 'make tags',
>   but this solution would not work for etags because
>   etags has a different format.
>   So, I'd rather want to see a more general solution.
>
> Reason 2
>   We would have more messy code, mixing two files/languages
>
>
>
> When is it useful to tag structure members?
>
> If they are really annoying, why don't we delete them
> instead of moving them to the bottom of the tag file?
>
>
>
> I attached an alternative solution,
> and wrote up my thoughts in the log.
>
> What do you think?
>



Sorry, the commit log of the attachment was wrong.

The correct sentence is:

"OK, [3] clearly explained why 'p' is useful, but turned --c-kinds=-px
into --c-kinds=+px. So, 'x' was also (accidentally?) enabled."



-- 
Best Regards
Masahiro Yamada

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH -v2] scipts/tags.sh: Add custom sort order
  2020-09-02 15:58   ` Masahiro Yamada
  2020-09-02 16:10     ` Masahiro Yamada
@ 2020-09-02 16:26     ` peterz
  2020-09-03  2:07       ` Masahiro Yamada
  1 sibling, 1 reply; 9+ messages in thread
From: peterz @ 2020-09-02 16:26 UTC (permalink / raw)
  To: Masahiro Yamada; +Cc: Linux Kernel Mailing List, Linux Kbuild mailing list

On Thu, Sep 03, 2020 at 12:58:14AM +0900, Masahiro Yamada wrote:

> Sorry for the long delay.
> 
> First, this patch breaks 'make TAGS'
> if 'etags' is a symlink to exuberant ctags.
> 
> 
> masahiro@oscar:~/ref/linux$ etags --version
> Exuberant Ctags 5.9~svn20110310, Copyright (C) 1996-2009 Darren Hiebert
>   Addresses: <dhiebert@users.sourceforge.net>, http://ctags.sourceforge.net
>   Optional compiled features: +wildcards, +regex
> 
> masahiro@oscar:~/ref/linux$ make TAGS
>   GEN     TAGS
> etags: Warning: include/linux/seqlock.h:738: null expansion of name pattern "\2"
> sed: can't read TAGS: No such file or directory
> make: *** [Makefile:1820: TAGS] Error 2
> 
> The reason is the hard-coded ' > tags',
> and easy to fix.

Ah, my bad, I forgot to check.

> But, honestly, I am not super happy about this patch.
> 
> Reason 1
>   In my understanding, sorting by the tag kind only works
>   for ctags. My favorite editor is emacs.
>   (Do not get me wrong. I do not intend emacs vs vi war).
>   So, I rather do 'make TAGS' instead of 'make tags',
>   but this solution would not work for etags because
>   etags has a different format.
>   So, I'd rather want to see a more general solution.

It might be possible that emacs' tags implementation can already do this
natively. Initially I tried to fix this in vim, with a macro, but I
couldn't get access to the 'kind' tag.

> Reason 2
>   We would have more messy code, mixing two files/languages

I could try and write the whole thing in bash I suppose.

> When is it useful to tag structure members?

Often, just not when there is a naming conflict.

> If they are really annoying, why don't we delete them
> instead of moving them to the bottom of the tag file?

Because they're really useful :-)

> I attached an alternative solution,
> and wrote up my thoughts in the log.
> 
> What do you think?

> Exuberant Ctags supports the following kinds of tags:
> 
>   $ ctags --list-kinds=c
>   c  classes
>   d  macro definitions
>   e  enumerators (values inside an enumeration)
>   f  function definitions
>   g  enumeration names
>   l  local variables [off]
>   m  class, struct, and union members
>   n  namespaces
>   p  function prototypes [off]
>   s  structure names
>   t  typedefs
>   u  union names
>   v  variable definitions
>   x  external and forward variable declarations [off]
> 
> This commit excludes 'm', 'v', and 'x'.

So my main beef is with m vs s conflicts (they're pretty prevalent),
removing v is insane, but even removing m is undesired IMO.

> Reviewed-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Very much not I'm afraid. I really do like my tags, it's just that I'd
like to have a set precedence when there's a naming conflict.

My claim is that a structure definition is more interesting than a
member variable, not that member variables are not interesting.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH -v2] scipts/tags.sh: Add custom sort order
  2020-09-02 16:26     ` peterz
@ 2020-09-03  2:07       ` Masahiro Yamada
  2020-09-03  7:26         ` peterz
  0 siblings, 1 reply; 9+ messages in thread
From: Masahiro Yamada @ 2020-09-03  2:07 UTC (permalink / raw)
  To: Peter Zijlstra (Intel)
  Cc: Linux Kernel Mailing List, Linux Kbuild mailing list

On Thu, Sep 3, 2020 at 1:26 AM <peterz@infradead.org> wrote:
>
> On Thu, Sep 03, 2020 at 12:58:14AM +0900, Masahiro Yamada wrote:
>
> > Sorry for the long delay.
> >
> > First, this patch breaks 'make TAGS'
> > if 'etags' is a symlink to exuberant ctags.
> >
> >
> > masahiro@oscar:~/ref/linux$ etags --version
> > Exuberant Ctags 5.9~svn20110310, Copyright (C) 1996-2009 Darren Hiebert
> >   Addresses: <dhiebert@users.sourceforge.net>, http://ctags.sourceforge.net
> >   Optional compiled features: +wildcards, +regex
> >
> > masahiro@oscar:~/ref/linux$ make TAGS
> >   GEN     TAGS
> > etags: Warning: include/linux/seqlock.h:738: null expansion of name pattern "\2"
> > sed: can't read TAGS: No such file or directory
> > make: *** [Makefile:1820: TAGS] Error 2
> >
> > The reason is the hard-coded ' > tags',
> > and easy to fix.
>
> Ah, my bad, I forgot to check.
>
> > But, honestly, I am not super happy about this patch.
> >
> > Reason 1
> >   In my understanding, sorting by the tag kind only works
> >   for ctags. My favorite editor is emacs.
> >   (Do not get me wrong. I do not intend emacs vs vi war).
> >   So, I rather do 'make TAGS' instead of 'make tags',
> >   but this solution would not work for etags because
> >   etags has a different format.
> >   So, I'd rather want to see a more general solution.
>
> It might be possible that emacs' tags implementation can already do this
> natively. Initially I tried to fix this in vim, with a macro, but I
> couldn't get access to the 'kind' tag.
>
> > Reason 2
> >   We would have more messy code, mixing two files/languages
>
> I could try and write the whole thing in bash I suppose.
>
> > When is it useful to tag structure members?
>
> Often, just not when there is a naming conflict.
>
> > If they are really annoying, why don't we delete them
> > instead of moving them to the bottom of the tag file?
>
> Because they're really useful :-)
>
> > I attached an alternative solution,
> > and wrote up my thoughts in the log.
> >
> > What do you think?
>
> > Exuberant Ctags supports the following kinds of tags:
> >
> >   $ ctags --list-kinds=c
> >   c  classes
> >   d  macro definitions
> >   e  enumerators (values inside an enumeration)
> >   f  function definitions
> >   g  enumeration names
> >   l  local variables [off]
> >   m  class, struct, and union members
> >   n  namespaces
> >   p  function prototypes [off]
> >   s  structure names
> >   t  typedefs
> >   u  union names
> >   v  variable definitions
> >   x  external and forward variable declarations [off]
> >
> > This commit excludes 'm', 'v', and 'x'.
>
> So my main beef is with m vs s conflicts (they're pretty prevalent),
> removing v is insane, but even removing m is undesired IMO.
>
> > Reviewed-by: Peter Zijlstra (Intel) <peterz@infradead.org>


Sorry, I intended Reported-by, not Reviewed-by.


> Very much not I'm afraid. I really do like my tags, it's just that I'd
> like to have a set precedence when there's a naming conflict.
>
> My claim is that a structure definition is more interesting than a
> member variable, not that member variables are not interesting.



OK, but is there any idea
to make the code cleaner and easier to maintain?


People play with whatever they want to do in this script.



f81b1be40c44b33b9706d64c117edd29e627ad12
introduced file-level ordering.


4f491bb6ea2aef2f5b184f385904a73796d98554
broke it.
(I pointed it out in the review,
but akpm picked it anyway.)


Now, here is the tag-level ordering
(only for exuberant ctags).



Contributors stop caring after their code is merged,
but maintaining it is tiring.


Will re-implementing your sorting logic
in bash look cleaner?

Or, in hindsight, we should have used python or perl?


--
Best Regards
Masahiro Yamada

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH -v2] scipts/tags.sh: Add custom sort order
  2020-09-03  2:07       ` Masahiro Yamada
@ 2020-09-03  7:26         ` peterz
  2020-09-04 14:53           ` peterz
  0 siblings, 1 reply; 9+ messages in thread
From: peterz @ 2020-09-03  7:26 UTC (permalink / raw)
  To: Masahiro Yamada; +Cc: Linux Kernel Mailing List, Linux Kbuild mailing list

On Thu, Sep 03, 2020 at 11:07:28AM +0900, Masahiro Yamada wrote:

> Contributors stop caring after their code is merged,
> but maintaining it is tiring.

This seems to hold in general :/

> Will re-implementing your sorting logic
> in bash look cleaner?

Possibly, I can try, we'll see.

> Or, in hindsight, we should have used python or perl?

I don't speak either :-/.

I googled to see if there is a python/perl ctags implementation we can
'borrow' and found https://github.com/universal-ctags/ctags instead.
That seems to be a continuation of Exuberant Ctags; I can also ask whether
they're interested in --sort-kinds or something like that.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH -v2] scipts/tags.sh: Add custom sort order
  2020-09-03  7:26         ` peterz
@ 2020-09-04 14:53           ` peterz
  0 siblings, 0 replies; 9+ messages in thread
From: peterz @ 2020-09-04 14:53 UTC (permalink / raw)
  To: Masahiro Yamada; +Cc: Linux Kernel Mailing List, Linux Kbuild mailing list

On Thu, Sep 03, 2020 at 09:26:04AM +0200, peterz@infradead.org wrote:
> On Thu, Sep 03, 2020 at 11:07:28AM +0900, Masahiro Yamada wrote:

> > Will re-implementing your sorting logic
> > in bash look cleaner?
> 
> Possibly, I can try, we'll see.

It is somewhat cleaner, but it is _abysmally_ slow. Bash sucks :-(

It is still broken in all the same ways as before, I figured I'd get it
'working' first.

---
diff --git a/scripts/tags.sh b/scripts/tags.sh
index 32d3f53af10b..ec2688b3441a 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -239,10 +239,65 @@ setup_regex()
 	done
 }
 
+sort_tags()
+{
+	export LC_ALL=C
+
+	# start concurrent sort
+	coproc sort
+	# HACK, clone sort output into 3 to ensure we can still read it
+	# after sort terminates
+	exec 3<&${COPROC[0]}
+
+	while read tag file rest;
+	do
+		local tmp=${rest#*;\"}
+
+		case "${tmp:1:1}" in # Precedence for 'C' kinds
+
+		c) order="A";; # classes
+		s) order="B";; # structure names
+		t) order="C";; # typedefs
+		g) order="D";; # enumeration names
+		u) order="E";; # union names
+		n) order="F";; # namespaces
+
+		f) order="G";; # function definitions
+		p) order="H";; # function prototypes
+		d) order="I";; # macro definitions
+
+		e) order="J";; # enumerators (values inside an enumeration)
+		m) order="K";; # class, struct and union members
+		v) order="L";; # variable definitions
+
+		l) order="M";; # local variables [off]
+		x) order="N";; # external and forward variable declarations
+
+		*) order="Z";;
+
+		esac
+
+		# write to sort with a new sort-key prepended
+		echo "${tag}${order}	${tag}	${file}	${rest}" >&${COPROC[1]}
+	done
+
+	# close sort input
+	exec {COPROC[1]}>&-
+
+	# consume sort output
+	while read -u 3 key line;
+	do
+		# strip the sort-key
+		echo "${line}"
+	done
+}
+
 exuberant()
 {
+	(
+
 	setup_regex exuberant asm c
-	all_target_sources | xargs $1 -a                        \
+	all_target_sources | xargs $1                           \
 	-I __initdata,__exitdata,__initconst,__ro_after_init	\
 	-I __initdata_memblock					\
 	-I __refdata,__attribute,__maybe_unused,__always_unused \
@@ -256,12 +311,16 @@ exuberant()
 	-I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
 	-I static,const						\
 	--extra=+fq --c-kinds=+px --fields=+iaS --langmap=c:+.h \
+	--sort=no -o -						\
 	"${regex[@]}"
 
 	setup_regex exuberant kconfig
-	all_kconfigs | xargs $1 -a                              \
+	all_kconfigs | xargs $1                                 \
+	--sort=no -o -						\
 	--langdef=kconfig --language-force=kconfig "${regex[@]}"
 
+	) | sort_tags > tags
+
 }
 
 emacs()

^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-09-04 14:53 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-05 10:25 [PATCH] scipts/tags.sh: Add custom sort order peterz
2020-08-06 12:04 ` [PATCH -v2] " peterz
2020-08-26 10:20   ` peterz
2020-09-02 15:58   ` Masahiro Yamada
2020-09-02 16:10     ` Masahiro Yamada
2020-09-02 16:26     ` peterz
2020-09-03  2:07       ` Masahiro Yamada
2020-09-03  7:26         ` peterz
2020-09-04 14:53           ` peterz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).