public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/3] [gdb/contrib] Add spellcheck.sh --check
@ 2024-10-08  7:44 Tom de Vries
  2024-10-08  7:44 ` [PATCH 2/3] [gdb/contrib] Speed up " Tom de Vries
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Tom de Vries @ 2024-10-08  7:44 UTC (permalink / raw)
  To: gdb-patches

Add a new option --check to gdb/contrib/spellcheck.sh, to do the spell
check and bail out ASAP with an exit code of 1 if misspelled words were
found, or 0 otherwise.

Verified with shellcheck.
---
 gdb/contrib/spellcheck.sh | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/gdb/contrib/spellcheck.sh b/gdb/contrib/spellcheck.sh
index 4203333b846..3188734331c 100755
--- a/gdb/contrib/spellcheck.sh
+++ b/gdb/contrib/spellcheck.sh
@@ -76,7 +76,7 @@ sed_separator=$(join $sed_or "${sed_separators[@]}")
 
 usage ()
 {
-    echo "usage: $(basename "$0") <file|dir>+"
+    echo "usage: $(basename "$0") [--check] <file|dir>+"
 }
 
 make_absolute ()
@@ -101,6 +101,18 @@ parse_args ()
     files=$(mktemp)
     trap 'rm -f "$files"' EXIT
 
+    while true; do
+	case " $1 " in
+	    " --check ")
+		check=true
+		shift
+		;;
+	    *)
+		break
+		;;
+	esac
+    done
+
     if [ $# -eq -0 ]; then
 	usage
 	exit 1
@@ -311,6 +323,7 @@ replace_word_in_files ()
 main ()
 {
     declare -a unique_files
+    check=false
     parse_args "$@"
 
     get_dictionary
@@ -329,6 +342,10 @@ main ()
 	return
     fi
 
+    if $check; then
+	exit 1
+    fi
+
     declare -A words_done
     local i word replacement
     i=0

base-commit: e808bbbe37dda4a3ec5872ff990090cc746ad7ac
-- 
2.35.3


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/3] [gdb/contrib] Speed up spellcheck.sh --check
  2024-10-08  7:44 [PATCH 1/3] [gdb/contrib] Add spellcheck.sh --check Tom de Vries
@ 2024-10-08  7:44 ` Tom de Vries
  2024-11-07 14:55   ` Tom Tromey
  2024-10-08  7:44 ` [PATCH 3/3] [gdb] Add spell check pre-commit hook Tom de Vries
  2024-11-07 14:52 ` [PATCH 1/3] [gdb/contrib] Add spellcheck.sh --check Tom Tromey
  2 siblings, 1 reply; 9+ messages in thread
From: Tom de Vries @ 2024-10-08  7:44 UTC (permalink / raw)
  To: gdb-patches

Speed up gdb/contrib/shellcheck.sh by caching the grep pattern.

Without cached grep pattern:
...
$ time ./gdb/contrib/spellcheck.sh --check gdb/gdb.c

real    0m2,750s
user    0m0,013s
sys     0m0,032s
...
and with cached grep pattern:
...
$ time ./gdb/contrib/spellcheck.sh --check gdb/gdb.c

real    0m0,192s
user    0m0,022s
sys     0m0,024s
...

Tested on aarch64-linux.
---
 gdb/contrib/spellcheck.sh | 49 +++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/gdb/contrib/spellcheck.sh b/gdb/contrib/spellcheck.sh
index 3188734331c..08c745aa92d 100755
--- a/gdb/contrib/spellcheck.sh
+++ b/gdb/contrib/spellcheck.sh
@@ -20,12 +20,14 @@
 # $ ./gdb/contrib/spellcheck.sh gdb*
 
 scriptdir=$(cd "$(dirname "$0")" || exit; pwd -P)
+this_script=$scriptdir/$(basename "$0")
 
 url=https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
 cache_dir=$scriptdir/../../.git
 cache_file=wikipedia-common-misspellings.txt
 dictionary=$cache_dir/$cache_file
 local_dictionary=$scriptdir/common-misspellings.txt
+cache_file2=spell-check.pat1
 
 # Separators: space, slash, tab, colon, comma.
 declare -a grep_separators
@@ -191,21 +193,38 @@ parse_dictionary ()
 
 find_files_matching_words ()
 {
-    local pat
-    pat=""
-    for word in "${words[@]}"; do
-	if [ "$pat" = "" ]; then
-	    pat="$word"
-	else
-	    pat="$pat|$word"
-	fi
-    done
-    pat="($pat)"
-
-    local sep
-    sep=$grep_separator
-
-    pat="(^|$sep)$pat($sep|$)"
+    local cache_id
+    cache_id=$(cat "$local_dictionary" "$dictionary" "$this_script" \
+		 | md5sum  \
+		 | awk '{print $1}')
+
+    local patfile
+    patfile="$cache_dir/$cache_file2".$cache_id
+
+    if [ -f "$patfile" ]; then
+	pat=$(cat "$patfile")
+    else
+	rm -f "$cache_dir/$cache_file2".*
+
+	local pat
+	pat=""
+	for word in "${words[@]}"; do
+	    if [ "$pat" = "" ]; then
+		pat="$word"
+	    else
+		pat="$pat|$word"
+	    fi
+	done
+	pat="($pat)"
+
+	local sep
+	sep=$grep_separator
+
+	pat="(^|$sep)$pat($sep|$)"
+
+	echo "$pat" \
+	     > "$patfile"
+    fi
 
     grep -E \
 	-l \
-- 
2.35.3


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 3/3] [gdb] Add spell check pre-commit hook
  2024-10-08  7:44 [PATCH 1/3] [gdb/contrib] Add spellcheck.sh --check Tom de Vries
  2024-10-08  7:44 ` [PATCH 2/3] [gdb/contrib] Speed up " Tom de Vries
@ 2024-10-08  7:44 ` Tom de Vries
  2024-10-21 13:08   ` [PING][PATCH " Tom de Vries
  2024-11-07 14:52 ` [PATCH 1/3] [gdb/contrib] Add spellcheck.sh --check Tom Tromey
  2 siblings, 1 reply; 9+ messages in thread
From: Tom de Vries @ 2024-10-08  7:44 UTC (permalink / raw)
  To: gdb-patches

Add a call to gdb/contrib/spellcheck.sh --check in .pre-commit-config.yaml:
...
$ time git commit -a -m typo
black................................................(no files to check)Skipped
flake8...............................................(no files to check)Skipped
isort................................................(no files to check)Skipped
spell check..............................................................Failed
- hook id: spell-check
- exit code: 1

real    0m0,493s
user    0m0,258s
sys     0m0,318s
...

Tested on aarch64-linux.
---
 .pre-commit-config.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 87726aeb758..4a3fdd79f3b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,3 +22,10 @@ repos:
     - id: isort
       types_or: [file]
       files: 'gdb/.*\.py(\.in)?$'
+  - repo: local
+    hooks:
+    - id: spell-check
+      name: spell check
+      language: script
+      entry: ./gdb/contrib/spellcheck.sh --check
+      files: ^(gdb|gdbsupport|gdbserver)/
-- 
2.35.3


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PING][PATCH 3/3] [gdb] Add spell check pre-commit hook
  2024-10-08  7:44 ` [PATCH 3/3] [gdb] Add spell check pre-commit hook Tom de Vries
@ 2024-10-21 13:08   ` Tom de Vries
  2024-11-07 14:57     ` Tom Tromey
  0 siblings, 1 reply; 9+ messages in thread
From: Tom de Vries @ 2024-10-21 13:08 UTC (permalink / raw)
  To: gdb-patches

On 10/8/24 09:44, Tom de Vries wrote:
> Add a call to gdb/contrib/spellcheck.sh --check in .pre-commit-config.yaml:
> ...
> $ time git commit -a -m typo
> black................................................(no files to check)Skipped
> flake8...............................................(no files to check)Skipped
> isort................................................(no files to check)Skipped
> spell check..............................................................Failed
> - hook id: spell-check
> - exit code: 1
> 
> real    0m0,493s
> user    0m0,258s
> sys     0m0,318s
> ...
> 

I've committed the other patches from this series, but this one remains 
in review, so ... ping.

Thanks,
- Tom

> Tested on aarch64-linux.
> ---
>   .pre-commit-config.yaml | 7 +++++++
>   1 file changed, 7 insertions(+)
> 
> diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
> index 87726aeb758..4a3fdd79f3b 100644
> --- a/.pre-commit-config.yaml
> +++ b/.pre-commit-config.yaml
> @@ -22,3 +22,10 @@ repos:
>       - id: isort
>         types_or: [file]
>         files: 'gdb/.*\.py(\.in)?$'
> +  - repo: local
> +    hooks:
> +    - id: spell-check
> +      name: spell check
> +      language: script
> +      entry: ./gdb/contrib/spellcheck.sh --check
> +      files: ^(gdb|gdbsupport|gdbserver)/


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/3] [gdb/contrib] Add spellcheck.sh --check
  2024-10-08  7:44 [PATCH 1/3] [gdb/contrib] Add spellcheck.sh --check Tom de Vries
  2024-10-08  7:44 ` [PATCH 2/3] [gdb/contrib] Speed up " Tom de Vries
  2024-10-08  7:44 ` [PATCH 3/3] [gdb] Add spell check pre-commit hook Tom de Vries
@ 2024-11-07 14:52 ` Tom Tromey
  2 siblings, 0 replies; 9+ messages in thread
From: Tom Tromey @ 2024-11-07 14:52 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gdb-patches

>>>>> "Tom" == Tom de Vries <tdevries@suse.de> writes:

Tom> Add a new option --check to gdb/contrib/spellcheck.sh, to do the spell
Tom> check and bail out ASAP with an exit code of 1 if misspelled words were
Tom> found, or 0 otherwise.

Tom> Verified with shellcheck.

Looks good to me.
Approved-By: Tom Tromey <tom@tromey.com>

Tom

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/3] [gdb/contrib] Speed up spellcheck.sh --check
  2024-10-08  7:44 ` [PATCH 2/3] [gdb/contrib] Speed up " Tom de Vries
@ 2024-11-07 14:55   ` Tom Tromey
  0 siblings, 0 replies; 9+ messages in thread
From: Tom Tromey @ 2024-11-07 14:55 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gdb-patches

>>>>> "Tom" == Tom de Vries <tdevries@suse.de> writes:

Tom> Speed up gdb/contrib/shellcheck.sh by caching the grep pattern.
Tom> Without cached grep pattern:
Tom> ...
Tom> $ time ./gdb/contrib/spellcheck.sh --check gdb/gdb.c

Tom> real    0m2,750s
Tom> user    0m0,013s
Tom> sys     0m0,032s
Tom> ...
Tom> and with cached grep pattern:
Tom> ...
Tom> $ time ./gdb/contrib/spellcheck.sh --check gdb/gdb.c

Tom> real    0m0,192s
Tom> user    0m0,022s
Tom> sys     0m0,024s
Tom> ...

Tom> Tested on aarch64-linux.

Seems reasonable.
Approved-By: Tom Tromey <tom@tromey.com>

Tom

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PING][PATCH 3/3] [gdb] Add spell check pre-commit hook
  2024-10-21 13:08   ` [PING][PATCH " Tom de Vries
@ 2024-11-07 14:57     ` Tom Tromey
  2024-11-12 10:15       ` Tom de Vries
  0 siblings, 1 reply; 9+ messages in thread
From: Tom Tromey @ 2024-11-07 14:57 UTC (permalink / raw)
  To: Tom de Vries; +Cc: gdb-patches

>>>>> "Tom" == Tom de Vries <tdevries@suse.de> writes:

Tom> I've committed the other patches from this series, but this one
Tom> remains in review, so ... ping.

Ah, now I see the others already went in.
Totally fine of course, I just should have read the whole thread first.

I am in favor of this change.  I think it's a good improvement.
However I wonder -- how long does it take to run the script?
I ask because it seems like it will be run on essentially every commit.
Also, I can't recall how pre-commit works -- is the script run on the
entire tree or just the files being modified?

Assuming everything is ok though I think this should go in.
Approved-By: Tom Tromey <tom@tromey.com>

Tom

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PING][PATCH 3/3] [gdb] Add spell check pre-commit hook
  2024-11-07 14:57     ` Tom Tromey
@ 2024-11-12 10:15       ` Tom de Vries
  2024-11-12 13:07         ` Tom de Vries
  0 siblings, 1 reply; 9+ messages in thread
From: Tom de Vries @ 2024-11-12 10:15 UTC (permalink / raw)
  To: Tom Tromey; +Cc: gdb-patches

On 11/7/24 15:57, Tom Tromey wrote:
>>>>>> "Tom" == Tom de Vries <tdevries@suse.de> writes:
> 
> Tom> I've committed the other patches from this series, but this one
> Tom> remains in review, so ... ping.
> 
> Ah, now I see the others already went in.
> Totally fine of course, I just should have read the whole thread first.
> 
> I am in favor of this change.  I think it's a good improvement.
> However I wonder -- how long does it take to run the script?

It depends.

Let's take the largest file in gdb* in terms of lines, that's 
gdb/doc/gdb.texinfo with ~51k lines:
...
$ time ./gdb/contrib/spellcheck.sh --check gdb/doc/gdb.texinfo

real	0m4.613s
user	0m4.405s
sys	0m0.215s
...

Now, let's take a small file with just 39 lines:
...
$ time ./gdb/contrib/spellcheck.sh --check gdb/gdb.c

real	0m0.445s
user	0m0.314s
sys	0m0.137s
...

This is with .git/wikipedia-common-misspellings.txt already downloaded, 
and .git/spell-check.pat1.$md5sum already generated.

> I ask because it seems like it will be run on essentially every commit.

Agreed, speed matters for this.

> Also, I can't recall how pre-commit works -- is the script run on the
> entire tree or just the files being modified?

Just on the files being modified.

[ Running it on the entire tree is slow:

$ time ./gdb/contrib/spellcheck.sh --check gdb*

real	1m9.906s
user	1m7.710s
sys	0m1.899s ]

I think it may be possible to speed up the check to only check the 
modified lines (see the line counter example below).

This works for checks that don't require context, or require a fixed 
amount of lines of context.

Currently, spellcheck.sh works on line-at-a-time basis, so no context used.

The question is, should it be sensitive to context?

The current implementation does not differentiate between comments and 
code, so when running it on directory sim we get:
...
diff --git a/sim/arm/armsupp.c b/sim/arm/armsupp.c
index 1a5eeaff1d6..1b92a3abc3e 100644
--- a/sim/arm/armsupp.c
+++ b/sim/arm/armsupp.c
@@ -390,9 +390,9 @@ ARMul_NthReg (ARMword instr, unsigned number)
  {
    unsigned bit, upto;

-  for (bit = 0, upto = 0; upto <= number; bit ++)
+  for (bit = 0, up to = 0; up to <= number; bit ++)
      if (BIT (bit))
-      upto ++;
+      up to ++;

    return (bit - 1);
  }
...

> Assuming everything is ok though I think this should go in.
> Approved-By: Tom Tromey <tom@tromey.com>

Thanks for the review, and I'm glad you're supporting the idea.

I think the potential 4.5 seconds delay is too long, so I'll hold off on 
committing this.

I'll work on a --pre-commit switch that only checks modified lines, and 
resubmit together with this patch.

Thanks,
- Tom

Patch:
...
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 87726aeb758..0b94dcc4744 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,3 +22,10 @@ repos:
      - id: isort
        types_or: [file]
        files: 'gdb/.*\.py(\.in)?$'
+  - repo: local
+    hooks:
+    - id: line-counter
+      name: line counter
+      language: script
+      entry: ./gdb/contrib/line-counter.sh
+      files: ^(gdb|gdbsupport|gdbserver)/
diff --git a/gdb/contrib/line-counter.sh b/gdb/contrib/line-counter.sh
new file mode 100755
index 00000000000..257ec3793bb
--- /dev/null
+++ b/gdb/contrib/line-counter.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+git diff --staged > DIFF
+
+git diff --staged | egrep -v "^\+\+\+" | egrep "^\+" | wc -l >> DIFF
...

Example output:
...
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index 53f41e67444..e08ee7fb675 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -51432,6 +51432,8 @@ Richard M. Stallman and Roland H. Pesch, July 1991.

  @printindex cp

+BLA
+
  @node Command and Variable Index
  @unnumbered Command, Variable, and Function Index

2
...

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PING][PATCH 3/3] [gdb] Add spell check pre-commit hook
  2024-11-12 10:15       ` Tom de Vries
@ 2024-11-12 13:07         ` Tom de Vries
  0 siblings, 0 replies; 9+ messages in thread
From: Tom de Vries @ 2024-11-12 13:07 UTC (permalink / raw)
  To: Tom Tromey; +Cc: gdb-patches

On 11/12/24 11:15, Tom de Vries wrote:
> On 11/7/24 15:57, Tom Tromey wrote:
>>>>>>> "Tom" == Tom de Vries <tdevries@suse.de> writes:
>>
>> Tom> I've committed the other patches from this series, but this one
>> Tom> remains in review, so ... ping.
>>
>> Ah, now I see the others already went in.
>> Totally fine of course, I just should have read the whole thread first.
>>
>> I am in favor of this change.  I think it's a good improvement.
>> However I wonder -- how long does it take to run the script?
> 
> It depends.
> 
> Let's take the largest file in gdb* in terms of lines, that's gdb/doc/ 
> gdb.texinfo with ~51k lines:
> ...
> $ time ./gdb/contrib/spellcheck.sh --check gdb/doc/gdb.texinfo
> 
> real    0m4.613s
> user    0m4.405s
> sys    0m0.215s
> ...
> 
> Now, let's take a small file with just 39 lines:
> ...
> $ time ./gdb/contrib/spellcheck.sh --check gdb/gdb.c
> 
> real    0m0.445s
> user    0m0.314s
> sys    0m0.137s
> ...
> 
> This is with .git/wikipedia-common-misspellings.txt already downloaded, 
> and .git/spell-check.pat1.$md5sum already generated.
> 
>> I ask because it seems like it will be run on essentially every commit.
> 
> Agreed, speed matters for this.
> 
>> Also, I can't recall how pre-commit works -- is the script run on the
>> entire tree or just the files being modified?
> 
> Just on the files being modified.
> 
> [ Running it on the entire tree is slow:
> 
> $ time ./gdb/contrib/spellcheck.sh --check gdb*
> 
> real    1m9.906s
> user    1m7.710s
> sys    0m1.899s ]
> 
> I think it may be possible to speed up the check to only check the 
> modified lines (see the line counter example below).
> 
> This works for checks that don't require context, or require a fixed 
> amount of lines of context.
> 
> Currently, spellcheck.sh works on line-at-a-time basis, so no context used.
> 
> The question is, should it be sensitive to context?
> 
> The current implementation does not differentiate between comments and 
> code, so when running it on directory sim we get:
> ...
> diff --git a/sim/arm/armsupp.c b/sim/arm/armsupp.c
> index 1a5eeaff1d6..1b92a3abc3e 100644
> --- a/sim/arm/armsupp.c
> +++ b/sim/arm/armsupp.c
> @@ -390,9 +390,9 @@ ARMul_NthReg (ARMword instr, unsigned number)
>   {
>     unsigned bit, upto;
> 
> -  for (bit = 0, upto = 0; upto <= number; bit ++)
> +  for (bit = 0, up to = 0; up to <= number; bit ++)
>       if (BIT (bit))
> -      upto ++;
> +      up to ++;
> 
>     return (bit - 1);
>   }
> ...
> 
>> Assuming everything is ok though I think this should go in.
>> Approved-By: Tom Tromey <tom@tromey.com>
> 
> Thanks for the review, and I'm glad you're supporting the idea.
> 
> I think the potential 4.5 seconds delay is too long, so I'll hold off on 
> committing this.
> 
> I'll work on a --pre-commit switch that only checks modified lines, and 
> resubmit together with this patch.
> 

Submitted here ( 
https://sourceware.org/pipermail/gdb-patches/2024-November/213249.html ).

Thanks,
- Tom

> Patch:
> ...
> diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
> index 87726aeb758..0b94dcc4744 100644
> --- a/.pre-commit-config.yaml
> +++ b/.pre-commit-config.yaml
> @@ -22,3 +22,10 @@ repos:
>       - id: isort
>         types_or: [file]
>         files: 'gdb/.*\.py(\.in)?$'
> +  - repo: local
> +    hooks:
> +    - id: line-counter
> +      name: line counter
> +      language: script
> +      entry: ./gdb/contrib/line-counter.sh
> +      files: ^(gdb|gdbsupport|gdbserver)/
> diff --git a/gdb/contrib/line-counter.sh b/gdb/contrib/line-counter.sh
> new file mode 100755
> index 00000000000..257ec3793bb
> --- /dev/null
> +++ b/gdb/contrib/line-counter.sh
> @@ -0,0 +1,5 @@
> +#!/bin/sh
> +
> +git diff --staged > DIFF
> +
> +git diff --staged | egrep -v "^\+\+\+" | egrep "^\+" | wc -l >> DIFF
> ...
> 
> Example output:
> ...
> diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
> index 53f41e67444..e08ee7fb675 100644
> --- a/gdb/doc/gdb.texinfo
> +++ b/gdb/doc/gdb.texinfo
> @@ -51432,6 +51432,8 @@ Richard M. Stallman and Roland H. Pesch, July 1991.
> 
>   @printindex cp
> 
> +BLA
> +
>   @node Command and Variable Index
>   @unnumbered Command, Variable, and Function Index
> 
> 2
> ...


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2024-11-12 13:07 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-10-08  7:44 [PATCH 1/3] [gdb/contrib] Add spellcheck.sh --check Tom de Vries
2024-10-08  7:44 ` [PATCH 2/3] [gdb/contrib] Speed up " Tom de Vries
2024-11-07 14:55   ` Tom Tromey
2024-10-08  7:44 ` [PATCH 3/3] [gdb] Add spell check pre-commit hook Tom de Vries
2024-10-21 13:08   ` [PING][PATCH " Tom de Vries
2024-11-07 14:57     ` Tom Tromey
2024-11-12 10:15       ` Tom de Vries
2024-11-12 13:07         ` Tom de Vries
2024-11-07 14:52 ` [PATCH 1/3] [gdb/contrib] Add spellcheck.sh --check Tom Tromey

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).