public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Pierre-Marie de Rodat <derodat@adacore.com>
To: gcc-patches@gcc.gnu.org
Cc: Etienne Servais <servais@adacore.com>
Subject: [Ada] Fix Constraint error on rexgexp close bracket find algorithm
Date: Wed, 10 Nov 2021 08:58:30 +0000	[thread overview]
Message-ID: <20211110085830.GA2811045@adacore.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 486 bytes --]

In pattern syntax checking, make a procedure out of the algorithm to
find the close bracket matching an open bracket. Fix cases where the
close bracket is missing in the special cases '-' and '\', e.g.:

- "[a-b"
- "[\b"
- "[\]" misses either a backslash or a close bracket

These three cases would raise constraint errors.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

	* libgnat/s-regexp.adb (Check_Well_Formed_Pattern): Fix
	Constraint_Error on missing close bracket.

[-- Attachment #2: patch.diff --]
[-- Type: text/x-diff, Size: 5681 bytes --]

diff --git a/gcc/ada/libgnat/s-regexp.adb b/gcc/ada/libgnat/s-regexp.adb
--- a/gcc/ada/libgnat/s-regexp.adb
+++ b/gcc/ada/libgnat/s-regexp.adb
@@ -122,7 +122,7 @@ package body System.Regexp is
    is
       S : String := Pattern;
       --  The pattern which is really compiled (when the pattern is case
-      --  insensitive, we convert this string to lower-cases
+      --  insensitive, we convert this string to lower-cases).
 
       Map : Mapping := (others => 0);
       --  Mapping between characters and columns in the tables
@@ -209,8 +209,59 @@ package body System.Regexp is
          --  The last occurrence of an opening parenthesis, if Glob=False,
          --  or the last occurrence of an opening curly brace, if Glob=True.
 
+         procedure Find_Close_Bracket;
+         --  Go through the pattern to find a closing bracket. Raise an
+         --  exception if none is found.
+
          procedure Raise_Exception_If_No_More_Chars (K : Integer := 0);
-         --  If S(J + 1 .. S'Last)'Length < K then call Raise_Exception
+         --  If J + K > S'Last then call Raise_Exception
+
+         ------------------------
+         -- Find_Close_Bracket --
+         ------------------------
+
+         procedure Find_Close_Bracket is
+            Possible_Range_Start : Boolean := True;
+            --  Set True everywhere a range character '-' can occur
+
+         begin
+            loop
+               exit when S (J) = Close_Bracket;
+
+               Raise_Exception_If_No_More_Chars (1);
+               --  The current character is not a close_bracket, thus it should
+               --  be followed by at least one more char. If not, no close
+               --  bracket is present and the pattern is ill-formed.
+
+               if S (J) = '-' and then S (J + 1) /= Close_Bracket then
+                  if not Possible_Range_Start then
+                     Raise_Exception
+                        ("No mix of ranges is allowed in "
+                        & "regular expression", J);
+                  end if;
+
+                  J := J + 1;
+                  Raise_Exception_If_No_More_Chars (1);
+
+                  Possible_Range_Start := False;
+                  --  Range cannot be followed by '-' character,
+                  --  except as last character in the set.
+
+               else
+                  Possible_Range_Start := True;
+               end if;
+
+               if S (J) = '\' then
+                  J := J + 1;
+                  Raise_Exception_If_No_More_Chars (1);
+                  --  We ignore the next character and need to check we have
+                  --  one more available character. This is necessary for
+                  --  the erroneous [\] pattern which stands for [\]] or [\\].
+               end if;
+
+               J := J + 1;
+            end loop;
+         end Find_Close_Bracket;
 
          --------------------------------------
          -- Raise_Exception_If_No_More_Chars --
@@ -240,63 +291,23 @@ package body System.Regexp is
                      end if;
                   end if;
 
-                  --  The first character never has a special meaning
-
+                  --  Characters ']' and '-' are meant as literals when first
+                  --  in the list.  As such, they have no special meaning and
+                  --  we pass them.
                   if S (J) = ']' or else S (J) = '-' then
                      J := J + 1;
                      Raise_Exception_If_No_More_Chars;
                   end if;
 
-                  --  The set of characters cannot be empty
-
                   if S (J) = ']' then
+                     --  ??? This message is misleading since the check forbids
+                     --  the sets []] and [-] but not the empty set [].
                      Raise_Exception
                        ("Set of characters cannot be empty in regular "
                           & "expression", J);
                   end if;
 
-                  declare
-                     Possible_Range_Start : Boolean := True;
-                     --  Set True everywhere a range character '-' can occur
-
-                  begin
-                     loop
-                        exit when S (J) = Close_Bracket;
-
-                        --  The current character should be followed by a
-                        --  closing bracket.
-
-                        Raise_Exception_If_No_More_Chars (1);
-
-                        if S (J) = '-'
-                          and then S (J + 1) /= Close_Bracket
-                        then
-                           if not Possible_Range_Start then
-                              Raise_Exception
-                                ("No mix of ranges is allowed in "
-                                   & "regular expression", J);
-                           end if;
-
-                           J := J + 1;
-                           Raise_Exception_If_No_More_Chars;
-
-                           --  Range cannot be followed by '-' character,
-                           --  except as last character in the set.
-
-                           Possible_Range_Start := False;
-
-                        else
-                           Possible_Range_Start := True;
-                        end if;
-
-                        if S (J) = '\' then
-                           J := J + 1;
-                           Raise_Exception_If_No_More_Chars;
-                        end if;
-
-                        J := J + 1;
-                     end loop;
-                  end;
+                  Find_Close_Bracket;
 
                   --  A closing bracket can end an elmt or term
 



                 reply	other threads:[~2021-11-10  8:58 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211110085830.GA2811045@adacore.com \
    --to=derodat@adacore.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=servais@adacore.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).