public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5100] [Ada] Warn for bidirectional characters
@ 2021-11-10  8:59 Pierre-Marie de Rodat
  0 siblings, 0 replies; only message in thread
From: Pierre-Marie de Rodat @ 2021-11-10  8:59 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0e988162f6e9cbb9bea998b4f1febda199769692

commit r12-5100-g0e988162f6e9cbb9bea998b4f1febda199769692
Author: Bob Duff <duff@adacore.com>
Date:   Fri Nov 5 07:54:33 2021 -0400

    [Ada] Warn for bidirectional characters
    
    gcc/ada/
    
            * scng.adb (Check_Bidi): New procedure to give warning. Note
            that this is called only for non-ASCII characters, so it should
            not be an efficiency issue.
            (Slit): Call Check_Bidi for wide characters in string_literals.
            (Minus_Case): Call Check_Bidi for wide characters in comments.
            (Char_Literal_Case): Call Check_Bidi for wide characters in
            character_literals.  Move Accumulate_Checksum down, because
            otherwise, if Err is True, the Code is uninitialized.
            * errout.ads: Make the obsolete nature of "Insertion character
            ?" more prominent; one should not have to read several
            paragraphs before finding out that it's obsolete.

Diff:
---
 gcc/ada/errout.ads |  4 ++--
 gcc/ada/scng.adb   | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/errout.ads b/gcc/ada/errout.ads
index 60b1b4fb62d..d6b89105b06 100644
--- a/gcc/ada/errout.ads
+++ b/gcc/ada/errout.ads
@@ -275,7 +275,7 @@ package Errout is
    --      contain subprograms to be inlined in the main program. It is also
    --      used by the Compiler_Unit_Warning pragma for similar reasons.
 
-   --    Insertion character ? (Question: warning message)
+   --    Insertion character ? (Question: warning message -- OBSOLETE)
    --      The character ? appearing anywhere in a message makes the message
    --      warning instead of a normal error message, and the text of the
    --      message will be preceded by "warning:" in the normal case. The
@@ -302,7 +302,7 @@ package Errout is
    --      clear that the continuation is part of a warning message, but it is
    --      not necessary to go through any computational effort to include it.
    --
-   --      Note: this usage is obsolete, use ?? ?*? ?$? ?x? ?.x? ?_x? to
+   --      Note: this usage is obsolete; use ?? ?*? ?$? ?x? ?.x? ?_x? to
    --      specify the string to be added when Warn_Doc_Switch is set to True.
    --      If this switch is True, then for simple ? messages it has no effect.
    --      This simple form is to ease transition and may be removed later
diff --git a/gcc/ada/scng.adb b/gcc/ada/scng.adb
index 3c62337a7c2..fd184b591d2 100644
--- a/gcc/ada/scng.adb
+++ b/gcc/ada/scng.adb
@@ -322,6 +322,49 @@ package body Scng is
       --  Returns True if the scan pointer is pointing to the start of a wide
       --  character sequence, does not modify the scan pointer in any case.
 
+      procedure Check_Bidi (Code : Char_Code);
+      --  Give a warning if Code is a bidirectional character, which can cause
+      --  security vulnerabilities. See the following article:
+      --
+      --  @article{boucher_trojansource_2021,
+      --      title = {Trojan {Source}: {Invisible} {Vulnerabilities}},
+      --      author = {Nicholas Boucher and Ross Anderson},
+      --      year = {2021},
+      --      journal = {Preprint},
+      --      eprint = {2111.00169},
+      --      archivePrefix = {arXiv},
+      --      primaryClass = {cs.CR},
+      --      url = {https://arxiv.org/abs/2111.00169}
+      --  }
+
+      ----------------
+      -- Check_Bidi --
+      ----------------
+
+      type Bidi_Characters is
+        (LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI);
+      Bidi_Character_Codes : constant array (Bidi_Characters) of Char_Code :=
+        (LRE => 16#202A#,
+         RLE => 16#202B#,
+         LRO => 16#202D#,
+         RLO => 16#202E#,
+         LRI => 16#2066#,
+         RLI => 16#2067#,
+         FSI => 16#2068#,
+         PDF => 16#202C#,
+         PDI => 16#2069#);
+      --  Above are the bidirectional control characters (embedding, override,
+      --  isolate, and their terminators) with their Unicode code points.
+
+      procedure Check_Bidi (Code : Char_Code) is
+      begin
+         for Bidi_Code of Bidi_Character_Codes loop
+            if Code = Bidi_Code then
+               Error_Msg ("??bidirectional wide character", Wptr);
+            end if;
+         end loop;
+      end Check_Bidi;
+
       -----------------------
       -- Double_Char_Token --
       -----------------------
@@ -1070,6 +1113,8 @@ package body Scng is
                   if Err then
                      Error_Illegal_Wide_Character;
                      Code := Get_Char_Code (' ');
+                  else
+                     Check_Bidi (Code);
                   end if;
 
                   Accumulate_Checksum (Code);
@@ -1611,11 +1656,11 @@ package body Scng is
 
                   elsif Start_Of_Wide_Character then
                      declare
-                        Wptr : constant Source_Ptr := Scan_Ptr;
                         Code : Char_Code;
                         Err  : Boolean;
 
                      begin
+                        Wptr := Scan_Ptr;
                         Scan_Wide (Source, Scan_Ptr, Code, Err);
 
                         --  If not well formed wide character, then just skip
@@ -1629,6 +1674,8 @@ package body Scng is
                         elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
                            Scan_Ptr := Wptr;
                            exit;
+                        else
+                           Check_Bidi (Code);
                         end if;
                      end;
 
@@ -1736,7 +1783,6 @@ package body Scng is
                if Start_Of_Wide_Character then
                   Wptr := Scan_Ptr;
                   Scan_Wide (Source, Scan_Ptr, Code, Err);
-                  Accumulate_Checksum (Code);
 
                   if Err then
                      Error_Illegal_Wide_Character;
@@ -1752,8 +1798,12 @@ package body Scng is
                      Error_Msg -- CODEFIX
                        ("(Ada 2005) non-graphic character not permitted " &
                         "in character literal", Wptr);
+                  else
+                     Check_Bidi (Code);
                   end if;
 
+                  Accumulate_Checksum (Code);
+
                   if Source (Scan_Ptr) /= ''' then
                         Error_Msg_S ("missing apostrophe");
                   else


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-11-10  8:59 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-10  8:59 [gcc r12-5100] [Ada] Warn for bidirectional characters Pierre-Marie de Rodat

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).