diff --git a/gcc/ada/errout.ads b/gcc/ada/errout.ads --- a/gcc/ada/errout.ads +++ b/gcc/ada/errout.ads @@ -275,7 +275,7 @@ package Errout is -- contain subprograms to be inlined in the main program. It is also -- used by the Compiler_Unit_Warning pragma for similar reasons. - -- Insertion character ? (Question: warning message) + -- Insertion character ? (Question: warning message -- OBSOLETE) -- The character ? appearing anywhere in a message makes the message -- warning instead of a normal error message, and the text of the -- message will be preceded by "warning:" in the normal case. The @@ -302,7 +302,7 @@ package Errout is -- clear that the continuation is part of a warning message, but it is -- not necessary to go through any computational effort to include it. -- - -- Note: this usage is obsolete, use ?? ?*? ?$? ?x? ?.x? ?_x? to + -- Note: this usage is obsolete; use ?? ?*? ?$? ?x? ?.x? ?_x? to -- specify the string to be added when Warn_Doc_Switch is set to True. -- If this switch is True, then for simple ? messages it has no effect. -- This simple form is to ease transition and may be removed later diff --git a/gcc/ada/scng.adb b/gcc/ada/scng.adb --- a/gcc/ada/scng.adb +++ b/gcc/ada/scng.adb @@ -322,6 +322,49 @@ package body Scng is -- Returns True if the scan pointer is pointing to the start of a wide -- character sequence, does not modify the scan pointer in any case. + procedure Check_Bidi (Code : Char_Code); + -- Give a warning if Code is a bidirectional character, which can cause + -- security vulnerabilities. See the following article: + -- + -- @article{boucher_trojansource_2021, + -- title = {Trojan {Source}: {Invisible} {Vulnerabilities}}, + -- author = {Nicholas Boucher and Ross Anderson}, + -- year = {2021}, + -- journal = {Preprint}, + -- eprint = {2111.00169}, + -- archivePrefix = {arXiv}, + -- primaryClass = {cs.CR}, + -- url = {https://arxiv.org/abs/2111.00169} + -- } + + ---------------- + -- Check_Bidi -- + ---------------- + + type Bidi_Characters is + (LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI); + Bidi_Character_Codes : constant array (Bidi_Characters) of Char_Code := + (LRE => 16#202A#, + RLE => 16#202B#, + LRO => 16#202D#, + RLO => 16#202E#, + LRI => 16#2066#, + RLI => 16#2067#, + FSI => 16#2068#, + PDF => 16#202C#, + PDI => 16#2069#); + -- Above are the bidirectional characters, along with their Unicode code + -- points. + + procedure Check_Bidi (Code : Char_Code) is + begin + for Bidi_Code of Bidi_Character_Codes loop + if Code = Bidi_Code then + Error_Msg ("??bidirectional wide character", Wptr); + end if; + end loop; + end Check_Bidi; + ----------------------- -- Double_Char_Token -- ----------------------- @@ -1070,6 +1113,8 @@ package body Scng is if Err then Error_Illegal_Wide_Character; Code := Get_Char_Code (' '); + else + Check_Bidi (Code); end if; Accumulate_Checksum (Code); @@ -1611,11 +1656,11 @@ package body Scng is elsif Start_Of_Wide_Character then declare - Wptr : constant Source_Ptr := Scan_Ptr; Code : Char_Code; Err : Boolean; begin + Wptr := Scan_Ptr; Scan_Wide (Source, Scan_Ptr, Code, Err); -- If not well formed wide character, then just skip @@ -1629,6 +1674,8 @@ package body Scng is elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then Scan_Ptr := Wptr; exit; + else + Check_Bidi (Code); end if; end; @@ -1736,7 +1783,6 @@ package body Scng is if Start_Of_Wide_Character then Wptr := Scan_Ptr; Scan_Wide (Source, Scan_Ptr, Code, Err); - Accumulate_Checksum (Code); if Err then Error_Illegal_Wide_Character; @@ -1752,8 +1798,12 @@ package body Scng is Error_Msg -- CODEFIX ("(Ada 2005) non-graphic character not permitted " & "in character literal", Wptr); + else + Check_Bidi (Code); end if; + Accumulate_Checksum (Code); + if Source (Scan_Ptr) /= ''' then Error_Msg_S ("missing apostrophe"); else