public inbox for gcc-rust@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix raw byte string parsing of zero and out of range bytes
@ 2021-09-29 20:34 Mark Wielaard
  2021-09-30  9:32 ` Philip Herron
  0 siblings, 1 reply; 2+ messages in thread
From: Mark Wielaard @ 2021-09-29 20:34 UTC (permalink / raw)
  To: gcc-rust; +Cc: Mark Wielaard

Allow the \0 escape in byte strings and reject non-ASCII byte values
in raw byte strings. Change parse_partial_hex_escape to not skip bad
characters, in order to provide better error messages.

Add rawbytestring.rs testcase to check string, raw string, byte string
and raw byte string parsing.
---

https://code.wildebeest.org/git/user/mjw/gccrs/commit/?h=parse-raw-byte-string

 gcc/rust/lex/rust-lex.cc                    |  20 +++++++++++++++-----
 gcc/testsuite/rust/compile/rawbytestring.rs | Bin 0 -> 3234 bytes
 2 files changed, 15 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/rust/compile/rawbytestring.rs

diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index b70877be9ff..bbddea04d0c 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -1423,8 +1423,7 @@ Lexer::parse_partial_hex_escape ()
   char hexNum[3] = {0, 0, 0};
 
   // first hex char
-  skip_input ();
-  current_char = peek_input ();
+  current_char = peek_input (1);
   int additional_length_offset = 1;
 
   if (!is_x_digit (current_char))
@@ -1432,20 +1431,23 @@ Lexer::parse_partial_hex_escape ()
       rust_error_at (get_current_location (),
 		     "invalid character %<\\x%c%> in \\x sequence",
 		     current_char);
+      return std::make_pair (0, 0);
     }
   hexNum[0] = current_char;
 
   // second hex char
   skip_input ();
-  current_char = peek_input ();
+  current_char = peek_input (1);
   additional_length_offset++;
 
   if (!is_x_digit (current_char))
     {
       rust_error_at (get_current_location (),
-		     "invalid character %<\\x%c%> in \\x sequence",
+		     "invalid character %<\\x%c%c%> in \\x sequence", hexNum[0],
 		     current_char);
+      return std::make_pair (0, 1);
     }
+  skip_input ();
   hexNum[1] = current_char;
 
   long hexLong = std::strtol (hexNum, nullptr, 16);
@@ -1627,7 +1629,7 @@ Lexer::parse_byte_string (Location loc)
 	  else
 	    length += std::get<1> (escape_length_pair);
 
-	  if (output_char != 0)
+	  if (output_char != 0 || !std::get<2> (escape_length_pair))
 	    str += output_char;
 
 	  continue;
@@ -1722,6 +1724,14 @@ Lexer::parse_raw_byte_string (Location loc)
 	    }
 	}
 
+      if ((unsigned char) current_char > 127)
+	{
+	  rust_error_at (get_current_location (),
+			 "character %<%c%> in raw byte string out of range",
+			 current_char);
+	  current_char = 0;
+	}
+
       length++;
 
       str += current_char;
diff --git a/gcc/testsuite/rust/compile/rawbytestring.rs b/gcc/testsuite/rust/compile/rawbytestring.rs
new file mode 100644
index 0000000000000000000000000000000000000000..9c6b762a7fd378206a3bfe21db5b708890f5466f
GIT binary patch
literal 3234
zcmbVOO>fgc5amjL#mG4T6)1rVl`5`1a^M^Zc^f;m2zI;c($Wfvf5=~A-pqd4PU65Z
z<9Tmp-n`vS-O~56Y3cQwv*$CS<&wUX59E5=v|Go4UDeZ9>ni$0wkR&M$OnWLi=tR8
zvhYe0>#n0kp8Z~u3yqU0ZIOclm4066_dMaIdKBLE<JDDhNy~HEHGO5v9U=0T+ODUv
zC8SmEy1cFEe3@FkZM6EI->90VG(Y=lGOCeT&0tuLp+z$p*Er0}$>V{I!^8|YFtTxx
z@X*l4>D0{rUt=35bE5|t9J_s{&GuboZD*<I?tAKLvSqui3i{=Bxx4RJ6csV<pY3qx
zc%EYYDlHYkjRXh2@TuH1=aM+;Gqyvv;&mx;T8-!69@lIn-t3a5*&*G8KFpvI38NDZ
zXRR0;(@$zf^Mz-&9d5I9*G<Ew+mP5OSua;jH^}=FDaIRU+8^bv*+6`M(70oU!0U_=
z&}eCgAm&LiE1Ztuo)19+f+2(Qu2!m#_4vb;|G-CZh`7Kh;Epf$lpotHpVZa9RxPJ`
z*!LJ1N@A_5D-K2!c50h=c|}nH2&&HIi=qJdndb5#r=*{jFDfG+GVizjpnnJPCErUm
z(~py#01%ck2asI=5SB3ogcab#=?eJBr4{72%hYuq1akuw_HYtNmI2frgB`4tK#Ur;
zF6x6XH@P+_Ld&Pj=Khmtif_<#%m^#v8|1@fDley8DqbpRJ8##35S;)CLQU5(s-g1&
zGH1b11D=)VWpyG#bwi0++xi+Rry%Bx8xX28AhXsD5b>@|GH+hjmxkvqUg|FRiNY&&
zP6(%e4anlh3W@7JWKOd9E)p`E*!Jfr70=|kILq??taYC%vd4tW9O052<zlNPu3_&s
zQXTCpCkvg$z92;~u`^Cz|8;Ub$4U{W)axrh#`o>FwtHy(W1l^5)-!Q6rkeY2pcOb5
zC5~Q^#`Cf!S&N9GM~?nelacMDHe;2ejYcV-D%(M~7r|5FJ&7hOIQ$Oo!_o8>9i>^x
zV>X-Uc!7Jfq5+jI?0J=nn!sj`v1wMcU}PH?O>D8bJ*^GcSU|ak{Lxs!g8aAiFN}Pz
A0RR91

literal 0
HcmV?d00001

-- 
2.32.0


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix raw byte string parsing of zero and out of range bytes
  2021-09-29 20:34 [PATCH] Fix raw byte string parsing of zero and out of range bytes Mark Wielaard
@ 2021-09-30  9:32 ` Philip Herron
  0 siblings, 0 replies; 2+ messages in thread
From: Philip Herron @ 2021-09-30  9:32 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: gcc-rust

[-- Attachment #1: Type: text/plain, Size: 4486 bytes --]

Hi Mark,

This looks good and is currently being merged:
https://github.com/Rust-GCC/gccrs/pull/695. I will catch up with your other
patches throughout the day.

Thanks

--Phil

On Wed, 29 Sept 2021 at 21:35, Mark Wielaard <mark@klomp.org> wrote:

> Allow the \0 escape in byte strings and reject non-ASCII byte values
> in raw byte strings. Change parse_partial_hex_escape to not skip bad
> characters, in order to provide better error messages.
>
> Add rawbytestring.rs testcase to check string, raw string, byte string
> and raw byte string parsing.
> ---
>
>
> https://code.wildebeest.org/git/user/mjw/gccrs/commit/?h=parse-raw-byte-string
>
>  gcc/rust/lex/rust-lex.cc                    |  20 +++++++++++++++-----
>  gcc/testsuite/rust/compile/rawbytestring.rs | Bin 0 -> 3234 bytes
>  2 files changed, 15 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/testsuite/rust/compile/rawbytestring.rs
>
> diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
> index b70877be9ff..bbddea04d0c 100644
> --- a/gcc/rust/lex/rust-lex.cc
> +++ b/gcc/rust/lex/rust-lex.cc
> @@ -1423,8 +1423,7 @@ Lexer::parse_partial_hex_escape ()
>    char hexNum[3] = {0, 0, 0};
>
>    // first hex char
> -  skip_input ();
> -  current_char = peek_input ();
> +  current_char = peek_input (1);
>    int additional_length_offset = 1;
>
>    if (!is_x_digit (current_char))
> @@ -1432,20 +1431,23 @@ Lexer::parse_partial_hex_escape ()
>        rust_error_at (get_current_location (),
>                      "invalid character %<\\x%c%> in \\x sequence",
>                      current_char);
> +      return std::make_pair (0, 0);
>      }
>    hexNum[0] = current_char;
>
>    // second hex char
>    skip_input ();
> -  current_char = peek_input ();
> +  current_char = peek_input (1);
>    additional_length_offset++;
>
>    if (!is_x_digit (current_char))
>      {
>        rust_error_at (get_current_location (),
> -                    "invalid character %<\\x%c%> in \\x sequence",
> +                    "invalid character %<\\x%c%c%> in \\x sequence", hexNum[0],
>                      current_char);
> +      return std::make_pair (0, 1);
>      }
> +  skip_input ();
>    hexNum[1] = current_char;
>
>    long hexLong = std::strtol (hexNum, nullptr, 16);
> @@ -1627,7 +1629,7 @@ Lexer::parse_byte_string (Location loc)
>           else
>             length += std::get<1> (escape_length_pair);
>
> -         if (output_char != 0)
> +         if (output_char != 0 || !std::get<2> (escape_length_pair))
>             str += output_char;
>
>           continue;
> @@ -1722,6 +1724,14 @@ Lexer::parse_raw_byte_string (Location loc)
>             }
>         }
>
> +      if ((unsigned char) current_char > 127)
> +       {
> +         rust_error_at (get_current_location (),
> +                        "character %<%c%> in raw byte string out of range",
> +                        current_char);
> +         current_char = 0;
> +       }
> +
>        length++;
>
>        str += current_char;
> diff --git a/gcc/testsuite/rust/compile/rawbytestring.rs b/gcc/testsuite/rust/compile/rawbytestring.rs
> new file mode 100644
> index 0000000000000000000000000000000000000000..9c6b762a7fd378206a3bfe21db5b708890f5466f
> GIT binary patch
> literal 3234
> zcmbVOO>fgc5amjL#mG4T6)1rVl`5`1a^M^Zc^f;m2zI;c($Wfvf5=~A-pqd4PU65Z
> z<9Tmp-n`vS-O~56Y3cQwv*$CS<&wUX59E5=v|Go4UDeZ9>ni$0wkR&M$OnWLi=tR8
> zvhYe0>#n0kp8Z~u3yqU0ZIOclm4066_dMaIdKBLE<JDDhNy~HEHGO5v9U=0T+ODUv
> zC8SmEy1cFEe3@FkZM6EI->90VG(Y=lGOCeT&0tuLp+z$p*Er0}$>V{I!^8|YFtTxx
> z@X*l4>D0{rUt=35bE5|t9J_s{&GuboZD*<I?tAKLvSqui3i{=Bxx4RJ6csV<pY3qx
> zc%EYYDlHYkjRXh2@TuH1=aM+;Gqyvv;&mx;T8-!69@lIn-t3a5*&*G8KFpvI38NDZ
> zXRR0;(@$zf^Mz-&9d5I9*G<Ew+mP5OSua;jH^}=FDaIRU+8^bv*+6`M(70oU!0U_=
> z&}eCgAm&LiE1Ztuo)19+f+2(Qu2!m#_4vb;|G-CZh`7Kh;Epf$lpotHpVZa9RxPJ`
> z*!LJ1N@A_5D-K2!c50h=c|}nH2&&HIi=qJdndb5#r=*{jFDfG+GVizjpnnJPCErUm
> z(~py#01%ck2asI=5SB3ogcab#=?eJBr4{72%hYuq1akuw_HYtNmI2frgB`4tK#Ur;
> zF6x6XH@P+_Ld&Pj=Khmtif_<#%m^#v8|1@fDley8DqbpRJ8##35S;)CLQU5(s-g1&
> zGH1b11D=)VWpyG#bwi0++xi+Rry%Bx8xX28AhXsD5b>@|GH+hjmxkvqUg|FRiNY&&
> zP6(%e4anlh3W@7JWKOd9E)p`E*!Jfr70=|kILq??taYC%vd4tW9O052<zlNPu3_&s
> zQXTCpCkvg$z92;~u`^Cz|8;Ub$4U{W)axrh#`o>FwtHy(W1l^5)-!Q6rkeY2pcOb5
> zC5~Q^#`Cf!S&N9GM~?nelacMDHe;2ejYcV-D%(M~7r|5FJ&7hOIQ$Oo!_o8>9i>^x
> zV>X-Uc!7Jfq5+jI?0J=nn!sj`v1wMcU}PH?O>D8bJ*^GcSU|ak{Lxs!g8aAiFN}Pz
> A0RR91
>
> literal 0
> HcmV?d00001
>
> --
> 2.32.0
>
> --
> Gcc-rust mailing list
> Gcc-rust@gcc.gnu.org
> https://gcc.gnu.org/mailman/listinfo/gcc-rust
>

[-- Attachment #2: Type: text/html, Size: 6310 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-09-30  9:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-29 20:34 [PATCH] Fix raw byte string parsing of zero and out of range bytes Mark Wielaard
2021-09-30  9:32 ` Philip Herron

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).