public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Tom Honermann <tom@honermann.net>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 1/3 v2] C: Implement C2X N2653 char8_t and UTF-8 string literal changes
Date: Mon,  1 Aug 2022 14:32:29 -0400	[thread overview]
Message-ID: <20220801183229.1325250-1-tom@honermann.net> (raw)
In-Reply-To: <20220725175948.1424695-2-tom@honermann.net>

This patch implements the core language and compiler-dependent library
changes adopted for C2X via WG14 N2653.  The changes include:
- Change of type for UTF-8 string literals from array of const char to
  array of const char8_t (unsigned char).
- A new atomic_char8_t typedef.
- A new ATOMIC_CHAR8_T_LOCK_FREE macro defined in terms of the existing
  __GCC_ATOMIC_CHAR8_T_LOCK_FREE predefined macro.

gcc/ChangeLog:

	* ginclude/stdatomic.h (atomic_char8_t,
	ATOMIC_CHAR8_T_LOCK_FREE): New typedef and macro.

gcc/c/ChangeLog:

	* c-parser.cc (c_parser_string_literal): Use char8_t as the type
	of CPP_UTF8STRING when char8_t support is enabled.
	* c-typeck.cc (digest_init): Allow initialization of an array
	of character type by a string literal with type array of
	char8_t.

gcc/c-family/ChangeLog:

	* c-lex.cc (lex_string, lex_charconst): Use char8_t as the type
	of CPP_UTF8CHAR and CPP_UTF8STRING when char8_t support is
	enabled.
	* c-opts.cc (c_common_post_options): Set flag_char8_t if
	targeting C2x.
---
 gcc/c-family/c-lex.cc    | 13 +++++++++----
 gcc/c-family/c-opts.cc   |  4 ++--
 gcc/c/c-parser.cc        | 16 ++++++++++++++--
 gcc/c/c-typeck.cc        |  2 +-
 gcc/ginclude/stdatomic.h |  6 ++++++
 5 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/gcc/c-family/c-lex.cc b/gcc/c-family/c-lex.cc
index 8bfa4f4024f..0b6f94e18a8 100644
--- a/gcc/c-family/c-lex.cc
+++ b/gcc/c-family/c-lex.cc
@@ -1352,7 +1352,14 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
 	default:
 	case CPP_STRING:
 	case CPP_UTF8STRING:
-	  value = build_string (1, "");
+	  if (type == CPP_UTF8STRING && flag_char8_t)
+	    {
+	      value = build_string (TYPE_PRECISION (char8_type_node)
+				    / TYPE_PRECISION (char_type_node),
+				    "");  /* char8_t is 8 bits */
+	    }
+	  else
+	    value = build_string (1, "");
 	  break;
 	case CPP_STRING16:
 	  value = build_string (TYPE_PRECISION (char16_type_node)
@@ -1425,9 +1432,7 @@ lex_charconst (const cpp_token *token)
     type = char16_type_node;
   else if (token->type == CPP_UTF8CHAR)
     {
-      if (!c_dialect_cxx ())
-	type = unsigned_char_type_node;
-      else if (flag_char8_t)
+      if (flag_char8_t)
         type = char8_type_node;
       else
         type = char_type_node;
diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc
index b9f01a65ed7..108adc5caf8 100644
--- a/gcc/c-family/c-opts.cc
+++ b/gcc/c-family/c-opts.cc
@@ -1059,9 +1059,9 @@ c_common_post_options (const char **pfilename)
   if (flag_sized_deallocation == -1)
     flag_sized_deallocation = (cxx_dialect >= cxx14);
 
-  /* char8_t support is new in C++20.  */
+  /* char8_t support is implicitly enabled in C++20 and C2X.  */
   if (flag_char8_t == -1)
-    flag_char8_t = (cxx_dialect >= cxx20);
+    flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc2x;
 
   if (flag_extern_tls_init)
     {
diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc
index 92049d1a101..fa9395986de 100644
--- a/gcc/c/c-parser.cc
+++ b/gcc/c/c-parser.cc
@@ -7447,7 +7447,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok)
 	default:
 	case CPP_STRING:
 	case CPP_UTF8STRING:
-	  value = build_string (1, "");
+	  if (type == CPP_UTF8STRING && flag_char8_t)
+	    {
+	      value = build_string (TYPE_PRECISION (char8_type_node)
+				    / TYPE_PRECISION (char_type_node),
+				    "");  /* char8_t is 8 bits */
+	    }
+	  else
+	    value = build_string (1, "");
 	  break;
 	case CPP_STRING16:
 	  value = build_string (TYPE_PRECISION (char16_type_node)
@@ -7472,9 +7479,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok)
     {
     default:
     case CPP_STRING:
-    case CPP_UTF8STRING:
       TREE_TYPE (value) = char_array_type_node;
       break;
+    case CPP_UTF8STRING:
+      if (flag_char8_t)
+	TREE_TYPE (value) = char8_array_type_node;
+      else
+	TREE_TYPE (value) = char_array_type_node;
+      break;
     case CPP_STRING16:
       TREE_TYPE (value) = char16_array_type_node;
       break;
diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index fd0a7f81a7a..231f4e980b6 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -8045,7 +8045,7 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype,
 
 	  if (char_array)
 	    {
-	      if (typ2 != char_type_node)
+	      if (typ2 != char_type_node && typ2 != char8_type_node)
 		incompat_string_cst = true;
 	    }
 	  else if (!comptypes (typ1, typ2))
diff --git a/gcc/ginclude/stdatomic.h b/gcc/ginclude/stdatomic.h
index bfcfdf664c7..9f2475b739d 100644
--- a/gcc/ginclude/stdatomic.h
+++ b/gcc/ginclude/stdatomic.h
@@ -49,6 +49,9 @@ typedef _Atomic long atomic_long;
 typedef _Atomic unsigned long atomic_ulong;
 typedef _Atomic long long atomic_llong;
 typedef _Atomic unsigned long long atomic_ullong;
+#ifdef __CHAR8_TYPE__
+typedef _Atomic __CHAR8_TYPE__ atomic_char8_t;
+#endif
 typedef _Atomic __CHAR16_TYPE__ atomic_char16_t;
 typedef _Atomic __CHAR32_TYPE__ atomic_char32_t;
 typedef _Atomic __WCHAR_TYPE__ atomic_wchar_t;
@@ -97,6 +100,9 @@ extern void atomic_signal_fence (memory_order);
 
 #define ATOMIC_BOOL_LOCK_FREE		__GCC_ATOMIC_BOOL_LOCK_FREE
 #define ATOMIC_CHAR_LOCK_FREE		__GCC_ATOMIC_CHAR_LOCK_FREE
+#ifdef __GCC_ATOMIC_CHAR8_T_LOCK_FREE
+#define ATOMIC_CHAR8_T_LOCK_FREE	__GCC_ATOMIC_CHAR8_T_LOCK_FREE
+#endif
 #define ATOMIC_CHAR16_T_LOCK_FREE	__GCC_ATOMIC_CHAR16_T_LOCK_FREE
 #define ATOMIC_CHAR32_T_LOCK_FREE	__GCC_ATOMIC_CHAR32_T_LOCK_FREE
 #define ATOMIC_WCHAR_T_LOCK_FREE	__GCC_ATOMIC_WCHAR_T_LOCK_FREE
-- 
2.32.0


  parent reply	other threads:[~2022-08-01 18:32 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-25 17:59 [PATCH 0/3] Implement C2X N2653 (char8_t) and correct UTF-8 character literal type in preprocessor directives for C++ Tom Honermann
2022-07-25 17:59 ` [PATCH 1/3] C: Implement C2X N2653 char8_t and UTF-8 string literal changes Tom Honermann
2022-07-27 23:20   ` Joseph Myers
2022-07-30 23:17     ` Tom Honermann
2022-08-01 18:32   ` Tom Honermann [this message]
2022-07-25 17:59 ` [PATCH 2/3] testsuite: Add tests for " Tom Honermann
2022-07-27 23:23   ` Joseph Myers
2022-07-31 21:47     ` Tom Honermann
2022-08-01 18:34   ` [PATCH 2/3 v2] " Tom Honermann
2022-08-01 19:13     ` Joseph Myers
2022-08-01 22:36       ` Tom Honermann
2022-08-01 22:39   ` [PATCH 2/3 v3] " Tom Honermann
2022-08-02 16:53     ` Joseph Myers
2022-08-02 18:02       ` Tom Honermann
2022-07-25 17:59 ` [PATCH 3/3] c++/106426: Treat u8 character literals as unsigned in char8_t modes Tom Honermann
2022-07-25 18:05   ` Andrew Pinski
2022-07-26  1:32     ` [PATCH 3/3 v2] preprocessor/106426: " Tom Honermann
2022-07-26  1:38     ` [PATCH 3/3] c++/106426: " Tom Honermann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220801183229.1325250-1-tom@honermann.net \
    --to=tom@honermann.net \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).