* [PATCH] libcpp: Small incremental patch for P1854R4 [PR110341]
@ 2023-08-26 11:11 Jakub Jelinek
2023-08-28 14:00 ` [PATCH] libcpp, v2: " Jakub Jelinek
0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2023-08-26 11:11 UTC (permalink / raw)
To: Jason Merrill; +Cc: gcc-patches
Hi!
The following incremental patch to the PR110341 posted patch uses
a special conversion callback instead of conversion from host charset
(UTF-8/UTF-EBCDIC) to UTF-32, and also ignores all diagnostics from the
second cpp_interpret_string which should just count chars. The UTF-EBCDIC
is untested, but simple enough that it should just work.
2023-08-26 Jakub Jelinek <jakub@redhat.com>
PR c++/110341
* charset.cc (one_count_chars, convert_count_chars): New functions.
(narrow_str_to_charconst): Call cpp_interpret_string with type
rather than CPP_STRING32, temporarily override for that call
pfile->cb.diagnostic to noop_diagnostic_cb and
pfile->narrow_cset_desc.func to convert_count_chars and just compare
str.len against str2.len.
--- libcpp/charset.cc.jj 2023-08-25 17:14:14.098733396 +0200
+++ libcpp/charset.cc 2023-08-26 12:57:44.858858994 +0200
@@ -446,6 +446,74 @@ one_utf16_to_utf8 (iconv_t bigend, const
return 0;
}
+
+/* Special routine which just counts the number of characters in the
+ string; what exactly is stored into the output doesn't matter
+ as long as it is one uchar per character. */
+
+static inline int
+one_count_chars (iconv_t, const uchar **inbufp, size_t *inbytesleftp,
+ uchar **outbufp, size_t *outbytesleftp)
+{
+ uchar *outbuf;
+ cppchar_t s = 0;
+ int rval;
+
+ /* Check for space first, since we know exactly how much we need. */
+ if (*outbytesleftp < 1)
+ return E2BIG;
+
+#if HOST_CHARSET == HOST_CHARSET_ASCII
+ rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s);
+ if (rval)
+ return rval;
+#else
+ if (*inbytesleftp < 1)
+ return EINVAL;
+ static const uchar utf_ebcdic_map[256] = {
+ /* See table 4 in http://unicode.org/reports/tr16/tr16-7.2.html */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1,
+ 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
+ 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1,
+ 9, 9, 9, 9, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 1, 3, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4,
+ 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 7, 0
+ };
+ rval = utf_ebcdic_map[**inbufp];
+ if (rval == 9)
+ return EILSEQ;
+ if (rval == 0)
+ rval = 1;
+ if (rval >= 2)
+ {
+ if (*inbytesleftp < rval)
+ return EINVAL;
+ for (int i = 1; i < rval; ++i)
+ if (utf_ebcdic_map[(*inbufp)[i]] != 9)
+ return EILSEQ;
+ }
+ *inbytesleftp -= rval;
+ *inbufp += rval;
+#endif
+
+ **outbufp = ' ';
+
+ *outbufp += 1;
+ *outbytesleftp -= 1;
+ return 0;
+}
+
+
/* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */
@@ -529,6 +597,15 @@ convert_utf32_utf8 (iconv_t cd, const uc
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
+/* Magic conversion which just counts characters from input, so
+ only to->len is significant. */
+static bool
+convert_count_chars (iconv_t cd, const uchar *from,
+ size_t flen, struct _cpp_strbuf *to)
+{
+ return conversion_loop (one_count_chars, cd, from, flen, to);
+}
+
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -2613,15 +2690,22 @@ narrow_str_to_charconst (cpp_reader *pfi
ill-formed. We need to count the number of c-chars and compare
that to str.len. */
cpp_string str2 = { 0, 0 };
- if (cpp_interpret_string (pfile, &token->val.str, 1, &str2,
- CPP_STRING32))
+ bool (*saved_diagnostic_handler) (cpp_reader *, enum cpp_diagnostic_level,
+ enum cpp_warning_reason, rich_location *,
+ const char *, va_list *)
+ ATTRIBUTE_FPTR_PRINTF(5,0);
+ saved_diagnostic_handler = pfile->cb.diagnostic;
+ pfile->cb.diagnostic = noop_diagnostic_cb;
+ convert_f save_func = pfile->narrow_cset_desc.func;
+ pfile->narrow_cset_desc.func = convert_count_chars;
+ bool ret = cpp_interpret_string (pfile, &token->val.str, 1, &str2, type);
+ pfile->narrow_cset_desc.func = save_func;
+ pfile->cb.diagnostic = saved_diagnostic_handler;
+ if (ret)
{
- size_t width32 = converter_for_type (pfile, CPP_STRING32).width;
- size_t nbwc = width32 / width;
- size_t len = str2.len / nbwc;
if (str2.text != token->val.str.text)
free ((void *)str2.text);
- if (str.len > len)
+ if (str.len > str2.len)
{
diagnosed
= cpp_error (pfile, CPP_DL_PEDWARN,
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH] libcpp, v2: Small incremental patch for P1854R4 [PR110341]
2023-08-26 11:11 [PATCH] libcpp: Small incremental patch for P1854R4 [PR110341] Jakub Jelinek
@ 2023-08-28 14:00 ` Jakub Jelinek
0 siblings, 0 replies; 2+ messages in thread
From: Jakub Jelinek @ 2023-08-28 14:00 UTC (permalink / raw)
To: Jason Merrill; +Cc: gcc-patches
Hi!
Sorry, testing revealed an unused uchar *outbuf; declaration breaking the
build; here is the same patch with that one line removed,
bootstrapped/regtested on x86_64-linux and i686-linux (on top of the earlier
PR110341 patch).
On Sat, Aug 26, 2023 at 01:11:06PM +0200, Jakub Jelinek via Gcc-patches wrote:
> The following incremental patch to the PR110341 posted patch uses
> a special conversion callback instead of conversion from host charset
> (UTF-8/UTF-EBCDIC) to UTF-32, and also ignores all diagnostics from the
> second cpp_interpret_string which should just count chars. The UTF-EBCDIC
> is untested, but simple enough that it should just work.
2023-08-28 Jakub Jelinek <jakub@redhat.com>
PR c++/110341
* charset.cc (one_count_chars, convert_count_chars): New functions.
(narrow_str_to_charconst): Call cpp_interpret_string with type
rather than CPP_STRING32, temporarily override for that call
pfile->cb.diagnostic to noop_diagnostic_cb and
pfile->narrow_cset_desc.func to convert_count_chars and just compare
str.len against str2.len.
--- libcpp/charset.cc.jj 2023-08-25 17:14:14.098733396 +0200
+++ libcpp/charset.cc 2023-08-28 12:57:44.858858994 +0200
@@ -446,6 +446,73 @@ one_utf16_to_utf8 (iconv_t bigend, const
return 0;
}
+
+/* Special routine which just counts the number of characters in the
+ string; what exactly is stored into the output doesn't matter
+ as long as it is one uchar per character. */
+
+static inline int
+one_count_chars (iconv_t, const uchar **inbufp, size_t *inbytesleftp,
+ uchar **outbufp, size_t *outbytesleftp)
+{
+ cppchar_t s = 0;
+ int rval;
+
+ /* Check for space first, since we know exactly how much we need. */
+ if (*outbytesleftp < 1)
+ return E2BIG;
+
+#if HOST_CHARSET == HOST_CHARSET_ASCII
+ rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s);
+ if (rval)
+ return rval;
+#else
+ if (*inbytesleftp < 1)
+ return EINVAL;
+ static const uchar utf_ebcdic_map[256] = {
+ /* See table 4 in http://unicode.org/reports/tr16/tr16-7.2.html */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1,
+ 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
+ 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1,
+ 9, 9, 9, 9, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+ 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 1, 3, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4,
+ 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 7, 0
+ };
+ rval = utf_ebcdic_map[**inbufp];
+ if (rval == 9)
+ return EILSEQ;
+ if (rval == 0)
+ rval = 1;
+ if (rval >= 2)
+ {
+ if (*inbytesleftp < rval)
+ return EINVAL;
+ for (int i = 1; i < rval; ++i)
+ if (utf_ebcdic_map[(*inbufp)[i]] != 9)
+ return EILSEQ;
+ }
+ *inbytesleftp -= rval;
+ *inbufp += rval;
+#endif
+
+ **outbufp = ' ';
+
+ *outbufp += 1;
+ *outbytesleftp -= 1;
+ return 0;
+}
+
+
/* Helper routine for the next few functions. The 'const' on
one_conversion means that we promise not to modify what function is
pointed to, which lets the inliner see through it. */
@@ -529,6 +596,15 @@ convert_utf32_utf8 (iconv_t cd, const uc
return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
}
+/* Magic conversion which just counts characters from input, so
+ only to->len is significant. */
+static bool
+convert_count_chars (iconv_t cd, const uchar *from,
+ size_t flen, struct _cpp_strbuf *to)
+{
+ return conversion_loop (one_count_chars, cd, from, flen, to);
+}
+
/* Identity conversion, used when we have no alternative. */
static bool
convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -2623,15 +2699,22 @@ narrow_str_to_charconst (cpp_reader *pfi
ill-formed. We need to count the number of c-chars and compare
that to str.len. */
cpp_string str2 = { 0, 0 };
- if (cpp_interpret_string (pfile, &token->val.str, 1, &str2,
- CPP_STRING32))
+ bool (*saved_diagnostic_handler) (cpp_reader *, enum cpp_diagnostic_level,
+ enum cpp_warning_reason, rich_location *,
+ const char *, va_list *)
+ ATTRIBUTE_FPTR_PRINTF(5,0);
+ saved_diagnostic_handler = pfile->cb.diagnostic;
+ pfile->cb.diagnostic = noop_diagnostic_cb;
+ convert_f save_func = pfile->narrow_cset_desc.func;
+ pfile->narrow_cset_desc.func = convert_count_chars;
+ bool ret = cpp_interpret_string (pfile, &token->val.str, 1, &str2, type);
+ pfile->narrow_cset_desc.func = save_func;
+ pfile->cb.diagnostic = saved_diagnostic_handler;
+ if (ret)
{
- size_t width32 = converter_for_type (pfile, CPP_STRING32).width;
- size_t nbwc = width32 / width;
- size_t len = str2.len / nbwc;
if (str2.text != token->val.str.text)
free ((void *)str2.text);
- if (str.len > len)
+ if (str.len > str2.len)
{
diagnosed
= cpp_error (pfile, CPP_DL_PEDWARN,
Jakub
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-08-28 14:00 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-26 11:11 [PATCH] libcpp: Small incremental patch for P1854R4 [PR110341] Jakub Jelinek
2023-08-28 14:00 ` [PATCH] libcpp, v2: " Jakub Jelinek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).