public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc] gconv: Correct Big5-HKSCS conversion to preserve all state bits. [BZ #25744]
@ 2022-07-06 14:20 Adhemerval Zanella
0 siblings, 0 replies; only message in thread
From: Adhemerval Zanella @ 2022-07-06 14:20 UTC (permalink / raw)
To: glibc-cvs
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=598f790fb17bcfff7fedde5209933a82d7748328
commit 598f790fb17bcfff7fedde5209933a82d7748328
Author: Tom Honermann <tom@honermann.net>
Date: Thu Jun 30 08:52:13 2022 -0400
gconv: Correct Big5-HKSCS conversion to preserve all state bits. [BZ #25744]
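This patch corrects the Big5-HKSCS converter to preserve the lowest 3 bits of
the mbstate_t __count data member when the converter encounters an incomplete
multibyte character. The converter keeps its own queued character in the
remaining bits (__count >> 3), so it now updates only those bits and leaves
the lowest 3 bits untouched instead of overwriting the whole field.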
This fixes BZ #25744.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Diff:
---
iconvdata/big5hkscs.c | 16 ++++----
iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c | 65 +++++++++++++++++++++++++++++++
2 files changed, 73 insertions(+), 8 deletions(-)
diff --git a/iconvdata/big5hkscs.c b/iconvdata/big5hkscs.c
index a28b18a5ec..d12389b2e3 100644
--- a/iconvdata/big5hkscs.c
+++ b/iconvdata/big5hkscs.c
@@ -17769,7 +17769,7 @@ static struct
the output state to the initial state. This has to be done during the
flushing. */
#define EMIT_SHIFT_TO_INIT \
- if (data->__statep->__count != 0) \
+ if ((data->__statep->__count >> 3) != 0) \
{ \
if (FROM_DIRECTION) \
{ \
@@ -17778,7 +17778,7 @@ static struct
/* Write out the last character. */ \
*((uint32_t *) outbuf) = data->__statep->__count >> 3; \
outbuf += sizeof (uint32_t); \
- data->__statep->__count = 0; \
+ data->__statep->__count &= 7; \
} \
else \
/* We don't have enough room in the output buffer. */ \
@@ -17792,7 +17792,7 @@ static struct
uint32_t lasttwo = data->__statep->__count >> 3; \
*outbuf++ = (lasttwo >> 8) & 0xff; \
*outbuf++ = lasttwo & 0xff; \
- data->__statep->__count = 0; \
+ data->__statep->__count &= 7; \
} \
else \
/* We don't have enough room in the output buffer. */ \
@@ -17878,7 +17878,7 @@ static struct
\
/* Otherwise store only the first character now, and \
put the second one into the queue. */ \
- *statep = ch2 << 3; \
+ *statep = (ch2 << 3) | (*statep & 7); \
/* Tell the caller why we terminate the loop. */ \
result = __GCONV_FULL_OUTPUT; \
break; \
@@ -17895,7 +17895,7 @@ static struct
} \
else \
/* Clear the queue and proceed to output the saved character. */ \
- *statep = 0; \
+ *statep &= 7; \
\
put32 (outptr, ch); \
outptr += 4; \
@@ -17946,7 +17946,7 @@ static struct
} \
*outptr++ = (ch >> 8) & 0xff; \
*outptr++ = ch & 0xff; \
- *statep = 0; \
+ *statep &= 7; \
inptr += 4; \
continue; \
\
@@ -17959,7 +17959,7 @@ static struct
} \
*outptr++ = (lasttwo >> 8) & 0xff; \
*outptr++ = lasttwo & 0xff; \
- *statep = 0; \
+ *statep &= 7; \
continue; \
} \
\
@@ -17996,7 +17996,7 @@ static struct
/* Check for possible combining character. */ \
if (__glibc_unlikely (ch == 0xca || ch == 0xea)) \
{ \
- *statep = ((cp[0] << 8) | cp[1]) << 3; \
+ *statep = (((cp[0] << 8) | cp[1]) << 3) | (*statep & 7); \
inptr += 4; \
continue; \
} \
diff --git a/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c
index 9601b6c1d9..e1472dc2e2 100644
--- a/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c
+++ b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c
@@ -128,6 +128,71 @@ check_conversion (struct testdata test)
printf ("error: Result of third conversion was wrong.\n");
err++;
}
+
+ /* Now perform the same test as above consuming one byte at a time. */
+ mbs = test.input;
+ memset (&st, 0, sizeof (st));
+
+ /* Consume the first byte; expect an incomplete multibyte character. */
+ ret = mbrtowc (&wc, mbs, 1, &st);
+ if (ret != -2)
+ {
+ printf ("error: First byte conversion returned %zd.\n", ret);
+ err++;
+ }
+ /* Advance past the first consumed byte. */
+ mbs += 1;
+ /* Consume the second byte; expect the first wchar_t. */
+ ret = mbrtowc (&wc, mbs, 1, &st);
+ if (ret != 1)
+ {
+ printf ("error: Second byte conversion returned %zd.\n", ret);
+ err++;
+ }
+ /* Advance past the second consumed byte. */
+ mbs += 1;
+ if (wc != test.expected[0])
+ {
+ printf ("error: Result of first wchar_t conversion was wrong.\n");
+ err++;
+ }
+ /* Consume no bytes; expect the second wchar_t. */
+ ret = mbrtowc (&wc, mbs, 1, &st);
+ if (ret != 0)
+ {
+ printf ("error: First attempt of third byte conversion returned %zd.\n", ret);
+ err++;
+ }
+ /* Do not advance past the third byte. */
+ mbs += 0;
+ if (wc != test.expected[1])
+ {
+ printf ("error: Result of second wchar_t conversion was wrong.\n");
+ err++;
+ }
+ /* After the second wchar_t conversion, the converter should be in
+ the initial state since the two input BIG5-HKSCS bytes have been
+ consumed and the two wchar_t's have been output. */
+ if (mbsinit (&st) == 0)
+ {
+ printf ("error: Converter not in initial state.\n");
+ err++;
+ }
+ /* Consume the third byte; expect the third wchar_t. */
+ ret = mbrtowc (&wc, mbs, 1, &st);
+ if (ret != 1)
+ {
+ printf ("error: Third byte conversion returned %zd.\n", ret);
+ err++;
+ }
+ /* Advance past the third consumed byte. */
+ mbs += 1;
+ if (wc != test.expected[2])
+ {
+ printf ("error: Result of third wchar_t conversion was wrong.\n");
+ err++;
+ }
+
/* Return 0 if we saw no errors. */
return err;
}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-07-06 14:20 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-07-06 14:20 [glibc] gconv: Correct Big5-HKSCS conversion to preserve all state bits. [BZ #25744] Adhemerval Zanella
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).