* [PATCH] iconv: Fix matching of multi-character transliterations (bug 31859)
@ 2024-06-12 8:50 Andreas Schwab
2024-06-20 18:39 ` Carlos O'Donell
0 siblings, 1 reply; 4+ messages in thread
From: Andreas Schwab @ 2024-06-12 8:50 UTC (permalink / raw)
To: libc-alpha
Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of
the input buffer is reached. Otherwise it is a non-match, and other
patterns should be tried.
---
iconv/Makefile | 13 ++++++++++
iconv/gconv_trans.c | 2 +-
iconv/tst-translit-locale | 10 ++++++++
iconv/tst-translit-mchar.c | 48 +++++++++++++++++++++++++++++++++++++
iconv/tst-translit-mchar.sh | 48 +++++++++++++++++++++++++++++++++++++
5 files changed, 120 insertions(+), 1 deletion(-)
create mode 100644 iconv/tst-translit-locale
create mode 100644 iconv/tst-translit-mchar.c
create mode 100644 iconv/tst-translit-mchar.sh
diff --git a/iconv/Makefile b/iconv/Makefile
index 63afc853ff..e93322da85 100644
--- a/iconv/Makefile
+++ b/iconv/Makefile
@@ -57,6 +57,10 @@ tests = \
tst-iconv-opt \
# tests
+test-srcs := \
+ tst-translit-mchar \
+ # test-srcs
+
others = iconv_prog iconvconfig
install-others-programs = $(inst_bindir)/iconv
install-sbin = iconvconfig
@@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
ifeq ($(run-built-tests),yes)
xtests-special += $(objpfx)test-iconvconfig.out
tests-special += $(objpfx)tst-iconv_prog.out
+tests-special += $(objpfx)tst-translit-mchar.out
endif
# Make a copy of the file because gconv module names are constructed
@@ -126,3 +131,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog
$(BASH) $< $(common-objdir) '$(test-wrapper-env)' \
'$(run-program-env)' > $@; \
$(evaluate-test)
+
+$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \
+ $(objpfx)tst-translit-mchar \
+ tst-translit-locale
+ $(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \
+ '$(run-program-env)' '$(run-program-prefix-after-env)' \
+ $< > $@; \
+ $(evaluate-test)
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
index 08b7a3f71d..44f0fd849a 100644
--- a/iconv/gconv_trans.c
+++ b/iconv/gconv_trans.c
@@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step,
/* Nothing found, continue searching. */
}
- else if (cnt > 0)
+ else if (cnt > 0 && winbuf + cnt == winbufend)
/* This means that the input buffer contents matches a prefix of
an entry. Since we cannot match it unless we get more input,
we will tell the caller about it. */
diff --git a/iconv/tst-translit-locale b/iconv/tst-translit-locale
new file mode 100644
index 0000000000..712b08628a
--- /dev/null
+++ b/iconv/tst-translit-locale
@@ -0,0 +1,10 @@
+# Test multi-character transliteration rule
+
+LC_CTYPE
+copy "POSIX"
+
+translit_start
+"ÄÄ" "AA"
+translit_end
+
+END LC_CTYPE
diff --git a/iconv/tst-translit-mchar.c b/iconv/tst-translit-mchar.c
new file mode 100644
index 0000000000..72335976d6
--- /dev/null
+++ b/iconv/tst-translit-mchar.c
@@ -0,0 +1,48 @@
+/* Test multi-character transliterations.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <locale.h>
+#include <iconv.h>
+#include <support/support.h>
+#include <support/check.h>
+
+static int
+do_test (void)
+{
+ iconv_t cd;
+ /* An input sequence that shares a common prefix with a transliteration
+ rule. */
+ char input[] = "\xc3\x84\xc3\x85";
+ char *inptr = input;
+ char outbuf[10];
+ char *outptr = outbuf;
+ size_t inlen = sizeof (input), outlen = sizeof (outbuf);
+ size_t n;
+
+ xsetlocale (LC_CTYPE, "tst-translit");
+
+ cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+ TEST_VERIFY (cd != (iconv_t) -1);
+
+ /* This call used to loop infinitely. */
+ n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+ TEST_VERIFY (iconv_close (cd) == 0);
+ return n == 0;
+}
+
+#include <support/test-driver.c>
diff --git a/iconv/tst-translit-mchar.sh b/iconv/tst-translit-mchar.sh
new file mode 100644
index 0000000000..79efd6abc8
--- /dev/null
+++ b/iconv/tst-translit-mchar.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+# Testing of multi-character transliterations
+# Copyright (C) 2024 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <https://www.gnu.org/licenses/>.
+
+set -e
+
+common_objpfx=$1
+run_program_prefix_before_env=$2
+run_program_env=$3
+run_program_prefix_after_env=$4
+
+# Generate data files.
+${run_program_prefix_before_env} \
+${run_program_env} \
+I18NPATH=../localedata \
+${run_program_prefix_after_env} ${common_objpfx}locale/localedef \
+--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$?
+if [ $ret -ne 1 ]; then
+ echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)."
+ exit 1
+fi
+
+set -x
+
+# Run the test.
+${run_program_prefix_before_env} \
+${run_program_env} \
+LOCPATH=${common_objpfx}iconv \
+${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar
+
+# Local Variables:
+# mode:shell-script
+# End:
--
2.45.2
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] iconv: Fix matching of multi-character transliterations (bug 31859)
2024-06-12 8:50 [PATCH] iconv: Fix matching of multi-character transliterations (bug 31859) Andreas Schwab
@ 2024-06-20 18:39 ` Carlos O'Donell
2024-06-24 7:20 ` Andreas Schwab
0 siblings, 1 reply; 4+ messages in thread
From: Carlos O'Donell @ 2024-06-20 18:39 UTC (permalink / raw)
To: Andreas Schwab, libc-alpha
On 6/12/24 4:50 AM, Andreas Schwab wrote:
> Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of
> the input buffer is reached. Otherwise it is a non-match, and other
> patterns should be tried.
Looking forward to v4.
Fix looks correct to me.
I have comments about the test case.
> ---
> iconv/Makefile | 13 ++++++++++
> iconv/gconv_trans.c | 2 +-
> iconv/tst-translit-locale | 10 ++++++++
> iconv/tst-translit-mchar.c | 48 +++++++++++++++++++++++++++++++++++++
> iconv/tst-translit-mchar.sh | 48 +++++++++++++++++++++++++++++++++++++
> 5 files changed, 120 insertions(+), 1 deletion(-)
> create mode 100644 iconv/tst-translit-locale
> create mode 100644 iconv/tst-translit-mchar.c
> create mode 100644 iconv/tst-translit-mchar.sh
>
> diff --git a/iconv/Makefile b/iconv/Makefile
> index 63afc853ff..e93322da85 100644
> --- a/iconv/Makefile
> +++ b/iconv/Makefile
> @@ -57,6 +57,10 @@ tests = \
> tst-iconv-opt \
> # tests
>
> +test-srcs := \
> + tst-translit-mchar \
> + # test-srcs
OK. Add test sources.
> +
> others = iconv_prog iconvconfig
> install-others-programs = $(inst_bindir)/iconv
> install-sbin = iconvconfig
> @@ -73,6 +77,7 @@ include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
> ifeq ($(run-built-tests),yes)
> xtests-special += $(objpfx)test-iconvconfig.out
> tests-special += $(objpfx)tst-iconv_prog.out
> +tests-special += $(objpfx)tst-translit-mchar.out
OK. Add special test.
> endif
>
> # Make a copy of the file because gconv module names are constructed
> @@ -126,3 +131,11 @@ $(objpfx)tst-iconv_prog.out: tst-iconv_prog.sh $(objpfx)iconv_prog
> $(BASH) $< $(common-objdir) '$(test-wrapper-env)' \
> '$(run-program-env)' > $@; \
> $(evaluate-test)
> +
> +$(objpfx)tst-translit-mchar.out: tst-translit-mchar.sh \
> + $(objpfx)tst-translit-mchar \
> + tst-translit-locale
We should be able to use LOCALES to just compile tst-tranlit-locale?
> + $(SHELL) $< $(common-objpfx) '$(run-program-prefix-before-env)' \
> + '$(run-program-env)' '$(run-program-prefix-after-env)' \
> + $< > $@; \
> + $(evaluate-test)
OK.
> diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
> index 08b7a3f71d..44f0fd849a 100644
> --- a/iconv/gconv_trans.c
> +++ b/iconv/gconv_trans.c
> @@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step,
>
> /* Nothing found, continue searching. */
> }
> - else if (cnt > 0)
> + else if (cnt > 0 && winbuf + cnt == winbufend)
OK. Looks correct to me, we not only need to match the prefix but must also be the
right length.
> /* This means that the input buffer contents matches a prefix of
> an entry. Since we cannot match it unless we get more input,
> we will tell the caller about it. */
> diff --git a/iconv/tst-translit-locale b/iconv/tst-translit-locale
> new file mode 100644
> index 0000000000..712b08628a
> --- /dev/null
> +++ b/iconv/tst-translit-locale
> @@ -0,0 +1,10 @@
> +# Test multi-character transliteration rule
> +
> +LC_CTYPE
> +copy "POSIX"
> +
> +translit_start
> +"ÄÄ" "AA"
OK. New locale, that provides a transliteration.
> +translit_end
> +
> +END LC_CTYPE
> diff --git a/iconv/tst-translit-mchar.c b/iconv/tst-translit-mchar.c
> new file mode 100644
> index 0000000000..72335976d6
> --- /dev/null
> +++ b/iconv/tst-translit-mchar.c
> @@ -0,0 +1,48 @@
> +/* Test multi-character transliterations.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <locale.h>
> +#include <iconv.h>
> +#include <support/support.h>
> +#include <support/check.h>
> +
> +static int
> +do_test (void)
> +{
> + iconv_t cd;
> + /* An input sequence that shares a common prefix with a transliteration
> + rule. */
> + char input[] = "\xc3\x84\xc3\x85";
For a simple rule test like this may we just use raw UTF-8?
It makes it easier to read and understand.
e.g. "ÄÅ"
> + char *inptr = input;
> + char outbuf[10];
> + char *outptr = outbuf;
> + size_t inlen = sizeof (input), outlen = sizeof (outbuf);
> + size_t n;
> +
> + xsetlocale (LC_CTYPE, "tst-translit");
> +
> + cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
> + TEST_VERIFY (cd != (iconv_t) -1);
> +
> + /* This call used to loop infinitely. */
> + n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
> + TEST_VERIFY (iconv_close (cd) == 0);
> + return n == 0;
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/iconv/tst-translit-mchar.sh b/iconv/tst-translit-mchar.sh
> new file mode 100644
> index 0000000000..79efd6abc8
> --- /dev/null
> +++ b/iconv/tst-translit-mchar.sh
> @@ -0,0 +1,48 @@
> +#!/bin/sh
> +# Testing of multi-character transliterations
> +# Copyright (C) 2024 Free Software Foundation, Inc.
> +# This file is part of the GNU C Library.
> +
> +# The GNU C Library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +
> +# The GNU C Library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +# Lesser General Public License for more details.
> +
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with the GNU C Library; if not, see
> +# <https://www.gnu.org/licenses/>.
> +
> +set -e
> +
> +common_objpfx=$1
> +run_program_prefix_before_env=$2
> +run_program_env=$3
> +run_program_prefix_after_env=$4
> +
> +# Generate data files.
> +${run_program_prefix_before_env} \
> +${run_program_env} \
> +I18NPATH=../localedata \
> +${run_program_prefix_after_env} ${common_objpfx}locale/localedef \
> +--quiet -i tst-translit-locale -f UTF-8 ${common_objpfx}iconv/tst-translit || ret=$?
> +if [ $ret -ne 1 ]; then
> + echo "FAIL: Locale compilation for tst-translit-locale failed (error $ret)."
> + exit 1
> +fi
Can this just use LOCALES in iconv/Makefile?
It should be able to compile the locale for you as all the other locales are compiled.
> +
> +set -x
> +
> +# Run the test.
> +${run_program_prefix_before_env} \
> +${run_program_env} \
> +LOCPATH=${common_objpfx}iconv \
> +${run_program_prefix_after_env} ${common_objpfx}iconv/tst-translit-mchar
Then you can get rid of those whole shell script and fold it up the iconv/Makefile as a test?
> +
> +# Local Variables:
> +# mode:shell-script
> +# End:
--
Cheers,
Carlos.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] iconv: Fix matching of multi-character transliterations (bug 31859)
2024-06-20 18:39 ` Carlos O'Donell
@ 2024-06-24 7:20 ` Andreas Schwab
0 siblings, 0 replies; 4+ messages in thread
From: Andreas Schwab @ 2024-06-24 7:20 UTC (permalink / raw)
To: Carlos O'Donell; +Cc: libc-alpha
On Jun 20 2024, Carlos O'Donell wrote:
> We should be able to use LOCALES to just compile tst-tranlit-locale?
There are two issues here:
- LOCALES can only use locale sources from localedata/locales
- tst-tranlit-locale is a stub locale (only contains LC_CTYPE) that
localedef warns about, and gen-locale.sh does not accept that.
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] iconv: Fix matching of multi-character transliterations (bug 31859)
@ 2024-06-10 10:23 Andreas Schwab
0 siblings, 0 replies; 4+ messages in thread
From: Andreas Schwab @ 2024-06-10 10:23 UTC (permalink / raw)
To: libc-alpha
Only return __GCONV_INCOMPLETE_INPUT for a partial match when the end of
the input buffer is reached. Otherwise it is a non-match, and other
patterns should be tried.
---
iconv/gconv_trans.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
index 08b7a3f71d..44f0fd849a 100644
--- a/iconv/gconv_trans.c
+++ b/iconv/gconv_trans.c
@@ -150,7 +150,7 @@ __gconv_transliterate (struct __gconv_step *step,
/* Nothing found, continue searching. */
}
- else if (cnt > 0)
+ else if (cnt > 0 && winbuf + cnt == winbufend)
/* This means that the input buffer contents matches a prefix of
an entry. Since we cannot match it unless we get more input,
we will tell the caller about it. */
--
2.45.2
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-06-24 7:20 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-06-12 8:50 [PATCH] iconv: Fix matching of multi-character transliterations (bug 31859) Andreas Schwab
2024-06-20 18:39 ` Carlos O'Donell
2024-06-24 7:20 ` Andreas Schwab
-- strict thread matches above, loose matches on Subject: below --
2024-06-10 10:23 Andreas Schwab
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).