public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc] Add Transliterations for Unicode Misc. Mathematical Symbols-A/B [BZ #23132]
@ 2019-10-25 17:47 Arjun Shankar
0 siblings, 0 replies; only message in thread
From: Arjun Shankar @ 2019-10-25 17:47 UTC (permalink / raw)
To: glibc-cvs
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset="us-ascii", Size: 7219 bytes --]
https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=513aaa0d782f8fae36732d06ca59d658149f0139
commit 513aaa0d782f8fae36732d06ca59d658149f0139
Author: Arjun Shankar <arjun@redhat.com>
Date: Wed Oct 23 18:51:29 2019 +0200
Add Transliterations for Unicode Misc. Mathematical Symbols-A/B [BZ #23132]
This commit adds previously missing transliterations for several code points
in the Unicode blocks "Miscellaneous Mathematical Symbols-A/B" -
transliterated to their approximate ASCII representations. It also adds a
corresponding iconv transliteration test.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Diff:
---
localedata/Makefile | 4 +-
localedata/locales/translit_neutral | 52 +++++++++++++++++-
localedata/tst-iconv-math-trans.c | 104 ++++++++++++++++++++++++++++++++++++
3 files changed, 157 insertions(+), 3 deletions(-)
diff --git a/localedata/Makefile b/localedata/Makefile
index ce6a750..89ba404 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -156,7 +156,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \
tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \
- tst-wctype
+ tst-wctype tst-iconv-math-trans
tests-static = bug-setlocale1-static
tests += $(tests-static)
ifeq (yes,$(build-shared))
@@ -287,6 +287,8 @@ LOCALES := \
$(NULL)
include ../gen-locales.mk
+
+$(objpfx)tst-iconv-math-trans.out: $(gen-locales)
endif
include ../Rules
diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
index e3639c0..72f6622 100644
--- a/localedata/locales/translit_neutral
+++ b/localedata/locales/translit_neutral
@@ -743,10 +743,22 @@ include "translit_wide";""
<U263A> "<U003A><U0029>"
% BLACK SMILING FACE
<U263B> "<U003A><U0029>"
-% MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET⟫
-<U27EB> "<U003E><U003E>"
+% MATHEMATICAL LEFT WHITE SQUARE BRACKET
+<U27E6> "<U005B><U007C>"
+% MATHEMATICAL RIGHT WHITE SQUARE BRACKET
+<U27E7> "<U007C><U005D>"
+% MATHEMATICAL LEFT ANGLE BRACKET
+<U27E8> <U003C>
+% MATHEMATICAL RIGHT ANGLE BRACKET
+<U27E9> <U003E>
% MATHEMATICAL LEFT DOUBLE ANGLE BRACKET
<U27EA> "<U003C><U003C>"
+% MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET
+<U27EB> "<U003E><U003E>"
+% MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET
+<U27EC> "<U0028><U0028>"
+% MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET
+<U27ED> "<U0029><U0029>"
% MATHEMATICAL LEFT FLATTENED PARENTHESIS
<U27EE> <U0028>
% MATHEMATICAL RIGHT FLATTENED PARENTHESIS
@@ -755,6 +767,42 @@ include "translit_wide";""
<U27CB> <U002F>
% MATHEMATICAL FALLING DIAGONAL
<U27CD> <U005C>
+% TRIPLE VERTICAL BAR DELIMITER
+<U2980> "<U007C><U007C><U007C>"
+% LEFT WHITE CURLY BRACKET
+<U2983> "<U007B><U007C>"
+% RIGHT WHITE CURLY BRACKET
+<U2984> "<U007C><U007D>"
+% LEFT WHITE PARENTHESIS
+<U2985> "<U0028><U0028>"
+% RIGHT WHITE PARENTHESIS
+<U2986> "<U0029><U0029>"
+% Z NOTATION LEFT IMAGE BRACKET
+<U2987> "<U0028><U007C>"
+% Z NOTATION RIGHT IMAGE BRACKET
+<U2988> "<U007C><U0029>"
+% Z NOTATION LEFT BINDING BRACKET
+<U2989> "<U003C><U007C>"
+% Z NOTATION RIGHT BINDING BRACKET
+<U298A> "<U007C><U003E>"
+% EQUALS SIGN AND SLANTED PARALLEL
+<U29E3> <U0023>
+% IDENTICAL TO AND SLANTED PARALLEL
+<U29E5> <U0023>
+% REVERSE SOLIDUS OPERATOR
+<U29F5> <U005C>
+% BIG SOLIDUS
+<U29F8> <U002F>
+% BIG REVERSE SOLIDUS
+<U29F9> <U005C>
+% LEFT-POINTING CURVED ANGLE BRACKET
+<U29FC> <U003C>
+% RIGHT-POINTING CURVED ANGLE BRACKET
+<U29FD> <U003E>
+% TINY
+<U29FE> <U002B>
+% MINY
+<U29FF> <U002D>
% LEFT ANGLE BRACKET
<U3008> <U003C>
% RIGHT ANGLE BRACKET
diff --git a/localedata/tst-iconv-math-trans.c b/localedata/tst-iconv-math-trans.c
new file mode 100644
index 0000000..32473bb
--- /dev/null
+++ b/localedata/tst-iconv-math-trans.c
@@ -0,0 +1,104 @@
+/* Test some mathematical operator transliterations (BZ #23132)
+
+ Copyright (C) 2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+
+/* Transliterate a fixed UTF-8 string made of code points from the
+   "Miscellaneous Mathematical Symbols-A/B" blocks to ASCII using
+   iconv with //TRANSLIT, and compare the output with the expected
+   ASCII approximations.  Returns 0 if every check passes and 1
+   otherwise; exits via FAIL_EXIT1 if the locale or the conversion
+   descriptor cannot be set up.  */
+static int
+do_test (void)
+{
+  iconv_t cd;
+
+  /* str[] = "⟦ ⟧ ⟨ ⟩"
+             " ⟬ ⟭ ⦀"
+             " ⦃ ⦄ ⦅ ⦆"
+             " ⦇ ⦈ ⦉ ⦊"
+             " ⧣ ⧥ ⧵ ⧸ ⧹"
+             " ⧼ ⧽ ⧾ ⧿"; */
+
+  const char str[] = "\u27E6 \u27E7 \u27E8 \u27E9"
+                     " \u27EC \u27ED \u2980"
+                     " \u2983 \u2984 \u2985 \u2986"
+                     " \u2987 \u2988 \u2989 \u298A"
+                     " \u29E3 \u29E5 \u29F5 \u29F8 \u29F9"
+                     " \u29FC \u29FD \u29FE \u29FF";
+
+  const char expected[] = "[| |] < >"
+                          " (( )) |||"
+                          " {| |} (( ))"
+                          " (| |) <| |>"
+                          " # # \\ / \\"
+                          " < > + -";
+
+  /* Every non-ASCII character in STR (4 + 3 + 4 + 4 + 5 + 4 of them)
+     has to be transliterated; the test expects iconv to return that
+     count.  */
+  const size_t expected_translits = 24;
+
+  /* The terminating NUL is converted as well, so that OUTBUF can be
+     compared against EXPECTED including its terminator.  */
+  char *inptr = (char *) str;
+  size_t inlen = strlen (str) + 1;
+  char outbuf[500];
+  char *outptr = outbuf;
+  size_t outlen = sizeof (outbuf);
+  int result = 0;
+  size_t n;
+
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    FAIL_EXIT1 ("setlocale failed");
+
+  cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+  if (cd == (iconv_t) -1)
+    FAIL_EXIT1 ("iconv_open failed");
+
+  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  if (n != expected_translits)
+    {
+      if (n == (size_t) -1)
+        printf ("iconv() returned error: %m\n");
+      else
+        printf ("iconv() returned %zu, expected %zu\n", n,
+                expected_translits);
+      result = 1;
+    }
+  if (inlen != 0)
+    {
+      puts ("not all input consumed");
+      result = 1;
+    }
+  else if ((size_t) (inptr - str) != strlen (str) + 1)
+    {
+      printf ("inptr wrong, advanced by %td\n", inptr - str);
+      result = 1;
+    }
+  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
+    {
+      /* Print only the bytes iconv actually produced; OUTBUF is not
+         guaranteed to be NUL-terminated on this path.  */
+      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
+              (int) (sizeof (outbuf) - outlen), outbuf, expected);
+      result = 1;
+    }
+  else if (outlen != sizeof (outbuf) - sizeof (expected))
+    {
+      /* Report the same expected value that the condition checks.  */
+      printf ("outlen wrong: %zu, expected %zu\n", outlen,
+              sizeof (outbuf) - sizeof (expected));
+      result = 1;
+    }
+  else
+    printf ("output is \"%s\" which is OK\n", outbuf);
+
+  if (iconv_close (cd) != 0)
+    {
+      printf ("iconv_close failed: %m\n");
+      result = 1;
+    }
+
+  return result;
+}
+
+#include <support/test-driver.c>
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2019-10-25 17:47 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-25 17:47 [glibc] Add Transliterations for Unicode Misc. Mathematical Symbols-A/B [BZ #23132] Arjun Shankar
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).