From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2178) id E3F0F38555B9; Tue, 29 Aug 2023 07:26:50 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E3F0F38555B9 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1693294010; bh=m5h3nJvnq4iDQYaMvvApgSUrE5OQLNBlrpBIKov6rkw=; h=From:To:Subject:Date:From; b=pOhUB0R4uS13m1+Kpn81ttBxdLzzjblYF1Dgf/vnLom9vEC0Fz85esxThokjSFgDs iPflzVFDZV92SCkX8+2p8+FtRZeaJlXqv7+i/weT2uGEmZhOONLTqSJdFhi8TSOoZM is00GU8fHyEoc70FDVnqKl8mcDSlnFaD0pc9RSco= MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Content-Type: text/plain; charset="utf-8" From: Florian Weimer To: glibc-cvs@sourceware.org Subject: [glibc/tmp] localedata: Translit common emojis to smileys [BZ #30649] X-Act-Checkin: glibc X-Git-Author: Colin Leroy-Mira X-Git-Refname: refs/heads/tmp X-Git-Oldrev: c00b984fcd53f679ca2dafcd1aee2c89836e6e73 X-Git-Newrev: b15eb46b1ae44999303655db899c2216346795f4 Message-Id: <20230829072650.E3F0F38555B9@sourceware.org> Date: Tue, 29 Aug 2023 07:26:50 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=b15eb46b1ae44999303655db899c2216346795f4 commit b15eb46b1ae44999303655db899c2216346795f4 Author: Colin Leroy-Mira Date: Tue Aug 29 08:02:50 2023 +0200 localedata: Translit common emojis to smileys [BZ #30649] Add common emojis to the translit-able characters (mostly faces and hearts), and translit them to old-fashioned smileys. 
Author: Colin Leroy-Mira Signed-off-by: Colin Leroy-Mira Diff: --- localedata/Makefile | 4 ++ localedata/locales/translit_emojis | 91 ++++++++++++++++++++++++++++ localedata/locales/translit_neutral | 1 + localedata/tst-iconv-emojis-trans.c | 117 ++++++++++++++++++++++++++++++++++++ 4 files changed, 213 insertions(+) diff --git a/localedata/Makefile b/localedata/Makefile index 3619b6d47e..dd41db6d8f 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -1,4 +1,5 @@ # Copyright (C) 1996-2023 Free Software Foundation, Inc. +# Copyright The GNU Toolchain Authors. # This file is part of the GNU C Library. # The GNU C Library is free software; you can redistribute it and/or @@ -164,6 +165,7 @@ tests = \ bug-usesetlocale \ tst-c-utf8-consistency \ tst-digits \ + tst-iconv-emojis-trans \ tst-iconv-math-trans \ tst-leaks \ tst-mbswcs1 \ @@ -320,6 +322,8 @@ LOCALES := \ include ../gen-locales.mk +$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales) + $(objpfx)tst-iconv-math-trans.out: $(gen-locales) endif diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis new file mode 100644 index 0000000000..cfb1964afa --- /dev/null +++ b/localedata/locales/translit_emojis @@ -0,0 +1,91 @@ +escape_char / +comment_char % + +% This file is part of the GNU C Library and contains locale data. +% The Free Software Foundation and the GNU Toolchain Authors do not +% claim any copyright interest in the locale data contained in this +% file. The foregoing does not affect the license of the GNU C +% Library as a whole. It does not exempt you from the conditions +% of the license if your use would otherwise be governed by that +% license. + +% Transliterations of emojis to ASCII smileys. 
+ +LC_CTYPE + +translit_start + +♡ "/<3" % WHITE HEART SUIT +♥ "/<3" % BLACK HEART SUIT +❤ "/<3" % HEAVY BLACK HEART +💙 "/<3" % BLUE HEART +💓 "/<3" % BEATING HEART +💔 "/:)" % SMILING FACE WITH HORNS +😉 ";-)" % WINKING FACE +😊 ":-)" % SMILING FACE WITH SMILING EYES +😋 ":-P" % FACE SAVOURING DELICIOUS FOOD +😌 ":-)" % RELIEVED FACE +😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES +😎 "B-)" % SMILING FACE WITH SUNGLASSES +😏 ";-)" % SMIRKING FACE +😐 ":-|" % NEUTRAL FACE +😑 ":-|" % EXPRESSIONLESS FACE +😒 ":-|" % UNAMUSED FACE +😓 ":'-|" % FACE WITH COLD SWEAT +😔 ":-|" % PENSIVE FACE +😕 ":-//" % CONFUSED FACE +😖 ":-S" % CONFOUNDED FACE +😗 ":-*" % KISSING FACE +😘 ":-*" % FACE THROWING A KISS +😙 ":-*" % KISSING FACE WITH SMILING EYES +😚 ":-*" % KISSING FACE WITH CLOSED EYES +😛 ":-P" % FACE WITH STUCK-OUT TONGUE +😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE +😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES +😞 ":-(" % DISAPPOINTED FACE +😟 ":-(" % WORRIED FACE +😠 "/>:-(" % ANGRY FACE +😡 ":-(" % POUTING FACE +😢 ":'-(" % CRYING FACE +😣 "X-(" % PERSEVERING FACE +😦 ":-O" % FROWNING FACE WITH OPEN MOUTH +😧 ":-O" % ANGUISHED FACE +😨 ":-O" % FEARFUL FACE +😩 ":-O" % WEARY FACE +😭 ":-(" % LOUDLY CRYING FACE +😮 ":-O" % FACE WITH OPEN MOUTH +😯 ":-O" % HUSHED FACE +😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT +😱 ":-O" % FACE SCREAMING IN FEAR +😲 ":-O" % ASTONISHED FACE +😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES +😹 ":'-3" % CAT FACE WITH TEARS OF JOY +😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH +😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPE EYES +😼 ";-3" % CAT FACE WITH WRY SMILE +😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES +🙁 ":-(" % SLIGHTLY FROWNING FACE +🙂 ":-)" % SLIGHTLY SMILING FACE +🙃 "(-:" % UPSIDE-DOWN FACE + +translit_end + +END LC_CTYPE diff --git a/localedata/locales/translit_neutral 
b/localedata/locales/translit_neutral index 72f66220b7..57412ae565 100644 --- a/localedata/locales/translit_neutral +++ b/localedata/locales/translit_neutral @@ -17,6 +17,7 @@ translit_start include "translit_circle";"" include "translit_cjk_compat";"" include "translit_compat";"" +include "translit_emojis";"" include "translit_font";"" include "translit_fraction";"" include "translit_narrow";"" diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c new file mode 100644 index 0000000000..de4c20a279 --- /dev/null +++ b/localedata/tst-iconv-emojis-trans.c @@ -0,0 +1,117 @@ +/* Test some emoji transliterations + + Copyright (C) 2019-2023 Free Software Foundation, Inc. + Copyright The GNU Toolchain Authors. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <iconv.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> +#include <support/check.h> + +static int +do_test (void) +{ + iconv_t cd; + + const int num_emojis = 70; + + const char str[] = "♡ ♥ ❤ 💙 💓 " + "💔 💖 💗 💚 💛 " + "💜 🖤 🧡 🤍 🤎 " + "😀 😁 😂 😃 😄 " + "😅 😆 😇 😈 😉 " + "😊 😋 😌 😍 😎 " + "😏 😐 😑 😒 😓 " + "😔 😕 😖 😗 😘 " + "😙 😚 😛 😜 😝 " + "😞 😟 😠 😡 😢 " + "😣 😦 😧 😨 😩 " + "😭 😮 😯 😰 😱 " + "😲 😸 😹 😺 😻 " + "😼 😽 🙁 🙂 🙃"; + + const char expected[] = "<3 <3 <3 <3 <3 " + ":) ;-) " + ":-) :-P :-) :-* B-) " + ";-) :-| :-| :-| :'-| " + ":-| :-/ :-S :-* :-* " + ":-* :-* :-P ;-P X-P " + ":-( :-( >:-( :-( :'-( " + "X-( :-O :-O :-O :-O " + ":\"-( :-O :-O :'-O :-O " + ":-O :-3 :'-3 :-3 :-3 " + ";-3 :-3 :-( :-) (-:"; + + char *inptr = (char *) str; + size_t inlen = strlen (str) + 1; + char outbuf[500]; + char *outptr = outbuf; + size_t outlen = sizeof (outbuf); + int result = 0; + size_t n; + + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) + FAIL_EXIT1 ("setlocale failed"); + + cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); + if (cd == (iconv_t) -1) + FAIL_EXIT1 ("iconv_open failed"); + + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + if (n != num_emojis) + { + if (n == (size_t) -1) + printf ("iconv() returned error: %m\n"); + else + printf ("iconv() returned %zd, expected %d\n", n, num_emojis); + result = 1; + } + if (inlen != 0) + { + puts ("not all input consumed"); + result = 1; + } + else if (inptr - str != strlen (str) + 1) + { + printf ("inptr wrong, advanced by %td\n", inptr - str); + result = 1; + } + if (memcmp (outbuf, expected, sizeof (expected)) != 0) + { + printf ("result wrong: \"%.*s\", expected: \"%s\"\n", + (int) (sizeof (outbuf) - outlen), outbuf, expected); + result = 1; + } + else if (outlen != sizeof (outbuf) - sizeof (expected)) + { + printf ("outlen wrong: %zd, expected %zd\n", outlen, + sizeof (outbuf) - 
sizeof (expected)); + result = 1; + } + else + printf ("output is \"%s\" which is OK\n", outbuf); + + return result; +} + +#include <support/test-driver.c>