public inbox for glibc-cvs@sourceware.org help / color / mirror / Atom feed
From: Florian Weimer <fw@sourceware.org> To: glibc-cvs@sourceware.org Subject: [glibc] localedata: Translit common emojis to smileys [BZ #30649] Date: Tue, 29 Aug 2023 07:32:09 +0000 (GMT) [thread overview] Message-ID: <20230829073209.59E873857C43@sourceware.org> (raw) https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=dfe8c445883a50a55564b02b6957257bfc510db4 commit dfe8c445883a50a55564b02b6957257bfc510db4 Author: Colin Leroy-Mira <colin@colino.net> Date: Tue Aug 29 08:02:50 2023 +0200 localedata: Translit common emojis to smileys [BZ #30649] Add common emojis to the translit-able characters (mostly faces and hearts), and translit them to old-fashioned smileys. Signed-off-by: Colin Leroy-Mira <colin@colino.net> Reviewed-by: Florian Weimer <fweimer@redhat.com> Diff: --- localedata/Makefile | 4 ++ localedata/locales/translit_emojis | 91 ++++++++++++++++++++++++++++ localedata/locales/translit_neutral | 1 + localedata/tst-iconv-emojis-trans.c | 117 ++++++++++++++++++++++++++++++++++++ 4 files changed, 213 insertions(+) diff --git a/localedata/Makefile b/localedata/Makefile index 3619b6d47e..dd41db6d8f 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -1,4 +1,5 @@ # Copyright (C) 1996-2023 Free Software Foundation, Inc. +# Copyright The GNU Toolchain Authors. # This file is part of the GNU C Library. 
# The GNU C Library is free software; you can redistribute it and/or @@ -164,6 +165,7 @@ tests = \ bug-usesetlocale \ tst-c-utf8-consistency \ tst-digits \ + tst-iconv-emojis-trans \ tst-iconv-math-trans \ tst-leaks \ tst-mbswcs1 \ @@ -320,6 +322,8 @@ LOCALES := \ include ../gen-locales.mk +$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales) + $(objpfx)tst-iconv-math-trans.out: $(gen-locales) endif diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis new file mode 100644 index 0000000000..cfb1964afa --- /dev/null +++ b/localedata/locales/translit_emojis @@ -0,0 +1,91 @@ +escape_char / +comment_char % + +% This file is part of the GNU C Library and contains locale data. +% The Free Software Foundation and the GNU Toolchain Authors do not +% claim any copyright interest in the locale data contained in this +% file. The foregoing does not affect the license of the GNU C +% Library as a whole. It does not exempt you from the conditions +% of the license if your use would otherwise be governed by that +% license. + +% Transliterations of emojis to ASCII smileys. + +LC_CTYPE + +translit_start + +♡ "/<3" % WHITE HEART SUIT +♥ "/<3" % BLACK HEART SUIT +❤ "/<3" % HEAVY BLACK HEART +💙 "/<3" % BLUE HEART +💓 "/<3" % BEATING HEART +💔 "/<//3" % BROKEN HEART +💖 "/<3" % SPARKLING HEART +💗 "/<3" % GROWING HEART +💚 "/<3" % GREEN HEART +💛 "/<3" % YELLOW HEART +💜 "/<3" % PURPLE HEART +🖤 "/<3" % BLACK HEART +🧡 "/<3" % ORANGE HEART +🤍 "/<3" % WHITE HEART +🤎 "/<3" % BROWN HEART +😀 ":-D" % GRINNING FACE +😁 ":-D" % GRINNING FACE WITH SMILING EYES +😂 ":'D" % FACE WITH TEARS OF JOY +😃 ":-D" % SMILING FACE WITH OPEN MOUTH (C.F. 
☺) +😄 ":-D" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES +😅 ":-D" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT +😆 ":-D" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES +😇 "O:-)" % SMILING FACE WITH HALO +😈 "/>:)" % SMILING FACE WITH HORNS +😉 ";-)" % WINKING FACE +😊 ":-)" % SMILING FACE WITH SMILING EYES +😋 ":-P" % FACE SAVOURING DELICIOUS FOOD +😌 ":-)" % RELIEVED FACE +😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES +😎 "B-)" % SMILING FACE WITH SUNGLASSES +😏 ";-)" % SMIRKING FACE +😐 ":-|" % NEUTRAL FACE +😑 ":-|" % EXPRESSIONLESS FACE +😒 ":-|" % UNAMUSED FACE +😓 ":'-|" % FACE WITH COLD SWEAT +😔 ":-|" % PENSIVE FACE +😕 ":-//" % CONFUSED FACE +😖 ":-S" % CONFOUNDED FACE +😗 ":-*" % KISSING FACE +😘 ":-*" % FACE THROWING A KISS +😙 ":-*" % KISSING FACE WITH SMILING EYES +😚 ":-*" % KISSING FACE WITH CLOSED EYES +😛 ":-P" % FACE WITH STUCK-OUT TONGUE +😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE +😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES +😞 ":-(" % DISAPPOINTED FACE +😟 ":-(" % WORRIED FACE +😠 "/>:-(" % ANGRY FACE +😡 ":-(" % POUTING FACE +😢 ":'-(" % CRYING FACE +😣 "X-(" % PERSEVERING FACE +😦 ":-O" % FROWNING FACE WITH OPEN MOUTH +😧 ":-O" % ANGUISHED FACE +😨 ":-O" % FEARFUL FACE +😩 ":-O" % WEARY FACE +😭 ":<U0022>-(" % LOUDLY CRYING FACE +😮 ":-O" % FACE WITH OPEN MOUTH +😯 ":-O" % HUSHED FACE +😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT +😱 ":-O" % FACE SCREAMING IN FEAR +😲 ":-O" % ASTONISHED FACE +😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES +😹 ":'-3" % CAT FACE WITH TEARS OF JOY +😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH +😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPE EYES +😼 ";-3" % CAT FACE WITH WRY SMILE +😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES +🙁 ":-(" % SLIGHTLY FROWNING FACE +🙂 ":-)" % SLIGHTLY SMILING FACE +🙃 "(-:" % UPSIDE-DOWN FACE + +translit_end + +END LC_CTYPE diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral index 72f66220b7..57412ae565 100644 --- a/localedata/locales/translit_neutral +++ 
b/localedata/locales/translit_neutral @@ -17,6 +17,7 @@ translit_start include "translit_circle";"" include "translit_cjk_compat";"" include "translit_compat";"" +include "translit_emojis";"" include "translit_font";"" include "translit_fraction";"" include "translit_narrow";"" diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c new file mode 100644 index 0000000000..de4c20a279 --- /dev/null +++ b/localedata/tst-iconv-emojis-trans.c @@ -0,0 +1,117 @@ +/* Test some emoji transliterations + + Copyright (C) 2019-2023 Free Software Foundation, Inc. + Copyright The GNU Toolchain Authors. + + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. 
*/ + +#include <iconv.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> +#include <support/check.h> + +static int +do_test (void) +{ + iconv_t cd; + + const int num_emojis = 70; + + const char str[] = "♡ ♥ ❤ 💙 💓 " + "💔 💖 💗 💚 💛 " + "💜 🖤 🧡 🤍 🤎 " + "😀 😁 😂 😃 😄 " + "😅 😆 😇 😈 😉 " + "😊 😋 😌 😍 😎 " + "😏 😐 😑 😒 😓 " + "😔 😕 😖 😗 😘 " + "😙 😚 😛 😜 😝 " + "😞 😟 😠 😡 😢 " + "😣 😦 😧 😨 😩 " + "😭 😮 😯 😰 😱 " + "😲 😸 😹 😺 😻 " + "😼 😽 🙁 🙂 🙃"; + + const char expected[] = "<3 <3 <3 <3 <3 " + "</3 <3 <3 <3 <3 " + "<3 <3 <3 <3 <3 " + ":-D :-D :'D :-D :-D " + ":-D :-D O:-) >:) ;-) " + ":-) :-P :-) :-* B-) " + ";-) :-| :-| :-| :'-| " + ":-| :-/ :-S :-* :-* " + ":-* :-* :-P ;-P X-P " + ":-( :-( >:-( :-( :'-( " + "X-( :-O :-O :-O :-O " + ":\"-( :-O :-O :'-O :-O " + ":-O :-3 :'-3 :-3 :-3 " + ";-3 :-3 :-( :-) (-:"; + + char *inptr = (char *) str; + size_t inlen = strlen (str) + 1; + char outbuf[500]; + char *outptr = outbuf; + size_t outlen = sizeof (outbuf); + int result = 0; + size_t n; + + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) + FAIL_EXIT1 ("setlocale failed"); + + cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); + if (cd == (iconv_t) -1) + FAIL_EXIT1 ("iconv_open failed"); + + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + if (n != num_emojis) + { + if (n == (size_t) -1) + printf ("iconv() returned error: %m\n"); + else + printf ("iconv() returned %zd, expected %d\n", n, num_emojis); + result = 1; + } + if (inlen != 0) + { + puts ("not all input consumed"); + result = 1; + } + else if (inptr - str != strlen (str) + 1) + { + printf ("inptr wrong, advanced by %td\n", inptr - str); + result = 1; + } + if (memcmp (outbuf, expected, sizeof (expected)) != 0) + { + printf ("result wrong: \"%.*s\", expected: \"%s\"\n", + (int) (sizeof (outbuf) - outlen), outbuf, expected); + result = 1; + } + else if (outlen != sizeof (outbuf) - sizeof (expected)) + { + printf ("outlen wrong: %zd, expected %zd\n", outlen, + sizeof (outbuf) - sizeof (expected)); + result = 1; + } + else + printf ("output is 
\"%s\" which is OK\n", outbuf); + + return result; +} + +#include <support/test-driver.c>
reply other threads:[~2023-08-29 7:32 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230829073209.59E873857C43@sourceware.org \ --to=fw@sourceware.org \ --cc=glibc-cvs@sourceware.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).