public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] localedata: Translit common emojis to smileys [BZ #30649]
@ 2023-07-19 16:17 Colin Leroy-Mira
  2023-07-20 22:27 ` [PATCH][v2] " Colin Leroy-Mira
  2023-07-21 14:11 ` [PATCH v2] " Colin Leroy-Mira
  0 siblings, 2 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-07-19 16:17 UTC (permalink / raw)
  To: libc-alpha; +Cc: Colin Leroy-Mira

Add common emojis to the translit-able characters (mostly
faces and hearts), and translit them to old-fashioned
smileys.

Author: Colin Leroy-Mira <colin@colino.net>
Signed-off-by: Colin Leroy-Mira <colin@colino.net>
---
 localedata/locales/translit_emojis  | 91 +++++++++++++++++++++++++++++
 localedata/locales/translit_neutral |  1 +
 2 files changed, 92 insertions(+)
 create mode 100644 localedata/locales/translit_emojis

diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
new file mode 100644
index 0000000000..fa4b58cee5
--- /dev/null
+++ b/localedata/locales/translit_emojis
@@ -0,0 +1,91 @@
+escape_char /
+comment_char %
+
+% This file is part of the GNU C Library and contains locale data.
+% The Free Software Foundation does not claim any copyright interest
+% in the locale data contained in this file.  The foregoing does not
+% affect the license of the GNU C Library as a whole.  It does not
+% exempt you from the conditions of the license if your use would
+% otherwise be governed by that license.
+
+% Transliterations of emojis to ASCII smileys.
+% Generated algorithmically.
+
+LC_CTYPE
+
+translit_start
+
+<U2661> "<U003C><U0033>" % WHITE HEART SUIT
+<U2665> "<U003C><U0033>" % BLACK HEART SUIT
+<U2764> "<U003C><U0033>" % HEAVY BLACK HEART
+<U0001F499> "<U003C><U0033>" % BLUE HEART
+<U0001F493> "<U003C><U0033>" % BEATING HEART
+<U0001F494> "<U003C><U002F><U0033>" % BROKEN HEART
+<U0001F496> "<U003C><U0033>" % SPARKLING HEART
+<U0001F497> "<U003C><U0033>" % GROWING HEART
+<U0001F49A> "<U003C><U0033>" % GREEN HEART
+<U0001F49B> "<U003C><U0033>" % YELLOW HEART
+<U0001F49C> "<U003C><U0033>" % PURPLE HEART
+<U0001F5A4> "<U003C><U0033>" % BLACK HEART
+<U0001F9E1> "<U003C><U0033>" % ORANGE HEART
+<U0001F90D> "<U003C><U0033>" % WHITE HEART
+<U0001F90E> "<U003C><U0033>" % BROWN HEART
+<U0001F600> "<U003A><U002D><U0044>" % GRINNING FACE
+<U0001F601> "<U003A><U002D><U0044>" % GRINNING FACE WITH SMILING EYES
+<U0001F602> "<U003A><U0027><U0044>" % FACE WITH TEARS OF JOY
+<U0001F603> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
+<U0001F604> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
+<U0001F605> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
+<U0001F606> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
+<U0001F607> "<U004F><U003A><U002D><U0029>" % SMILING FACE WITH HALO
+<U0001F608> "<U003E><U003A><U0029>" % SMILING FACE WITH HORNS
+<U0001F609> "<U003B><U002D><U0029>" % WINKING FACE
+<U0001F60A> "<U003A><U002D><U0029>" % SMILING FACE WITH SMILING EYES
+<U0001F60B> "<U003A><U002D><U0050>" % FACE SAVOURING DELICIOUS FOOD
+<U0001F60C> "<U003A><U002D><U0029>" % RELIEVED FACE
+<U0001F60D> "<U003A><U002D><U002A>" % SMILING FACE WITH HEART-SHAPED EYES
+<U0001F60E> "<U0042><U002D><U0029>" % SMILING FACE WITH SUNGLASSES
+<U0001F60F> "<U003B><U002D><U0029>" % SMIRKING FACE
+<U0001F610> "<U003A><U002D><U007C>" % NEUTRAL FACE
+<U0001F611> "<U003A><U002D><U007C>" % EXPRESSIONLESS FACE
+<U0001F612> "<U003A><U002D><U007C>" % UNAMUSED FACE
+<U0001F613> "<U003A><U0027><U002D><U007C>" % FACE WITH COLD SWEAT
+<U0001F614> "<U003A><U002D><U007C>" % PENSIVE FACE
+<U0001F615> "<U003A><U002D><U002F>" % CONFUSED FACE
+<U0001F616> "<U003A><U002D><U0053>" % CONFOUNDED FACE
+<U0001F617> "<U003A><U002D><U002A>" % KISSING FACE
+<U0001F618> "<U003A><U002D><U002A>" % FACE THROWING A KISS
+<U0001F619> "<U003A><U002D><U002A>" % KISSING FACE WITH SMILING EYES
+<U0001F61A> "<U003A><U002D><U002A>" % KISSING FACE WITH CLOSED EYES
+<U0001F61B> "<U003A><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE
+<U0001F61C> "<U003B><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
+<U0001F61D> "<U0058><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
+<U0001F61E> "<U003A><U002D><U0028>" % DISAPPOINTED FACE
+<U0001F61F> "<U003A><U002D><U0028>" % WORRIED FACE
+<U0001F620> "<U003E><U003A><U002D><U0028>" % ANGRY FACE
+<U0001F621> "<U003A><U002D><U0028>" % POUTING FACE
+<U0001F622> "<U003A><U0027><U002D><U0028>" % CRYING FACE
+<U0001F623> "<U0058><U002D><U0028>" % PERSEVERING FACE
+<U0001F626> "<U003A><U002D><U004F>" % FROWNING FACE WITH OPEN MOUTH
+<U0001F627> "<U003A><U002D><U004F>" % ANGUISHED FACE
+<U0001F628> "<U003A><U002D><U004F>" % FEARFUL FACE
+<U0001F629> "<U003A><U002D><U004F>" % WEARY FACE
+<U0001F62D> "<U003A><U0022><U002D><U0028>" % LOUDLY CRYING FACE
+<U0001F62E> "<U003A><U002D><U004F>" % FACE WITH OPEN MOUTH
+<U0001F62F> "<U003A><U002D><U004F>" % HUSHED FACE
+<U0001F630> "<U003A><U0027><U002D><U004F>" % FACE WITH OPEN MOUTH AND COLD SWEAT
+<U0001F631> "<U003A><U002D><U004F>" % FACE SCREAMING IN FEAR
+<U0001F632> "<U003A><U002D><U004F>" % ASTONISHED FACE
+<U0001F638> "<U003A><U002D><U0033>" % GRINNING CAT FACE WITH SMILING EYES
+<U0001F639> "<U003A><U0027><U002D><U0033>" % CAT FACE WITH TEARS OF JOY
+<U0001F63A> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH OPEN MOUTH
+<U0001F63B> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH HEART-SHAPED EYES
+<U0001F63C> "<U003B><U002D><U0033>" % CAT FACE WITH WRY SMILE
+<U0001F63D> "<U003A><U002D><U0033>" % KISSING CAT FACE WITH CLOSED EYES
+<U0001F641> "<U003A><U002D><U0028>" % SLIGHTLY FROWNING FACE
+<U0001F642> "<U003A><U002D><U0029>" % SLIGHTLY SMILING FACE
+<U0001F643> "<U0028><U002D><U003A>" % UPSIDE-DOWN FACE
+
+translit_end
+
+END LC_CTYPE
diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
index 72f66220b7..57412ae565 100644
--- a/localedata/locales/translit_neutral
+++ b/localedata/locales/translit_neutral
@@ -17,6 +17,7 @@ translit_start
 include "translit_circle";""
 include "translit_cjk_compat";""
 include "translit_compat";""
+include "translit_emojis";""
 include "translit_font";""
 include "translit_fraction";""
 include "translit_narrow";""
-- 
2.39.2


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH][v2] localedata: Translit common emojis to smileys [BZ #30649]
  2023-07-19 16:17 [PATCH] localedata: Translit common emojis to smileys [BZ #30649] Colin Leroy-Mira
@ 2023-07-20 22:27 ` Colin Leroy-Mira
  2023-07-21 14:11 ` [PATCH v2] " Colin Leroy-Mira
  1 sibling, 0 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-07-20 22:27 UTC (permalink / raw)
  To: libc-alpha; +Cc: Colin Leroy-Mira

Add common emojis to the translit-able characters (mostly
faces and hearts), and translit them to old-fashioned
smileys.
v2: fix a wrong smiley, add unit test

Author: Colin Leroy-Mira <colin@colino.net>
Signed-off-by: Colin Leroy-Mira <colin@colino.net>
---
 localedata/Makefile                 |   3 +
 localedata/locales/translit_emojis  |  91 ++++++++++++++++++
 localedata/locales/translit_neutral |   1 +
 localedata/tst-iconv-emojis-trans.c | 139 ++++++++++++++++++++++++++++
 4 files changed, 234 insertions(+)
 create mode 100644 localedata/locales/translit_emojis
 create mode 100644 localedata/tst-iconv-emojis-trans.c

diff --git a/localedata/Makefile b/localedata/Makefile
index 3619b6d47e..5b6d10e33f 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -164,6 +164,7 @@ tests = \
   bug-usesetlocale \
   tst-c-utf8-consistency \
   tst-digits \
+  tst-iconv-emojis-trans \
   tst-iconv-math-trans \
   tst-leaks \
   tst-mbswcs1 \
@@ -320,6 +321,8 @@ LOCALES := \
 
 include ../gen-locales.mk
 
+$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
+
 $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
 endif
 
diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
new file mode 100644
index 0000000000..260aeedc35
--- /dev/null
+++ b/localedata/locales/translit_emojis
@@ -0,0 +1,91 @@
+escape_char /
+comment_char %
+
+% This file is part of the GNU C Library and contains locale data.
+% The Free Software Foundation does not claim any copyright interest
+% in the locale data contained in this file.  The foregoing does not
+% affect the license of the GNU C Library as a whole.  It does not
+% exempt you from the conditions of the license if your use would
+% otherwise be governed by that license.
+
+% Transliterations of emojis to ASCII smileys.
+% Generated algorithmically.
+
+LC_CTYPE
+
+translit_start
+
+<U2661> "<U003C><U0033>" % WHITE HEART SUIT
+<U2665> "<U003C><U0033>" % BLACK HEART SUIT
+<U2764> "<U003C><U0033>" % HEAVY BLACK HEART
+<U0001F499> "<U003C><U0033>" % BLUE HEART
+<U0001F493> "<U003C><U0033>" % BEATING HEART
+<U0001F494> "<U003C><U002F><U0033>" % BROKEN HEART
+<U0001F496> "<U003C><U0033>" % SPARKLING HEART
+<U0001F497> "<U003C><U0033>" % GROWING HEART
+<U0001F49A> "<U003C><U0033>" % GREEN HEART
+<U0001F49B> "<U003C><U0033>" % YELLOW HEART
+<U0001F49C> "<U003C><U0033>" % PURPLE HEART
+<U0001F5A4> "<U003C><U0033>" % BLACK HEART
+<U0001F9E1> "<U003C><U0033>" % ORANGE HEART
+<U0001F90D> "<U003C><U0033>" % WHITE HEART
+<U0001F90E> "<U003C><U0033>" % BROWN HEART
+<U0001F600> "<U003A><U002D><U0044>" % GRINNING FACE
+<U0001F601> "<U003A><U002D><U0044>" % GRINNING FACE WITH SMILING EYES
+<U0001F602> "<U003A><U0027><U0044>" % FACE WITH TEARS OF JOY
+<U0001F603> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
+<U0001F604> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
+<U0001F605> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
+<U0001F606> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
+<U0001F607> "<U004F><U003A><U002D><U0029>" % SMILING FACE WITH HALO
+<U0001F608> "<U003E><U003A><U0029>" % SMILING FACE WITH HORNS
+<U0001F609> "<U003B><U002D><U0029>" % WINKING FACE
+<U0001F60A> "<U003A><U002D><U0029>" % SMILING FACE WITH SMILING EYES
+<U0001F60B> "<U003A><U002D><U0050>" % FACE SAVOURING DELICIOUS FOOD
+<U0001F60C> "<U003A><U002D><U0029>" % RELIEVED FACE
+<U0001F60D> "<U003A><U002D><U002A>" % SMILING FACE WITH HEART-SHAPED EYES
+<U0001F60E> "<U0042><U002D><U0029>" % SMILING FACE WITH SUNGLASSES
+<U0001F60F> "<U003B><U002D><U0029>" % SMIRKING FACE
+<U0001F610> "<U003A><U002D><U007C>" % NEUTRAL FACE
+<U0001F611> "<U003A><U002D><U007C>" % EXPRESSIONLESS FACE
+<U0001F612> "<U003A><U002D><U007C>" % UNAMUSED FACE
+<U0001F613> "<U003A><U0027><U002D><U007C>" % FACE WITH COLD SWEAT
+<U0001F614> "<U003A><U002D><U007C>" % PENSIVE FACE
+<U0001F615> "<U003A><U002D><U002F>" % CONFUSED FACE
+<U0001F616> "<U003A><U002D><U0053>" % CONFOUNDED FACE
+<U0001F617> "<U003A><U002D><U002A>" % KISSING FACE
+<U0001F618> "<U003A><U002D><U002A>" % FACE THROWING A KISS
+<U0001F619> "<U003A><U002D><U002A>" % KISSING FACE WITH SMILING EYES
+<U0001F61A> "<U003A><U002D><U002A>" % KISSING FACE WITH CLOSED EYES
+<U0001F61B> "<U003A><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE
+<U0001F61C> "<U003B><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
+<U0001F61D> "<U0058><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
+<U0001F61E> "<U003A><U002D><U0028>" % DISAPPOINTED FACE
+<U0001F61F> "<U003A><U002D><U0028>" % WORRIED FACE
+<U0001F620> "<U003E><U003A><U002D><U0028>" % ANGRY FACE
+<U0001F621> "<U003A><U002D><U0028>" % POUTING FACE
+<U0001F622> "<U003A><U0027><U002D><U0028>" % CRYING FACE
+<U0001F623> "<U0058><U002D><U0028>" % PERSEVERING FACE
+<U0001F626> "<U003A><U002D><U004F>" % FROWNING FACE WITH OPEN MOUTH
+<U0001F627> "<U003A><U002D><U004F>" % ANGUISHED FACE
+<U0001F628> "<U003A><U002D><U004F>" % FEARFUL FACE
+<U0001F629> "<U003A><U002D><U004F>" % WEARY FACE
+<U0001F62D> "<U003A><U0022><U002D><U0028>" % LOUDLY CRYING FACE
+<U0001F62E> "<U003A><U002D><U004F>" % FACE WITH OPEN MOUTH
+<U0001F62F> "<U003A><U002D><U004F>" % HUSHED FACE
+<U0001F630> "<U003A><U0027><U002D><U004F>" % FACE WITH OPEN MOUTH AND COLD SWEAT
+<U0001F631> "<U003A><U002D><U004F>" % FACE SCREAMING IN FEAR
+<U0001F632> "<U003A><U002D><U004F>" % ASTONISHED FACE
+<U0001F638> "<U003A><U002D><U0033>" % GRINNING CAT FACE WITH SMILING EYES
+<U0001F639> "<U003A><U0027><U002D><U0033>" % CAT FACE WITH TEARS OF JOY
+<U0001F63A> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH OPEN MOUTH
+<U0001F63B> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH HEART-SHAPED EYES
+<U0001F63C> "<U003B><U002D><U0033>" % CAT FACE WITH WRY SMILE
+<U0001F63D> "<U003A><U002D><U0033>" % KISSING CAT FACE WITH CLOSED EYES
+<U0001F641> "<U003A><U002D><U0028>" % SLIGHTLY FROWNING FACE
+<U0001F642> "<U003A><U002D><U0029>" % SLIGHTLY SMILING FACE
+<U0001F643> "<U0028><U002D><U003A>" % UPSIDE-DOWN FACE
+
+translit_end
+
+END LC_CTYPE
diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
index 72f66220b7..57412ae565 100644
--- a/localedata/locales/translit_neutral
+++ b/localedata/locales/translit_neutral
@@ -17,6 +17,7 @@ translit_start
 include "translit_circle";""
 include "translit_cjk_compat";""
 include "translit_compat";""
+include "translit_emojis";""
 include "translit_font";""
 include "translit_fraction";""
 include "translit_narrow";""
diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
new file mode 100644
index 0000000000..cc8b2a8bba
--- /dev/null
+++ b/localedata/tst-iconv-emojis-trans.c
@@ -0,0 +1,139 @@
+/* Test some emoji transliterations
+
+   Copyright (C) 2019-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+
+/* Transliterate a string of emojis from UTF-8 to ASCII in one iconv call
+   and compare the result with the expected smileys; return 0 on success.  */
+static int
+do_test (void)
+{
+  /* str[] = "♡ ♥ ❤ 💙 💓 "
+             "💔 💖 💗 💚 💛 "
+             "💜 🖤 🧡 🤍 🤎 "
+             "😀 😁 😂 😃 😄 "
+             "😅 😆 😇 😈 😉 "
+             "😊 😋 😌 😍 😎 "
+             "😏 😐 😑 😒 😓 "
+             "😔 😕 😖 😗 😘 "
+             "😙 😚 😛 😜 😝 "
+             "😞 😟 😠 😡 😢 "
+             "😣 😦 😧 😨 😩 "
+             "😭 😮 😯 😰 😱 "
+             "😲 😸 😹 😺 😻 "
+             "😼 😽 🙁 🙂 🙃";  */
+
+  const int num_emojis = 70;  /* Expected iconv return value.  */
+
+  const char str[] = "\u2661 \u2665 \u2764 \U0001F499 "
+                     "\U0001F493 \U0001F494 \U0001F496 "
+                     "\U0001F497 \U0001F49A \U0001F49B "
+                     "\U0001F49C \U0001F5A4 \U0001F9E1 "
+                     "\U0001F90D \U0001F90E \U0001F600 "
+                     "\U0001F601 \U0001F602 \U0001F603 "
+                     "\U0001F604 \U0001F605 \U0001F606 "
+                     "\U0001F607 \U0001F608 \U0001F609 "
+                     "\U0001F60A \U0001F60B \U0001F60C "
+                     "\U0001F60D \U0001F60E \U0001F60F "
+                     "\U0001F610 \U0001F611 \U0001F612 "
+                     "\U0001F613 \U0001F614 \U0001F615 "
+                     "\U0001F616 \U0001F617 \U0001F618 "
+                     "\U0001F619 \U0001F61A \U0001F61B "
+                     "\U0001F61C \U0001F61D \U0001F61E "
+                     "\U0001F61F \U0001F620 \U0001F621 "
+                     "\U0001F622 \U0001F623 \U0001F626 "
+                     "\U0001F627 \U0001F628 \U0001F629 "
+                     "\U0001F62D \U0001F62E \U0001F62F "
+                     "\U0001F630 \U0001F631 \U0001F632 "
+                     "\U0001F638 \U0001F639 \U0001F63A "
+                     "\U0001F63B \U0001F63C \U0001F63D "
+                     "\U0001F641 \U0001F642 \U0001F643";
+
+  const char expected[] = "<3 <3 <3 <3 <3 "
+                          "</3 <3 <3 <3 <3 "
+                          "<3 <3 <3 <3 <3 "
+                          ":-D :-D :'D :-D :-D "
+                          ":-D :-D O:-) >:) ;-) "
+                          ":-) :-P :-) :-* B-) "
+                          ";-) :-| :-| :-| :'-| "
+                          ":-| :-/ :-S :-* :-* "
+                          ":-* :-* :-P ;-P X-P "
+                          ":-( :-( >:-( :-( :'-( "
+                          "X-( :-O :-O :-O :-O "
+                          ":\"-( :-O :-O :'-O :-O "
+                          ":-O :-3 :'-3 :-3 :-3 "
+                          ";-3 :-3 :-( :-) (-:";
+
+  char *inptr = (char *) str;
+  size_t inlen = sizeof (str);  /* Convert the terminating NUL too.  */
+  char outbuf[500] = { 0 };  /* Zeroed so a short write is comparable.  */
+  char *outptr = outbuf;
+  size_t outlen = sizeof (outbuf);
+  int result = 0;
+
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    FAIL_EXIT1 ("setlocale failed");
+
+  iconv_t cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+  if (cd == (iconv_t) -1)
+    FAIL_EXIT1 ("iconv_open failed");
+
+  size_t n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  if (n != num_emojis)
+    {
+      if (n == (size_t) -1)
+        printf ("iconv() returned error: %m\n");
+      else
+        printf ("iconv() returned %zu, expected %d\n", n, num_emojis);
+      result = 1;
+    }
+  if (inlen != 0)
+    {
+      puts ("not all input consumed");
+      result = 1;
+    }
+  else if (inptr != str + sizeof (str))
+    {
+      printf ("inptr wrong, advanced by %td\n", inptr - str);
+      result = 1;
+    }
+  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
+    {
+      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
+              (int) (sizeof (outbuf) - outlen), outbuf, expected);
+      result = 1;
+    }
+  else if (outlen != sizeof (outbuf) - sizeof (expected))
+    {
+      printf ("outlen wrong: %zu, expected %zu\n", outlen,
+              sizeof (outbuf) - sizeof (expected));
+      result = 1;
+    }
+  else
+    printf ("output is \"%s\" which is OK\n", outbuf);
+
+  iconv_close (cd);
+  return result;
+}
+
+#include <support/test-driver.c>
-- 
2.39.2


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2] localedata: Translit common emojis to smileys [BZ #30649]
  2023-07-19 16:17 [PATCH] localedata: Translit common emojis to smileys [BZ #30649] Colin Leroy-Mira
  2023-07-20 22:27 ` [PATCH][v2] " Colin Leroy-Mira
@ 2023-07-21 14:11 ` Colin Leroy-Mira
  2023-08-08  7:07   ` Colin Leroy-Mira
                     ` (3 more replies)
  1 sibling, 4 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-07-21 14:11 UTC (permalink / raw)
  To: libc-alpha; +Cc: Colin Leroy-Mira

Add common emojis to the translit-able characters (mostly
faces and hearts), and translit them to old-fashioned
smileys.

Author: Colin Leroy-Mira <colin@colino.net>
Signed-off-by: Colin Leroy-Mira <colin@colino.net>
---
v2: Fix a wrong smiley, add unit test
 localedata/Makefile                 |   3 +
 localedata/locales/translit_emojis  |  91 ++++++++++++++++++++
 localedata/locales/translit_neutral |   1 +
 localedata/tst-iconv-emojis-trans.c | 124 ++++++++++++++++++++++++++++
 4 files changed, 219 insertions(+)
 create mode 100644 localedata/locales/translit_emojis
 create mode 100644 localedata/tst-iconv-emojis-trans.c

diff --git a/localedata/Makefile b/localedata/Makefile
index 3619b6d47e..5b6d10e33f 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -164,6 +164,7 @@ tests = \
   bug-usesetlocale \
   tst-c-utf8-consistency \
   tst-digits \
+  tst-iconv-emojis-trans \
   tst-iconv-math-trans \
   tst-leaks \
   tst-mbswcs1 \
@@ -320,6 +321,8 @@ LOCALES := \
 
 include ../gen-locales.mk
 
+$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
+
 $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
 endif
 
diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
new file mode 100644
index 0000000000..260aeedc35
--- /dev/null
+++ b/localedata/locales/translit_emojis
@@ -0,0 +1,91 @@
+escape_char /
+comment_char %
+
+% This file is part of the GNU C Library and contains locale data.
+% The Free Software Foundation does not claim any copyright interest
+% in the locale data contained in this file.  The foregoing does not
+% affect the license of the GNU C Library as a whole.  It does not
+% exempt you from the conditions of the license if your use would
+% otherwise be governed by that license.
+
+% Transliterations of emojis to ASCII smileys.
+% Generated algorithmically.
+
+LC_CTYPE
+
+translit_start
+
+<U2661> "<U003C><U0033>" % WHITE HEART SUIT
+<U2665> "<U003C><U0033>" % BLACK HEART SUIT
+<U2764> "<U003C><U0033>" % HEAVY BLACK HEART
+<U0001F499> "<U003C><U0033>" % BLUE HEART
+<U0001F493> "<U003C><U0033>" % BEATING HEART
+<U0001F494> "<U003C><U002F><U0033>" % BROKEN HEART
+<U0001F496> "<U003C><U0033>" % SPARKLING HEART
+<U0001F497> "<U003C><U0033>" % GROWING HEART
+<U0001F49A> "<U003C><U0033>" % GREEN HEART
+<U0001F49B> "<U003C><U0033>" % YELLOW HEART
+<U0001F49C> "<U003C><U0033>" % PURPLE HEART
+<U0001F5A4> "<U003C><U0033>" % BLACK HEART
+<U0001F9E1> "<U003C><U0033>" % ORANGE HEART
+<U0001F90D> "<U003C><U0033>" % WHITE HEART
+<U0001F90E> "<U003C><U0033>" % BROWN HEART
+<U0001F600> "<U003A><U002D><U0044>" % GRINNING FACE
+<U0001F601> "<U003A><U002D><U0044>" % GRINNING FACE WITH SMILING EYES
+<U0001F602> "<U003A><U0027><U0044>" % FACE WITH TEARS OF JOY
+<U0001F603> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
+<U0001F604> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
+<U0001F605> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
+<U0001F606> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
+<U0001F607> "<U004F><U003A><U002D><U0029>" % SMILING FACE WITH HALO
+<U0001F608> "<U003E><U003A><U0029>" % SMILING FACE WITH HORNS
+<U0001F609> "<U003B><U002D><U0029>" % WINKING FACE
+<U0001F60A> "<U003A><U002D><U0029>" % SMILING FACE WITH SMILING EYES
+<U0001F60B> "<U003A><U002D><U0050>" % FACE SAVOURING DELICIOUS FOOD
+<U0001F60C> "<U003A><U002D><U0029>" % RELIEVED FACE
+<U0001F60D> "<U003A><U002D><U002A>" % SMILING FACE WITH HEART-SHAPED EYES
+<U0001F60E> "<U0042><U002D><U0029>" % SMILING FACE WITH SUNGLASSES
+<U0001F60F> "<U003B><U002D><U0029>" % SMIRKING FACE
+<U0001F610> "<U003A><U002D><U007C>" % NEUTRAL FACE
+<U0001F611> "<U003A><U002D><U007C>" % EXPRESSIONLESS FACE
+<U0001F612> "<U003A><U002D><U007C>" % UNAMUSED FACE
+<U0001F613> "<U003A><U0027><U002D><U007C>" % FACE WITH COLD SWEAT
+<U0001F614> "<U003A><U002D><U007C>" % PENSIVE FACE
+<U0001F615> "<U003A><U002D><U002F>" % CONFUSED FACE
+<U0001F616> "<U003A><U002D><U0053>" % CONFOUNDED FACE
+<U0001F617> "<U003A><U002D><U002A>" % KISSING FACE
+<U0001F618> "<U003A><U002D><U002A>" % FACE THROWING A KISS
+<U0001F619> "<U003A><U002D><U002A>" % KISSING FACE WITH SMILING EYES
+<U0001F61A> "<U003A><U002D><U002A>" % KISSING FACE WITH CLOSED EYES
+<U0001F61B> "<U003A><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE
+<U0001F61C> "<U003B><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
+<U0001F61D> "<U0058><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
+<U0001F61E> "<U003A><U002D><U0028>" % DISAPPOINTED FACE
+<U0001F61F> "<U003A><U002D><U0028>" % WORRIED FACE
+<U0001F620> "<U003E><U003A><U002D><U0028>" % ANGRY FACE
+<U0001F621> "<U003A><U002D><U0028>" % POUTING FACE
+<U0001F622> "<U003A><U0027><U002D><U0028>" % CRYING FACE
+<U0001F623> "<U0058><U002D><U0028>" % PERSEVERING FACE
+<U0001F626> "<U003A><U002D><U004F>" % FROWNING FACE WITH OPEN MOUTH
+<U0001F627> "<U003A><U002D><U004F>" % ANGUISHED FACE
+<U0001F628> "<U003A><U002D><U004F>" % FEARFUL FACE
+<U0001F629> "<U003A><U002D><U004F>" % WEARY FACE
+<U0001F62D> "<U003A><U0022><U002D><U0028>" % LOUDLY CRYING FACE
+<U0001F62E> "<U003A><U002D><U004F>" % FACE WITH OPEN MOUTH
+<U0001F62F> "<U003A><U002D><U004F>" % HUSHED FACE
+<U0001F630> "<U003A><U0027><U002D><U004F>" % FACE WITH OPEN MOUTH AND COLD SWEAT
+<U0001F631> "<U003A><U002D><U004F>" % FACE SCREAMING IN FEAR
+<U0001F632> "<U003A><U002D><U004F>" % ASTONISHED FACE
+<U0001F638> "<U003A><U002D><U0033>" % GRINNING CAT FACE WITH SMILING EYES
+<U0001F639> "<U003A><U0027><U002D><U0033>" % CAT FACE WITH TEARS OF JOY
+<U0001F63A> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH OPEN MOUTH
+<U0001F63B> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH HEART-SHAPED EYES
+<U0001F63C> "<U003B><U002D><U0033>" % CAT FACE WITH WRY SMILE
+<U0001F63D> "<U003A><U002D><U0033>" % KISSING CAT FACE WITH CLOSED EYES
+<U0001F641> "<U003A><U002D><U0028>" % SLIGHTLY FROWNING FACE
+<U0001F642> "<U003A><U002D><U0029>" % SLIGHTLY SMILING FACE
+<U0001F643> "<U0028><U002D><U003A>" % UPSIDE-DOWN FACE
+
+translit_end
+
+END LC_CTYPE
diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
index 72f66220b7..57412ae565 100644
--- a/localedata/locales/translit_neutral
+++ b/localedata/locales/translit_neutral
@@ -17,6 +17,7 @@ translit_start
 include "translit_circle";""
 include "translit_cjk_compat";""
 include "translit_compat";""
+include "translit_emojis";""
 include "translit_font";""
 include "translit_fraction";""
 include "translit_narrow";""
diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
new file mode 100644
index 0000000000..89a32074d5
--- /dev/null
+++ b/localedata/tst-iconv-emojis-trans.c
@@ -0,0 +1,124 @@
+/* Test some emoji transliterations
+
+   Copyright (C) 2019-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+
+/* Transliterate a string of emojis from UTF-8 to ASCII in one iconv call
+   and compare the result with the expected smileys; return 0 on success.  */
+static int
+do_test (void)
+{
+  const int num_emojis = 70;  /* Expected iconv return value.  */
+
+  const char str[] = "\u2661 \u2665 \u2764 \U0001F499 "
+                     "\U0001F493 \U0001F494 \U0001F496 "
+                     "\U0001F497 \U0001F49A \U0001F49B "
+                     "\U0001F49C \U0001F5A4 \U0001F9E1 "
+                     "\U0001F90D \U0001F90E \U0001F600 "
+                     "\U0001F601 \U0001F602 \U0001F603 "
+                     "\U0001F604 \U0001F605 \U0001F606 "
+                     "\U0001F607 \U0001F608 \U0001F609 "
+                     "\U0001F60A \U0001F60B \U0001F60C "
+                     "\U0001F60D \U0001F60E \U0001F60F "
+                     "\U0001F610 \U0001F611 \U0001F612 "
+                     "\U0001F613 \U0001F614 \U0001F615 "
+                     "\U0001F616 \U0001F617 \U0001F618 "
+                     "\U0001F619 \U0001F61A \U0001F61B "
+                     "\U0001F61C \U0001F61D \U0001F61E "
+                     "\U0001F61F \U0001F620 \U0001F621 "
+                     "\U0001F622 \U0001F623 \U0001F626 "
+                     "\U0001F627 \U0001F628 \U0001F629 "
+                     "\U0001F62D \U0001F62E \U0001F62F "
+                     "\U0001F630 \U0001F631 \U0001F632 "
+                     "\U0001F638 \U0001F639 \U0001F63A "
+                     "\U0001F63B \U0001F63C \U0001F63D "
+                     "\U0001F641 \U0001F642 \U0001F643";
+
+  const char expected[] = "<3 <3 <3 <3 <3 "
+                          "</3 <3 <3 <3 <3 "
+                          "<3 <3 <3 <3 <3 "
+                          ":-D :-D :'D :-D :-D "
+                          ":-D :-D O:-) >:) ;-) "
+                          ":-) :-P :-) :-* B-) "
+                          ";-) :-| :-| :-| :'-| "
+                          ":-| :-/ :-S :-* :-* "
+                          ":-* :-* :-P ;-P X-P "
+                          ":-( :-( >:-( :-( :'-( "
+                          "X-( :-O :-O :-O :-O "
+                          ":\"-( :-O :-O :'-O :-O "
+                          ":-O :-3 :'-3 :-3 :-3 "
+                          ";-3 :-3 :-( :-) (-:";
+
+  char *inptr = (char *) str;
+  size_t inlen = sizeof (str);  /* Convert the terminating NUL too.  */
+  char outbuf[500] = { 0 };  /* Zeroed so a short write is comparable.  */
+  char *outptr = outbuf;
+  size_t outlen = sizeof (outbuf);
+  int result = 0;
+
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    FAIL_EXIT1 ("setlocale failed");
+
+  iconv_t cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+  if (cd == (iconv_t) -1)
+    FAIL_EXIT1 ("iconv_open failed");
+
+  size_t n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  if (n != num_emojis)
+    {
+      if (n == (size_t) -1)
+        printf ("iconv() returned error: %m\n");
+      else
+        printf ("iconv() returned %zu, expected %d\n", n, num_emojis);
+      result = 1;
+    }
+  if (inlen != 0)
+    {
+      puts ("not all input consumed");
+      result = 1;
+    }
+  else if (inptr != str + sizeof (str))
+    {
+      printf ("inptr wrong, advanced by %td\n", inptr - str);
+      result = 1;
+    }
+  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
+    {
+      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
+              (int) (sizeof (outbuf) - outlen), outbuf, expected);
+      result = 1;
+    }
+  else if (outlen != sizeof (outbuf) - sizeof (expected))
+    {
+      printf ("outlen wrong: %zu, expected %zu\n", outlen,
+              sizeof (outbuf) - sizeof (expected));
+      result = 1;
+    }
+  else
+    printf ("output is \"%s\" which is OK\n", outbuf);
+
+  iconv_close (cd);
+  return result;
+}
+
+#include <support/test-driver.c>
-- 
2.39.2


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] localedata: Translit common emojis to smileys [BZ #30649]
  2023-07-21 14:11 ` [PATCH v2] " Colin Leroy-Mira
@ 2023-08-08  7:07   ` Colin Leroy-Mira
  2023-08-08  9:20   ` Florian Weimer
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-08-08  7:07 UTC (permalink / raw)
  To: libc-alpha

Hi folks, 

Ping :) I'd love a review on this!

Thanks!

> Add common emojis to the translit-able characters (mostly
> faces and hearts), and translit them to old-fashioned
> smileys.
> 
> Author: Colin Leroy-Mira <colin@colino.net>
> Signed-off-by: Colin Leroy-Mira <colin@colino.net>
> ---
> v2: Fix a wrong smiley, add unit test
>  localedata/Makefile                 |   3 +
>  localedata/locales/translit_emojis  |  91 ++++++++++++++++++++
>  localedata/locales/translit_neutral |   1 +
>  localedata/tst-iconv-emojis-trans.c | 124 ++++++++++++++++++++++++++++
>  4 files changed, 219 insertions(+)
>  create mode 100644 localedata/locales/translit_emojis
>  create mode 100644 localedata/tst-iconv-emojis-trans.c
> 
> diff --git a/localedata/Makefile b/localedata/Makefile
> index 3619b6d47e..5b6d10e33f 100644
> --- a/localedata/Makefile
> +++ b/localedata/Makefile
> @@ -164,6 +164,7 @@ tests = \
>    bug-usesetlocale \
>    tst-c-utf8-consistency \
>    tst-digits \
> +  tst-iconv-emojis-trans \
>    tst-iconv-math-trans \
>    tst-leaks \
>    tst-mbswcs1 \
> @@ -320,6 +321,8 @@ LOCALES := \
>  
>  include ../gen-locales.mk
>  
> +$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
> +
>  $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
>  endif
>  
> diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
> new file mode 100644
> index 0000000000..260aeedc35
> --- /dev/null
> +++ b/localedata/locales/translit_emojis
> @@ -0,0 +1,91 @@
> +escape_char /
> +comment_char %
> +
> +% This file is part of the GNU C Library and contains locale data.
> +% The Free Software Foundation does not claim any copyright interest
> +% in the locale data contained in this file.  The foregoing does not
> +% affect the license of the GNU C Library as a whole.  It does not
> +% exempt you from the conditions of the license if your use would
> +% otherwise be governed by that license.
> +
> +% Transliterations of emojis to ASCII smileys.
> +% Generated algorithmically.
> +
> +LC_CTYPE
> +
> +translit_start
> +
> +<U2661> "<U003C><U0033>" % WHITE HEART SUIT
> +<U2665> "<U003C><U0033>" % BLACK HEART SUIT
> +<U2764> "<U003C><U0033>" % HEAVY BLACK HEART
> +<U0001F499> "<U003C><U0033>" % BLUE HEART
> +<U0001F493> "<U003C><U0033>" % BEATING HEART
> +<U0001F494> "<U003C><U002F><U0033>" % BROKEN HEART
> +<U0001F496> "<U003C><U0033>" % SPARKLING HEART
> +<U0001F497> "<U003C><U0033>" % GROWING HEART
> +<U0001F49A> "<U003C><U0033>" % GREEN HEART
> +<U0001F49B> "<U003C><U0033>" % YELLOW HEART
> +<U0001F49C> "<U003C><U0033>" % PURPLE HEART
> +<U0001F5A4> "<U003C><U0033>" % BLACK HEART
> +<U0001F9E1> "<U003C><U0033>" % ORANGE HEART
> +<U0001F90D> "<U003C><U0033>" % WHITE HEART
> +<U0001F90E> "<U003C><U0033>" % BROWN HEART
> +<U0001F600> "<U003A><U002D><U0044>" % GRINNING FACE
> +<U0001F601> "<U003A><U002D><U0044>" % GRINNING FACE WITH SMILING EYES
> +<U0001F602> "<U003A><U0027><U0044>" % FACE WITH TEARS OF JOY
> +<U0001F603> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
> +<U0001F604> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
> +<U0001F605> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
> +<U0001F606> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
> +<U0001F607> "<U004F><U003A><U002D><U0029>" % SMILING FACE WITH HALO
> +<U0001F608> "<U003E><U003A><U0029>" % SMILING FACE WITH HORNS
> +<U0001F609> "<U003B><U002D><U0029>" % WINKING FACE
> +<U0001F60A> "<U003A><U002D><U0029>" % SMILING FACE WITH SMILING EYES
> +<U0001F60B> "<U003A><U002D><U0050>" % FACE SAVOURING DELICIOUS FOOD
> +<U0001F60C> "<U003A><U002D><U0029>" % RELIEVED FACE
> +<U0001F60D> "<U003A><U002D><U002A>" % SMILING FACE WITH HEART-SHAPED EYES
> +<U0001F60E> "<U0042><U002D><U0029>" % SMILING FACE WITH SUNGLASSES
> +<U0001F60F> "<U003B><U002D><U0029>" % SMIRKING FACE
> +<U0001F610> "<U003A><U002D><U007C>" % NEUTRAL FACE
> +<U0001F611> "<U003A><U002D><U007C>" % EXPRESSIONLESS FACE
> +<U0001F612> "<U003A><U002D><U007C>" % UNAMUSED FACE
> +<U0001F613> "<U003A><U0027><U002D><U007C>" % FACE WITH COLD SWEAT
> +<U0001F614> "<U003A><U002D><U007C>" % PENSIVE FACE
> +<U0001F615> "<U003A><U002D><U002F>" % CONFUSED FACE
> +<U0001F616> "<U003A><U002D><U0053>" % CONFOUNDED FACE
> +<U0001F617> "<U003A><U002D><U002A>" % KISSING FACE
> +<U0001F618> "<U003A><U002D><U002A>" % FACE THROWING A KISS
> +<U0001F619> "<U003A><U002D><U002A>" % KISSING FACE WITH SMILING EYES
> +<U0001F61A> "<U003A><U002D><U002A>" % KISSING FACE WITH CLOSED EYES
> +<U0001F61B> "<U003A><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE
> +<U0001F61C> "<U003B><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
> +<U0001F61D> "<U0058><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
> +<U0001F61E> "<U003A><U002D><U0028>" % DISAPPOINTED FACE
> +<U0001F61F> "<U003A><U002D><U0028>" % WORRIED FACE
> +<U0001F620> "<U003E><U003A><U002D><U0028>" % ANGRY FACE
> +<U0001F621> "<U003A><U002D><U0028>" % POUTING FACE
> +<U0001F622> "<U003A><U0027><U002D><U0028>" % CRYING FACE
> +<U0001F623> "<U0058><U002D><U0028>" % PERSEVERING FACE
> +<U0001F626> "<U003A><U002D><U004F>" % FROWNING FACE WITH OPEN MOUTH
> +<U0001F627> "<U003A><U002D><U004F>" % ANGUISHED FACE
> +<U0001F628> "<U003A><U002D><U004F>" % FEARFUL FACE
> +<U0001F629> "<U003A><U002D><U004F>" % WEARY FACE
> +<U0001F62D> "<U003A><U0022><U002D><U0028>" % LOUDLY CRYING FACE
> +<U0001F62E> "<U003A><U002D><U004F>" % FACE WITH OPEN MOUTH
> +<U0001F62F> "<U003A><U002D><U004F>" % HUSHED FACE
> +<U0001F630> "<U003A><U0027><U002D><U004F>" % FACE WITH OPEN MOUTH AND COLD SWEAT
> +<U0001F631> "<U003A><U002D><U004F>" % FACE SCREAMING IN FEAR
> +<U0001F632> "<U003A><U002D><U004F>" % ASTONISHED FACE
> +<U0001F638> "<U003A><U002D><U0033>" % GRINNING CAT FACE WITH SMILING EYES
> +<U0001F639> "<U003A><U0027><U002D><U0033>" % CAT FACE WITH TEARS OF JOY
> +<U0001F63A> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH OPEN MOUTH
> +<U0001F63B> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH HEART-SHAPE EYES
> +<U0001F63C> "<U003B><U002D><U0033>" % CAT FACE WITH WRY SMILE
> +<U0001F63D> "<U003A><U002D><U0033>" % KISSING CAT FACE WITH CLOSED EYES
> +<U0001F641> "<U003A><U002D><U0028>" % SLIGHTLY FROWNING FACE
> +<U0001F642> "<U003A><U002D><U0029>" % SLIGHTLY SMILING FACE
> +<U0001F643> "<U0028><U002D><U003A>" % UPSIDE-DOWN FACE
> +
> +translit_end
> +
> +END LC_CTYPE
> diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
> index 72f66220b7..57412ae565 100644
> --- a/localedata/locales/translit_neutral
> +++ b/localedata/locales/translit_neutral
> @@ -17,6 +17,7 @@ translit_start
>  include "translit_circle";""
>  include "translit_cjk_compat";""
>  include "translit_compat";""
> +include "translit_emojis";""
>  include "translit_font";""
>  include "translit_fraction";""
>  include "translit_narrow";""
> diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
> new file mode 100644
> index 0000000000..89a32074d5
> --- /dev/null
> +++ b/localedata/tst-iconv-emojis-trans.c
> @@ -0,0 +1,124 @@
> +/* Test some emoji transliterations
> +
> +   Copyright (C) 2019-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <iconv.h>
> +#include <locale.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <support/check.h>
> +
> +static int
> +do_test (void)
> +{
> +  iconv_t cd;
> +
> +  const int num_emojis = 70;
> +
> +  const char str[] = "\u2661 \u2665 \u2764 \U0001F499 "
> +                     "\U0001F493 \U0001F494 \U0001F496 "
> +                     "\U0001F497 \U0001F49A \U0001F49B "
> +                     "\U0001F49C \U0001F5A4 \U0001F9E1 "
> +                     "\U0001F90D \U0001F90E \U0001F600 "
> +                     "\U0001F601 \U0001F602 \U0001F603 "
> +                     "\U0001F604 \U0001F605 \U0001F606 "
> +                     "\U0001F607 \U0001F608 \U0001F609 "
> +                     "\U0001F60A \U0001F60B \U0001F60C "
> +                     "\U0001F60D \U0001F60E \U0001F60F "
> +                     "\U0001F610 \U0001F611 \U0001F612 "
> +                     "\U0001F613 \U0001F614 \U0001F615 "
> +                     "\U0001F616 \U0001F617 \U0001F618 "
> +                     "\U0001F619 \U0001F61A \U0001F61B "
> +                     "\U0001F61C \U0001F61D \U0001F61E "
> +                     "\U0001F61F \U0001F620 \U0001F621 "
> +                     "\U0001F622 \U0001F623 \U0001F626 "
> +                     "\U0001F627 \U0001F628 \U0001F629 "
> +                     "\U0001F62D \U0001F62E \U0001F62F "
> +                     "\U0001F630 \U0001F631 \U0001F632 "
> +                     "\U0001F638 \U0001F639 \U0001F63A "
> +                     "\U0001F63B \U0001F63C \U0001F63D "
> +                     "\U0001F641 \U0001F642 \U0001F643";
> +
> +  const char expected[] = "<3 <3 <3 <3 <3 "
> +                          "</3 <3 <3 <3 <3 "
> +                          "<3 <3 <3 <3 <3 "
> +                          ":-D :-D :'D :-D :-D "
> +                          ":-D :-D O:-) >:) ;-) "
> +                          ":-) :-P :-) :-* B-) "
> +                          ";-) :-| :-| :-| :'-| "
> +                          ":-| :-/ :-S :-* :-* "
> +                          ":-* :-* :-P ;-P X-P "
> +                          ":-( :-( >:-( :-( :'-( "
> +                          "X-( :-O :-O :-O :-O "
> +                          ":\"-( :-O :-O :'-O :-O "
> +                          ":-O :-3 :'-3 :-3 :-3 "
> +                          ";-3 :-3 :-( :-) (-:";
> +
> +  char *inptr = (char *) str;
> +  size_t inlen = strlen (str) + 1;
> +  char outbuf[500];
> +  char *outptr = outbuf;
> +  size_t outlen = sizeof (outbuf);
> +  int result = 0;
> +  size_t n;
> +
> +  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
> +    FAIL_EXIT1 ("setlocale failed");
> +
> +  cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
> +  if (cd == (iconv_t) -1)
> +    FAIL_EXIT1 ("iconv_open failed");
> +
> +  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
> +  if (n != num_emojis)
> +    {
> +      if (n == (size_t) -1)
> +        printf ("iconv() returned error: %m\n");
> +      else
> +        printf ("iconv() returned %zd, expected %d\n", n, num_emojis);
> +      result = 1;
> +    }
> +  if (inlen != 0)
> +    {
> +      puts ("not all input consumed");
> +      result = 1;
> +    }
> +  else if (inptr - str != strlen (str) + 1)
> +    {
> +      printf ("inptr wrong, advanced by %td\n", inptr - str);
> +      result = 1;
> +    }
> +  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
> +    {
> +      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
> +              (int) (sizeof (outbuf) - outlen), outbuf, expected);
> +      result = 1;
> +    }
> +  else if (outlen != sizeof (outbuf) - sizeof (expected))
> +    {
> +      printf ("outlen wrong: %zd, expected %zd\n", outlen,
> +              sizeof (outbuf) - sizeof (expected));
> +      result = 1;
> +    }
> +  else
> +    printf ("output is \"%s\" which is OK\n", outbuf);
> +
> +  return result;
> +}
> +
> +#include <support/test-driver.c>



-- 
Colin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] localedata: Translit common emojis to smileys [BZ #30649]
  2023-07-21 14:11 ` [PATCH v2] " Colin Leroy-Mira
  2023-08-08  7:07   ` Colin Leroy-Mira
@ 2023-08-08  9:20   ` Florian Weimer
  2023-08-08 10:02   ` Colin Leroy-Mira
  2023-08-08 13:52   ` [PATCH v3] " Colin Leroy-Mira
  3 siblings, 0 replies; 18+ messages in thread
From: Florian Weimer @ 2023-08-08  9:20 UTC (permalink / raw)
  To: Colin Leroy-Mira via Libc-alpha; +Cc: Colin Leroy-Mira

* Colin Leroy-Mira via Libc-alpha:

> +♡ "/<3" % WHITE HEART SUIT
> +♥ "/<3" % BLACK HEART SUIT
> +❤ "/<3" % HEAVY BLACK HEART
> +💙 "/<3" % BLUE HEART
> +💓 "/<3" % BEATING HEART
> +💔 "/<//3" % BROKEN HEART
> +💖 "/<3" % SPARKLING HEART
> +💗 "/<3" % GROWING HEART
> +💚 "/<3" % GREEN HEART
> +💛 "/<3" % YELLOW HEART
> +💜 "/<3" % PURPLE HEART
> +🖤 "/<3" % BLACK HEART
> +🧡 "/<3" % ORANGE HEART
> +🤍 "/<3" % WHITE HEART
> +🤎 "/<3" % BROWN HEART
> +😀 ":-D" % GRINNING FACE
> +😁 ":-D" % GRINNING FACE WITH SMILING EYES
> +😂 ":'D" % FACE WITH TEARS OF JOY
> +😃 ":-D" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
> +😄 ":-D" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
> +😅 ":-D" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
> +😆 ":-D" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
> +😇 "O:-)" % SMILING FACE WITH HALO
> +😈 "/>:)" % SMILING FACE WITH HORNS
> +😉 ";-)" % WINKING FACE
> +😊 ":-)" % SMILING FACE WITH SMILING EYES
> +😋 ":-P" % FACE SAVOURING DELICIOUS FOOD
> +😌 ":-)" % RELIEVED FACE
> +😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES
> +😎 "B-)" % SMILING FACE WITH SUNGLASSES
> +😏 ";-)" % SMIRKING FACE
> +😐 ":-|" % NEUTRAL FACE
> +😑 ":-|" % EXPRESSIONLESS FACE
> +😒 ":-|" % UNAMUSED FACE
> +😓 ":'-|" % FACE WITH COLD SWEAT
> +😔 ":-|" % PENSIVE FACE
> +😕 ":-//" % CONFUSED FACE
> +😖 ":-S" % CONFOUNDED FACE
> +😗 ":-*" % KISSING FACE
> +😘 ":-*" % FACE THROWING A KISS
> +😙 ":-*" % KISSING FACE WITH SMILING EYES
> +😚 ":-*" % KISSING FACE WITH CLOSED EYES
> +😛 ":-P" % FACE WITH STUCK-OUT TONGUE
> +😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
> +😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
> +😞 ":-(" % DISAPPOINTED FACE
> +😟 ":-(" % WORRIED FACE
> +😠 "/>:-(" % ANGRY FACE
> +😡 ":-(" % POUTING FACE
> +😢 ":'-(" % CRYING FACE
> +😣 "X-(" % PERSEVERING FACE
> +😦 ":-O" % FROWNING FACE WITH OPEN MOUTH
> +😧 ":-O" % ANGUISHED FACE
> +😨 ":-O" % FEARFUL FACE
> +😩 ":-O" % WEARY FACE
> +😭 ":<U0022>-(" % LOUDLY CRYING FACE
> +😮 ":-O" % FACE WITH OPEN MOUTH
> +😯 ":-O" % HUSHED FACE
> +😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT
> +😱 ":-O" % FACE SCREAMING IN FEAR
> +😲 ":-O" % ASTONISHED FACE
> +😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES
> +😹 ":'-3" % CAT FACE WITH TEARS OF JOY
> +😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH
> +😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPE EYES
> +😼 ";-3" % CAT FACE WITH WRY SMILE
> +😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES
> +🙁 ":-(" % SLIGHTLY FROWNING FACE
> +🙂 ":-)" % SLIGHTLY SMILING FACE
> +🙃 "(-:" % UPSIDE-DOWN FACE

Nowadays, you can use UTF-8 directly.  It makes review much easier.  The
extra / above are escape characters.

The transliterations seem mostly reasonable to me.

Thanks,
Florian


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] localedata: Translit common emojis to smileys [BZ #30649]
  2023-07-21 14:11 ` [PATCH v2] " Colin Leroy-Mira
  2023-08-08  7:07   ` Colin Leroy-Mira
  2023-08-08  9:20   ` Florian Weimer
@ 2023-08-08 10:02   ` Colin Leroy-Mira
  2023-08-08 11:50     ` Florian Weimer
  2023-08-08 13:52   ` [PATCH v3] " Colin Leroy-Mira
  3 siblings, 1 reply; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-08-08 10:02 UTC (permalink / raw)
  To: Libc-alpha

August 8, 2023 at 11:20 AM, "Florian Weimer" <fweimer@redhat.com> wrote:

Hi,

> Nowadays, you can use UTF-8 directly. It makes review much easier. The
> extra / above are escape characters.

Can I? I have seen no example of this in the code, and when I tried, the patch
did not reach Patchwork, and Adhemerval suggested I resend it without UTF-8.

> The transliterations seem mostly reasonable to me.

Thanks!
-- 
Colin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] localedata: Translit common emojis to smileys [BZ #30649]
  2023-08-08 10:02   ` Colin Leroy-Mira
@ 2023-08-08 11:50     ` Florian Weimer
  0 siblings, 0 replies; 18+ messages in thread
From: Florian Weimer @ 2023-08-08 11:50 UTC (permalink / raw)
  To: Colin Leroy-Mira via Libc-alpha; +Cc: Colin Leroy-Mira

* Colin Leroy-Mira via Libc-alpha:

>> Nowadays, you can use UTF-8 directly. It makes review much easier. The
>> extra / above are escape characters.
>
> Can I? I have seen no example of this in the code, and when I tried,
> the patch did not reach Patchwork, and Adhemerval suggested I resend
> it without UTF-8.

Patchwork is optional.  As long as Mailman distributes it, we should be
fine.  Patchwork bugs can be fixed (although whether this is a good use
of our time is … debatable).

Thanks,
Florian


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v3] localedata: Translit common emojis to smileys [BZ #30649]
  2023-07-21 14:11 ` [PATCH v2] " Colin Leroy-Mira
                     ` (2 preceding siblings ...)
  2023-08-08 10:02   ` Colin Leroy-Mira
@ 2023-08-08 13:52   ` Colin Leroy-Mira
  2023-08-16  8:33     ` [PING] " Colin Leroy-Mira
                       ` (2 more replies)
  3 siblings, 3 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-08-08 13:52 UTC (permalink / raw)
  To: libc-alpha; +Cc: Colin Leroy-Mira

Add transliterations for common emojis (mostly faces and
hearts), mapping them to old-fashioned ASCII
smileys.

Author: Colin Leroy-Mira <colin@colino.net>
Signed-off-by: Colin Leroy-Mira <colin@colino.net>
---
v3: Directly use UTF-8 in translit data and test, for legibility
v2: Fix a wrong smiley, add unit test
 localedata/Makefile                 |   3 +
 localedata/locales/translit_emojis  |  90 ++++++++++++++++++++++
 localedata/locales/translit_neutral |   1 +
 localedata/tst-iconv-emojis-trans.c | 115 ++++++++++++++++++++++++++++
 4 files changed, 209 insertions(+)
 create mode 100644 localedata/locales/translit_emojis
 create mode 100644 localedata/tst-iconv-emojis-trans.c

diff --git a/localedata/Makefile b/localedata/Makefile
index 3619b6d47e..5b6d10e33f 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -164,6 +164,7 @@ tests = \
   bug-usesetlocale \
   tst-c-utf8-consistency \
   tst-digits \
+  tst-iconv-emojis-trans \
   tst-iconv-math-trans \
   tst-leaks \
   tst-mbswcs1 \
@@ -320,6 +321,8 @@ LOCALES := \
 
 include ../gen-locales.mk
 
+$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
+
 $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
 endif
 
diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
new file mode 100644
index 0000000000..6bcaf7a02e
--- /dev/null
+++ b/localedata/locales/translit_emojis
@@ -0,0 +1,90 @@
+escape_char /
+comment_char %
+
+% This file is part of the GNU C Library and contains locale data.
+% The Free Software Foundation does not claim any copyright interest
+% in the locale data contained in this file.  The foregoing does not
+% affect the license of the GNU C Library as a whole.  It does not
+% exempt you from the conditions of the license if your use would
+% otherwise be governed by that license.
+
+% Transliterations of emojis to ASCII smileys.
+
+LC_CTYPE
+
+translit_start
+
+♡ "/<3" % WHITE HEART SUIT
+♥ "/<3" % BLACK HEART SUIT
+❤ "/<3" % HEAVY BLACK HEART
+💙 "/<3" % BLUE HEART
+💓 "/<3" % BEATING HEART
+💔 "/<//3" % BROKEN HEART
+💖 "/<3" % SPARKLING HEART
+💗 "/<3" % GROWING HEART
+💚 "/<3" % GREEN HEART
+💛 "/<3" % YELLOW HEART
+💜 "/<3" % PURPLE HEART
+🖤 "/<3" % BLACK HEART
+🧡 "/<3" % ORANGE HEART
+🤍 "/<3" % WHITE HEART
+🤎 "/<3" % BROWN HEART
+😀 ":-D" % GRINNING FACE
+😁 ":-D" % GRINNING FACE WITH SMILING EYES
+😂 ":'D" % FACE WITH TEARS OF JOY
+😃 ":-D" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
+😄 ":-D" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
+😅 ":-D" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
+😆 ":-D" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
+😇 "O:-)" % SMILING FACE WITH HALO
+😈 "/>:)" % SMILING FACE WITH HORNS
+😉 ";-)" % WINKING FACE
+😊 ":-)" % SMILING FACE WITH SMILING EYES
+😋 ":-P" % FACE SAVOURING DELICIOUS FOOD
+😌 ":-)" % RELIEVED FACE
+😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES
+😎 "B-)" % SMILING FACE WITH SUNGLASSES
+😏 ";-)" % SMIRKING FACE
+😐 ":-|" % NEUTRAL FACE
+😑 ":-|" % EXPRESSIONLESS FACE
+😒 ":-|" % UNAMUSED FACE
+😓 ":'-|" % FACE WITH COLD SWEAT
+😔 ":-|" % PENSIVE FACE
+😕 ":-//" % CONFUSED FACE
+😖 ":-S" % CONFOUNDED FACE
+😗 ":-*" % KISSING FACE
+😘 ":-*" % FACE THROWING A KISS
+😙 ":-*" % KISSING FACE WITH SMILING EYES
+😚 ":-*" % KISSING FACE WITH CLOSED EYES
+😛 ":-P" % FACE WITH STUCK-OUT TONGUE
+😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
+😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
+😞 ":-(" % DISAPPOINTED FACE
+😟 ":-(" % WORRIED FACE
+😠 "/>:-(" % ANGRY FACE
+😡 ":-(" % POUTING FACE
+😢 ":'-(" % CRYING FACE
+😣 "X-(" % PERSEVERING FACE
+😦 ":-O" % FROWNING FACE WITH OPEN MOUTH
+😧 ":-O" % ANGUISHED FACE
+😨 ":-O" % FEARFUL FACE
+😩 ":-O" % WEARY FACE
+😭 ":<U0022>-(" % LOUDLY CRYING FACE
+😮 ":-O" % FACE WITH OPEN MOUTH
+😯 ":-O" % HUSHED FACE
+😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT
+😱 ":-O" % FACE SCREAMING IN FEAR
+😲 ":-O" % ASTONISHED FACE
+😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES
+😹 ":'-3" % CAT FACE WITH TEARS OF JOY
+😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH
+😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPED EYES
+😼 ";-3" % CAT FACE WITH WRY SMILE
+😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES
+🙁 ":-(" % SLIGHTLY FROWNING FACE
+🙂 ":-)" % SLIGHTLY SMILING FACE
+🙃 "(-:" % UPSIDE-DOWN FACE
+
+translit_end
+
+END LC_CTYPE
diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
index 72f66220b7..57412ae565 100644
--- a/localedata/locales/translit_neutral
+++ b/localedata/locales/translit_neutral
@@ -17,6 +17,7 @@ translit_start
 include "translit_circle";""
 include "translit_cjk_compat";""
 include "translit_compat";""
+include "translit_emojis";""
 include "translit_font";""
 include "translit_fraction";""
 include "translit_narrow";""
diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
new file mode 100644
index 0000000000..dda9d3fd5c
--- /dev/null
+++ b/localedata/tst-iconv-emojis-trans.c
@@ -0,0 +1,115 @@
+/* Test some emoji transliterations
+
+   Copyright (C) 2019-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+
+/* Check the emoji transliterations; return 0 on success, 1 on failure.  */
+static int
+do_test (void)
+{
+  const size_t num_emojis = 70;
+
+  const char str[] = "♡ ♥ ❤ 💙 💓 "
+                     "💔 💖 💗 💚 💛 "
+                     "💜 🖤 🧡 🤍 🤎 "
+                     "😀 😁 😂 😃 😄 "
+                     "😅 😆 😇 😈 😉 "
+                     "😊 😋 😌 😍 😎 "
+                     "😏 😐 😑 😒 😓 "
+                     "😔 😕 😖 😗 😘 "
+                     "😙 😚 😛 😜 😝 "
+                     "😞 😟 😠 😡 😢 "
+                     "😣 😦 😧 😨 😩 "
+                     "😭 😮 😯 😰 😱 "
+                     "😲 😸 😹 😺 😻 "
+                     "😼 😽 🙁 🙂 🙃";
+  /* ASCII smileys expected for STR, one per emoji and in the same order.  */
+  const char expected[] = "<3 <3 <3 <3 <3 "
+                          "</3 <3 <3 <3 <3 "
+                          "<3 <3 <3 <3 <3 "
+                          ":-D :-D :'D :-D :-D "
+                          ":-D :-D O:-) >:) ;-) "
+                          ":-) :-P :-) :-* B-) "
+                          ";-) :-| :-| :-| :'-| "
+                          ":-| :-/ :-S :-* :-* "
+                          ":-* :-* :-P ;-P X-P "
+                          ":-( :-( >:-( :-( :'-( "
+                          "X-( :-O :-O :-O :-O "
+                          ":\"-( :-O :-O :'-O :-O "
+                          ":-O :-3 :'-3 :-3 :-3 "
+                          ";-3 :-3 :-( :-) (-:";
+
+  char *inptr = (char *) str;
+  size_t inlen = strlen (str) + 1;
+  char outbuf[500];
+  char *outptr = outbuf;
+  size_t outlen = sizeof (outbuf);
+  int result = 0;
+
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    FAIL_EXIT1 ("setlocale failed");
+
+  iconv_t cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+  if (cd == (iconv_t) -1)
+    FAIL_EXIT1 ("iconv_open failed");
+  /* Every transliterated emoji counts as one irreversible conversion.  */
+  size_t n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  if (n != num_emojis)
+    {
+      if (n == (size_t) -1)
+        printf ("iconv() returned error: %m\n");
+      else
+        printf ("iconv() returned %zu, expected %zu\n", n, num_emojis);
+      result = 1;
+    }
+  if (inlen != 0)
+    {
+      puts ("not all input consumed");
+      result = 1;
+    }
+  else if ((size_t) (inptr - str) != strlen (str) + 1)
+    {
+      printf ("inptr wrong, advanced by %td\n", inptr - str);
+      result = 1;
+    }
+  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
+    {
+      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
+              (int) (sizeof (outbuf) - outlen), outbuf, expected);
+      result = 1;
+    }
+  else if (outlen != sizeof (outbuf) - sizeof (expected))
+    {
+      printf ("outlen wrong: %zu, expected %zu\n", outlen,
+              sizeof (outbuf) - sizeof (expected));
+      result = 1;
+    }
+  else
+    printf ("output is \"%s\" which is OK\n", outbuf);
+
+  if (iconv_close (cd) != 0)
+    FAIL_EXIT1 ("iconv_close failed");
+  return result;
+}
+
+#include <support/test-driver.c>
-- 
2.39.2


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PING] [PATCH v3] localedata: Translit common emojis to smileys [BZ #30649]
  2023-08-08 13:52   ` [PATCH v3] " Colin Leroy-Mira
@ 2023-08-16  8:33     ` Colin Leroy-Mira
  2023-08-23 17:42     ` [ping] " Colin Leroy-Mira
  2023-08-28 13:10     ` Florian Weimer
  2 siblings, 0 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-08-16  8:33 UTC (permalink / raw)
  To: libc-alpha

On 08 August 2023 at 15h52, Colin Leroy-Mira wrote:

Hi, 
ping :)
Thanks in advance,
Colin

> Add common emojis to the translit-able characters (mostly
> faces and hearts), and translit them to old-fashioned
> smileys.
> 
> Author: Colin Leroy-Mira <colin@colino.net>
> Signed-off-by: Colin Leroy-Mira <colin@colino.net>
> ---
> v3: Directly use UTF-8 in translit data and test, for legibility
> v2: Fix a wrong smiley, add unit test
>  localedata/Makefile                 |   3 +
>  localedata/locales/translit_emojis  |  90 ++++++++++++++++++++++
>  localedata/locales/translit_neutral |   1 +
>  localedata/tst-iconv-emojis-trans.c | 115 ++++++++++++++++++++++++++++
>  4 files changed, 209 insertions(+)
>  create mode 100644 localedata/locales/translit_emojis
>  create mode 100644 localedata/tst-iconv-emojis-trans.c
> 
> diff --git a/localedata/Makefile b/localedata/Makefile
> index 3619b6d47e..5b6d10e33f 100644
> --- a/localedata/Makefile
> +++ b/localedata/Makefile
> @@ -164,6 +164,7 @@ tests = \
>    bug-usesetlocale \
>    tst-c-utf8-consistency \
>    tst-digits \
> +  tst-iconv-emojis-trans \
>    tst-iconv-math-trans \
>    tst-leaks \
>    tst-mbswcs1 \
> @@ -320,6 +321,8 @@ LOCALES := \
>  
>  include ../gen-locales.mk
>  
> +$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
> +
>  $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
>  endif
>  
> diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
> new file mode 100644
> index 0000000000..6bcaf7a02e
> --- /dev/null
> +++ b/localedata/locales/translit_emojis
> @@ -0,0 +1,90 @@
> +escape_char /
> +comment_char %
> +
> +% This file is part of the GNU C Library and contains locale data.
> +% The Free Software Foundation does not claim any copyright interest
> +% in the locale data contained in this file.  The foregoing does not
> +% affect the license of the GNU C Library as a whole.  It does not
> +% exempt you from the conditions of the license if your use would
> +% otherwise be governed by that license.
> +
> +% Transliterations of emojis to ASCII smileys.
> +
> +LC_CTYPE
> +
> +translit_start
> +
> +♡ "/<3" % WHITE HEART SUIT
> +♥ "/<3" % BLACK HEART SUIT
> +❤ "/<3" % HEAVY BLACK HEART
> +💙 "/<3" % BLUE HEART
> +💓 "/<3" % BEATING HEART
> +💔 "/<//3" % BROKEN HEART
> +💖 "/<3" % SPARKLING HEART
> +💗 "/<3" % GROWING HEART
> +💚 "/<3" % GREEN HEART
> +💛 "/<3" % YELLOW HEART
> +💜 "/<3" % PURPLE HEART
> +🖤 "/<3" % BLACK HEART
> +🧡 "/<3" % ORANGE HEART
> +🤍 "/<3" % WHITE HEART
> +🤎 "/<3" % BROWN HEART
> +😀 ":-D" % GRINNING FACE
> +😁 ":-D" % GRINNING FACE WITH SMILING EYES
> +😂 ":'D" % FACE WITH TEARS OF JOY
> +😃 ":-D" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
> +😄 ":-D" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
> +😅 ":-D" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
> +😆 ":-D" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
> +😇 "O:-)" % SMILING FACE WITH HALO
> +😈 "/>:)" % SMILING FACE WITH HORNS
> +😉 ";-)" % WINKING FACE
> +😊 ":-)" % SMILING FACE WITH SMILING EYES
> +😋 ":-P" % FACE SAVOURING DELICIOUS FOOD
> +😌 ":-)" % RELIEVED FACE
> +😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES
> +😎 "B-)" % SMILING FACE WITH SUNGLASSES
> +😏 ";-)" % SMIRKING FACE
> +😐 ":-|" % NEUTRAL FACE
> +😑 ":-|" % EXPRESSIONLESS FACE
> +😒 ":-|" % UNAMUSED FACE
> +😓 ":'-|" % FACE WITH COLD SWEAT
> +😔 ":-|" % PENSIVE FACE
> +😕 ":-//" % CONFUSED FACE
> +😖 ":-S" % CONFOUNDED FACE
> +😗 ":-*" % KISSING FACE
> +😘 ":-*" % FACE THROWING A KISS
> +😙 ":-*" % KISSING FACE WITH SMILING EYES
> +😚 ":-*" % KISSING FACE WITH CLOSED EYES
> +😛 ":-P" % FACE WITH STUCK-OUT TONGUE
> +😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
> +😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
> +😞 ":-(" % DISAPPOINTED FACE
> +😟 ":-(" % WORRIED FACE
> +😠 "/>:-(" % ANGRY FACE
> +😡 ":-(" % POUTING FACE
> +😢 ":'-(" % CRYING FACE
> +😣 "X-(" % PERSEVERING FACE
> +😦 ":-O" % FROWNING FACE WITH OPEN MOUTH
> +😧 ":-O" % ANGUISHED FACE
> +😨 ":-O" % FEARFUL FACE
> +😩 ":-O" % WEARY FACE
> +😭 ":<U0022>-(" % LOUDLY CRYING FACE
> +😮 ":-O" % FACE WITH OPEN MOUTH
> +😯 ":-O" % HUSHED FACE
> +😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT
> +😱 ":-O" % FACE SCREAMING IN FEAR
> +😲 ":-O" % ASTONISHED FACE
> +😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES
> +😹 ":'-3" % CAT FACE WITH TEARS OF JOY
> +😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH
> +😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPE EYES
> +😼 ";-3" % CAT FACE WITH WRY SMILE
> +😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES
> +🙁 ":-(" % SLIGHTLY FROWNING FACE
> +🙂 ":-)" % SLIGHTLY SMILING FACE
> +🙃 "(-:" % UPSIDE-DOWN FACE
> +
> +translit_end
> +
> +END LC_CTYPE
> diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
> index 72f66220b7..57412ae565 100644
> --- a/localedata/locales/translit_neutral
> +++ b/localedata/locales/translit_neutral
> @@ -17,6 +17,7 @@ translit_start
>  include "translit_circle";""
>  include "translit_cjk_compat";""
>  include "translit_compat";""
> +include "translit_emojis";""
>  include "translit_font";""
>  include "translit_fraction";""
>  include "translit_narrow";""
> diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
> new file mode 100644
> index 0000000000..dda9d3fd5c
> --- /dev/null
> +++ b/localedata/tst-iconv-emojis-trans.c
> @@ -0,0 +1,115 @@
> +/* Test some emoji transliterations
> +
> +   Copyright (C) 2019-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <iconv.h>
> +#include <locale.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <support/check.h>
> +
> +static int
> +do_test (void)
> +{
> +  iconv_t cd;
> +
> +  const int num_emojis = 70;
> +
> +  const char str[] = "♡ ♥ ❤ 💙 💓 "
> +                     "💔 💖 💗 💚 💛 "
> +                     "💜 🖤 🧡 🤍 🤎 "
> +                     "😀 😁 😂 😃 😄 "
> +                     "😅 😆 😇 😈 😉 "
> +                     "😊 😋 😌 😍 😎 "
> +                     "😏 😐 😑 😒 😓 "
> +                     "😔 😕 😖 😗 😘 "
> +                     "😙 😚 😛 😜 😝 "
> +                     "😞 😟 😠 😡 😢 "
> +                     "😣 😦 😧 😨 😩 "
> +                     "😭 😮 😯 😰 😱 "
> +                     "😲 😸 😹 😺 😻 "
> +                     "😼 😽 🙁 🙂 🙃";
> +
> +  const char expected[] = "<3 <3 <3 <3 <3 "
> +                          "</3 <3 <3 <3 <3 "
> +                          "<3 <3 <3 <3 <3 "
> +                          ":-D :-D :'D :-D :-D "
> +                          ":-D :-D O:-) >:) ;-) "
> +                          ":-) :-P :-) :-* B-) "
> +                          ";-) :-| :-| :-| :'-| "
> +                          ":-| :-/ :-S :-* :-* "
> +                          ":-* :-* :-P ;-P X-P "
> +                          ":-( :-( >:-( :-( :'-( "
> +                          "X-( :-O :-O :-O :-O "
> +                          ":\"-( :-O :-O :'-O :-O "
> +                          ":-O :-3 :'-3 :-3 :-3 "
> +                          ";-3 :-3 :-( :-) (-:";
> +
> +  char *inptr = (char *) str;
> +  size_t inlen = strlen (str) + 1;
> +  char outbuf[500];
> +  char *outptr = outbuf;
> +  size_t outlen = sizeof (outbuf);
> +  int result = 0;
> +  size_t n;
> +
> +  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
> +    FAIL_EXIT1 ("setlocale failed");
> +
> +  cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
> +  if (cd == (iconv_t) -1)
> +    FAIL_EXIT1 ("iconv_open failed");
> +
> +  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
> +  if (n != num_emojis)
> +    {
> +      if (n == (size_t) -1)
> +        printf ("iconv() returned error: %m\n");
> +      else
> +        printf ("iconv() returned %zd, expected %d\n", n, num_emojis);
> +      result = 1;
> +    }
> +  if (inlen != 0)
> +    {
> +      puts ("not all input consumed");
> +      result = 1;
> +    }
> +  else if (inptr - str != strlen (str) + 1)
> +    {
> +      printf ("inptr wrong, advanced by %td\n", inptr - str);
> +      result = 1;
> +    }
> +  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
> +    {
> +      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
> +              (int) (sizeof (outbuf) - outlen), outbuf, expected);
> +      result = 1;
> +    }
> +  else if (outlen != sizeof (outbuf) - sizeof (expected))
> +    {
> +      printf ("outlen wrong: %zd, expected %zd\n", outlen,
> +              sizeof (outbuf) - sizeof (expected));
> +      result = 1;
> +    }
> +  else
> +    printf ("output is \"%s\" which is OK\n", outbuf);
> +
> +  return result;
> +}
> +
> +#include <support/test-driver.c>



-- 
Colin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [ping] [PATCH v3] localedata: Translit common emojis to smileys [BZ #30649]
  2023-08-08 13:52   ` [PATCH v3] " Colin Leroy-Mira
  2023-08-16  8:33     ` [PING] " Colin Leroy-Mira
@ 2023-08-23 17:42     ` Colin Leroy-Mira
  2023-08-28 13:10     ` Florian Weimer
  2 siblings, 0 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-08-23 17:42 UTC (permalink / raw)
  To: libc-alpha

Hi, 

Ping again! It's a simple patch ;)

Thanks!
-- 
Colin

> Add common emojis to the translit-able characters (mostly
> faces and hearts), and translit them to old-fashioned
> smileys.
> 
> Author: Colin Leroy-Mira <colin@colino.net>
> Signed-off-by: Colin Leroy-Mira <colin@colino.net>
> ---
> v3: Directly use UTF-8 in translit data and test, for legibility
> v2: Fix a wrong smiley, add unit test
>  localedata/Makefile                 |   3 +
>  localedata/locales/translit_emojis  |  90 ++++++++++++++++++++++
>  localedata/locales/translit_neutral |   1 +
>  localedata/tst-iconv-emojis-trans.c | 115 ++++++++++++++++++++++++++++
>  4 files changed, 209 insertions(+)
>  create mode 100644 localedata/locales/translit_emojis
>  create mode 100644 localedata/tst-iconv-emojis-trans.c
> 
> diff --git a/localedata/Makefile b/localedata/Makefile
> index 3619b6d47e..5b6d10e33f 100644
> --- a/localedata/Makefile
> +++ b/localedata/Makefile
> @@ -164,6 +164,7 @@ tests = \
>    bug-usesetlocale \
>    tst-c-utf8-consistency \
>    tst-digits \
> +  tst-iconv-emojis-trans \
>    tst-iconv-math-trans \
>    tst-leaks \
>    tst-mbswcs1 \
> @@ -320,6 +321,8 @@ LOCALES := \
>  
>  include ../gen-locales.mk
>  
> +$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
> +
>  $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
>  endif
>  
> diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
> new file mode 100644
> index 0000000000..6bcaf7a02e
> --- /dev/null
> +++ b/localedata/locales/translit_emojis
> @@ -0,0 +1,90 @@
> +escape_char /
> +comment_char %
> +
> +% This file is part of the GNU C Library and contains locale data.
> +% The Free Software Foundation does not claim any copyright interest
> +% in the locale data contained in this file.  The foregoing does not
> +% affect the license of the GNU C Library as a whole.  It does not
> +% exempt you from the conditions of the license if your use would
> +% otherwise be governed by that license.
> +
> +% Transliterations of emojis to ASCII smileys.
> +
> +LC_CTYPE
> +
> +translit_start
> +
> +♡ "/<3" % WHITE HEART SUIT
> +♥ "/<3" % BLACK HEART SUIT
> +❤ "/<3" % HEAVY BLACK HEART
> +💙 "/<3" % BLUE HEART
> +💓 "/<3" % BEATING HEART
> +💔 "/<//3" % BROKEN HEART
> +💖 "/<3" % SPARKLING HEART
> +💗 "/<3" % GROWING HEART
> +💚 "/<3" % GREEN HEART
> +💛 "/<3" % YELLOW HEART
> +💜 "/<3" % PURPLE HEART
> +🖤 "/<3" % BLACK HEART
> +🧡 "/<3" % ORANGE HEART
> +🤍 "/<3" % WHITE HEART
> +🤎 "/<3" % BROWN HEART
> +😀 ":-D" % GRINNING FACE
> +😁 ":-D" % GRINNING FACE WITH SMILING EYES
> +😂 ":'D" % FACE WITH TEARS OF JOY
> +😃 ":-D" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
> +😄 ":-D" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
> +😅 ":-D" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
> +😆 ":-D" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
> +😇 "O:-)" % SMILING FACE WITH HALO
> +😈 "/>:)" % SMILING FACE WITH HORNS
> +😉 ";-)" % WINKING FACE
> +😊 ":-)" % SMILING FACE WITH SMILING EYES
> +😋 ":-P" % FACE SAVOURING DELICIOUS FOOD
> +😌 ":-)" % RELIEVED FACE
> +😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES
> +😎 "B-)" % SMILING FACE WITH SUNGLASSES
> +😏 ";-)" % SMIRKING FACE
> +😐 ":-|" % NEUTRAL FACE
> +😑 ":-|" % EXPRESSIONLESS FACE
> +😒 ":-|" % UNAMUSED FACE
> +😓 ":'-|" % FACE WITH COLD SWEAT
> +😔 ":-|" % PENSIVE FACE
> +😕 ":-//" % CONFUSED FACE
> +😖 ":-S" % CONFOUNDED FACE
> +😗 ":-*" % KISSING FACE
> +😘 ":-*" % FACE THROWING A KISS
> +😙 ":-*" % KISSING FACE WITH SMILING EYES
> +😚 ":-*" % KISSING FACE WITH CLOSED EYES
> +😛 ":-P" % FACE WITH STUCK-OUT TONGUE
> +😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
> +😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
> +😞 ":-(" % DISAPPOINTED FACE
> +😟 ":-(" % WORRIED FACE
> +😠 "/>:-(" % ANGRY FACE
> +😡 ":-(" % POUTING FACE
> +😢 ":'-(" % CRYING FACE
> +😣 "X-(" % PERSEVERING FACE
> +😦 ":-O" % FROWNING FACE WITH OPEN MOUTH
> +😧 ":-O" % ANGUISHED FACE
> +😨 ":-O" % FEARFUL FACE
> +😩 ":-O" % WEARY FACE
> +😭 ":<U0022>-(" % LOUDLY CRYING FACE
> +😮 ":-O" % FACE WITH OPEN MOUTH
> +😯 ":-O" % HUSHED FACE
> +😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT
> +😱 ":-O" % FACE SCREAMING IN FEAR
> +😲 ":-O" % ASTONISHED FACE
> +😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES
> +😹 ":'-3" % CAT FACE WITH TEARS OF JOY
> +😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH
> +😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPE EYES
> +😼 ";-3" % CAT FACE WITH WRY SMILE
> +😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES
> +🙁 ":-(" % SLIGHTLY FROWNING FACE
> +🙂 ":-)" % SLIGHTLY SMILING FACE
> +🙃 "(-:" % UPSIDE-DOWN FACE
> +
> +translit_end
> +
> +END LC_CTYPE
> diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
> index 72f66220b7..57412ae565 100644
> --- a/localedata/locales/translit_neutral
> +++ b/localedata/locales/translit_neutral
> @@ -17,6 +17,7 @@ translit_start
>  include "translit_circle";""
>  include "translit_cjk_compat";""
>  include "translit_compat";""
> +include "translit_emojis";""
>  include "translit_font";""
>  include "translit_fraction";""
>  include "translit_narrow";""
> diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
> new file mode 100644
> index 0000000000..dda9d3fd5c
> --- /dev/null
> +++ b/localedata/tst-iconv-emojis-trans.c
> @@ -0,0 +1,115 @@
> +/* Test some emoji transliterations
> +
> +   Copyright (C) 2019-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <iconv.h>
> +#include <locale.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <support/check.h>
> +
> +static int
> +do_test (void)
> +{
> +  iconv_t cd;
> +
> +  const int num_emojis = 70;
> +
> +  const char str[] = "♡ ♥ ❤ 💙 💓 "
> +                     "💔 💖 💗 💚 💛 "
> +                     "💜 🖤 🧡 🤍 🤎 "
> +                     "😀 😁 😂 😃 😄 "
> +                     "😅 😆 😇 😈 😉 "
> +                     "😊 😋 😌 😍 😎 "
> +                     "😏 😐 😑 😒 😓 "
> +                     "😔 😕 😖 😗 😘 "
> +                     "😙 😚 😛 😜 😝 "
> +                     "😞 😟 😠 😡 😢 "
> +                     "😣 😦 😧 😨 😩 "
> +                     "😭 😮 😯 😰 😱 "
> +                     "😲 😸 😹 😺 😻 "
> +                     "😼 😽 🙁 🙂 🙃";
> +
> +  const char expected[] = "<3 <3 <3 <3 <3 "
> +                          "</3 <3 <3 <3 <3 "
> +                          "<3 <3 <3 <3 <3 "
> +                          ":-D :-D :'D :-D :-D "
> +                          ":-D :-D O:-) >:) ;-) "
> +                          ":-) :-P :-) :-* B-) "
> +                          ";-) :-| :-| :-| :'-| "
> +                          ":-| :-/ :-S :-* :-* "
> +                          ":-* :-* :-P ;-P X-P "
> +                          ":-( :-( >:-( :-( :'-( "
> +                          "X-( :-O :-O :-O :-O "
> +                          ":\"-( :-O :-O :'-O :-O "
> +                          ":-O :-3 :'-3 :-3 :-3 "
> +                          ";-3 :-3 :-( :-) (-:";
> +
> +  char *inptr = (char *) str;
> +  size_t inlen = strlen (str) + 1;
> +  char outbuf[500];
> +  char *outptr = outbuf;
> +  size_t outlen = sizeof (outbuf);
> +  int result = 0;
> +  size_t n;
> +
> +  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
> +    FAIL_EXIT1 ("setlocale failed");
> +
> +  cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
> +  if (cd == (iconv_t) -1)
> +    FAIL_EXIT1 ("iconv_open failed");
> +
> +  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
> +  if (n != num_emojis)
> +    {
> +      if (n == (size_t) -1)
> +        printf ("iconv() returned error: %m\n");
> +      else
> +        printf ("iconv() returned %zd, expected %d\n", n, num_emojis);
> +      result = 1;
> +    }
> +  if (inlen != 0)
> +    {
> +      puts ("not all input consumed");
> +      result = 1;
> +    }
> +  else if (inptr - str != strlen (str) + 1)
> +    {
> +      printf ("inptr wrong, advanced by %td\n", inptr - str);
> +      result = 1;
> +    }
> +  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
> +    {
> +      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
> +              (int) (sizeof (outbuf) - outlen), outbuf, expected);
> +      result = 1;
> +    }
> +  else if (outlen != sizeof (outbuf) - sizeof (expected))
> +    {
> +      printf ("outlen wrong: %zd, expected %zd\n", outlen,
> +              sizeof (outbuf) - sizeof (expected));
> +      result = 1;
> +    }
> +  else
> +    printf ("output is \"%s\" which is OK\n", outbuf);
> +
> +  return result;
> +}
> +
> +#include <support/test-driver.c>



-- 
Colin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3] localedata: Translit common emojis to smileys [BZ #30649]
  2023-08-08 13:52   ` [PATCH v3] " Colin Leroy-Mira
  2023-08-16  8:33     ` [PING] " Colin Leroy-Mira
  2023-08-23 17:42     ` [ping] " Colin Leroy-Mira
@ 2023-08-28 13:10     ` Florian Weimer
  2023-08-29  6:02       ` [PATCH v4] " Colin Leroy-Mira
  2 siblings, 1 reply; 18+ messages in thread
From: Florian Weimer @ 2023-08-28 13:10 UTC (permalink / raw)
  To: Colin Leroy-Mira via Libc-alpha; +Cc: Colin Leroy-Mira

* Colin Leroy-Mira via Libc-alpha:

> Add common emojis to the translit-able characters (mostly
> faces and hearts), and translit them to old-fashioned
> smileys.
>
> Author: Colin Leroy-Mira <colin@colino.net>
> Signed-off-by: Colin Leroy-Mira <colin@colino.net>

Looks good overall.

As you do not have copyright assignment and contribute under DCO,
please post a v4 with

+ Copyright The GNU Toolchain Authors.

added in localedata/Makefile and localedata/tst-iconv-emojis-trans.c.
(I assume you copied the latter from another test file, which is why
copyright starts in 2019).

Thanks,
Florian


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v4] localedata: Translit common emojis to smileys [BZ #30649]
  2023-08-28 13:10     ` Florian Weimer
@ 2023-08-29  6:02       ` Colin Leroy-Mira
  2023-08-29  7:26         ` Florian Weimer
  2023-08-29  7:50         ` Colin Leroy-Mira
  0 siblings, 2 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-08-29  6:02 UTC (permalink / raw)
  To: libc-alpha; +Cc: fweimer, Colin Leroy-Mira

Add common emojis to the translit-able characters (mostly
faces and hearts), and translit them to old-fashioned
smileys.

Author: Colin Leroy-Mira <colin@colino.net>
Signed-off-by: Colin Leroy-Mira <colin@colino.net>
---
v4: Update copyright
v3: Directly use UTF-8 in translit data and test, for legibility
v2: Fix a wrong smiley, add unit test
 localedata/Makefile                 |   4 +
 localedata/locales/translit_emojis  |  91 ++++++++++++++++++++++
 localedata/locales/translit_neutral |   1 +
 localedata/tst-iconv-emojis-trans.c | 117 ++++++++++++++++++++++++++++
 4 files changed, 213 insertions(+)
 create mode 100644 localedata/locales/translit_emojis
 create mode 100644 localedata/tst-iconv-emojis-trans.c

diff --git a/localedata/Makefile b/localedata/Makefile
index 3619b6d47e..dd41db6d8f 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -1,4 +1,5 @@
 # Copyright (C) 1996-2023 Free Software Foundation, Inc.
+# Copyright The GNU Toolchain Authors.
 # This file is part of the GNU C Library.
 
 # The GNU C Library is free software; you can redistribute it and/or
@@ -164,6 +165,7 @@ tests = \
   bug-usesetlocale \
   tst-c-utf8-consistency \
   tst-digits \
+  tst-iconv-emojis-trans \
   tst-iconv-math-trans \
   tst-leaks \
   tst-mbswcs1 \
@@ -320,6 +322,8 @@ LOCALES := \
 
 include ../gen-locales.mk
 
+$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
+
 $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
 endif
 
diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
new file mode 100644
index 0000000000..cfb1964afa
--- /dev/null
+++ b/localedata/locales/translit_emojis
@@ -0,0 +1,91 @@
+escape_char /
+comment_char %
+
+% This file is part of the GNU C Library and contains locale data.
+% The Free Software Foundation and the GNU Toolchain Authors do not
+% claim any copyright interest in the locale data contained in this
+% file.  The foregoing does not affect the license of the GNU C
+% Library as a whole.  It does not exempt you from the conditions
+% of the license if your use would otherwise be governed by that
+% license.
+
+% Transliterations of emojis to ASCII smileys.
+
+LC_CTYPE
+
+translit_start
+
+♡ "/<3" % WHITE HEART SUIT
+♥ "/<3" % BLACK HEART SUIT
+❤ "/<3" % HEAVY BLACK HEART
+💙 "/<3" % BLUE HEART
+💓 "/<3" % BEATING HEART
+💔 "/<//3" % BROKEN HEART
+💖 "/<3" % SPARKLING HEART
+💗 "/<3" % GROWING HEART
+💚 "/<3" % GREEN HEART
+💛 "/<3" % YELLOW HEART
+💜 "/<3" % PURPLE HEART
+🖤 "/<3" % BLACK HEART
+🧡 "/<3" % ORANGE HEART
+🤍 "/<3" % WHITE HEART
+🤎 "/<3" % BROWN HEART
+😀 ":-D" % GRINNING FACE
+😁 ":-D" % GRINNING FACE WITH SMILING EYES
+😂 ":'D" % FACE WITH TEARS OF JOY
+😃 ":-D" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
+😄 ":-D" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
+😅 ":-D" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
+😆 ":-D" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
+😇 "O:-)" % SMILING FACE WITH HALO
+😈 "/>:)" % SMILING FACE WITH HORNS
+😉 ";-)" % WINKING FACE
+😊 ":-)" % SMILING FACE WITH SMILING EYES
+😋 ":-P" % FACE SAVOURING DELICIOUS FOOD
+😌 ":-)" % RELIEVED FACE
+😍 ":-*" % SMILING FACE WITH HEART-SHAPED EYES
+😎 "B-)" % SMILING FACE WITH SUNGLASSES
+😏 ";-)" % SMIRKING FACE
+😐 ":-|" % NEUTRAL FACE
+😑 ":-|" % EXPRESSIONLESS FACE
+😒 ":-|" % UNAMUSED FACE
+😓 ":'-|" % FACE WITH COLD SWEAT
+😔 ":-|" % PENSIVE FACE
+😕 ":-//" % CONFUSED FACE
+😖 ":-S" % CONFOUNDED FACE
+😗 ":-*" % KISSING FACE
+😘 ":-*" % FACE THROWING A KISS
+😙 ":-*" % KISSING FACE WITH SMILING EYES
+😚 ":-*" % KISSING FACE WITH CLOSED EYES
+😛 ":-P" % FACE WITH STUCK-OUT TONGUE
+😜 ";-P" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
+😝 "X-P" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
+😞 ":-(" % DISAPPOINTED FACE
+😟 ":-(" % WORRIED FACE
+😠 "/>:-(" % ANGRY FACE
+😡 ":-(" % POUTING FACE
+😢 ":'-(" % CRYING FACE
+😣 "X-(" % PERSEVERING FACE
+😦 ":-O" % FROWNING FACE WITH OPEN MOUTH
+😧 ":-O" % ANGUISHED FACE
+😨 ":-O" % FEARFUL FACE
+😩 ":-O" % WEARY FACE
+😭 ":<U0022>-(" % LOUDLY CRYING FACE
+😮 ":-O" % FACE WITH OPEN MOUTH
+😯 ":-O" % HUSHED FACE
+😰 ":'-O" % FACE WITH OPEN MOUTH AND COLD SWEAT
+😱 ":-O" % FACE SCREAMING IN FEAR
+😲 ":-O" % ASTONISHED FACE
+😸 ":-3" % GRINNING CAT FACE WITH SMILING EYES
+😹 ":'-3" % CAT FACE WITH TEARS OF JOY
+😺 ":-3" % SMILING CAT FACE WITH OPEN MOUTH
+😻 ":-3" % SMILING CAT FACE WITH HEART-SHAPE EYES
+😼 ";-3" % CAT FACE WITH WRY SMILE
+😽 ":-3" % KISSING CAT FACE WITH CLOSED EYES
+🙁 ":-(" % SLIGHTLY FROWNING FACE
+🙂 ":-)" % SLIGHTLY SMILING FACE
+🙃 "(-:" % UPSIDE-DOWN FACE
+
+translit_end
+
+END LC_CTYPE
diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
index 72f66220b7..57412ae565 100644
--- a/localedata/locales/translit_neutral
+++ b/localedata/locales/translit_neutral
@@ -17,6 +17,7 @@ translit_start
 include "translit_circle";""
 include "translit_cjk_compat";""
 include "translit_compat";""
+include "translit_emojis";""
 include "translit_font";""
 include "translit_fraction";""
 include "translit_narrow";""
diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
new file mode 100644
index 0000000000..de4c20a279
--- /dev/null
+++ b/localedata/tst-iconv-emojis-trans.c
@@ -0,0 +1,117 @@
+/* Test some emoji transliterations
+
+   Copyright (C) 2019-2023 Free Software Foundation, Inc.
+   Copyright The GNU Toolchain Authors.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+
+static int
+do_test (void)
+{
+  iconv_t cd;
+
+  const int num_emojis = 70;
+
+  const char str[] = "♡ ♥ ❤ 💙 💓 "
+                     "💔 💖 💗 💚 💛 "
+                     "💜 🖤 🧡 🤍 🤎 "
+                     "😀 😁 😂 😃 😄 "
+                     "😅 😆 😇 😈 😉 "
+                     "😊 😋 😌 😍 😎 "
+                     "😏 😐 😑 😒 😓 "
+                     "😔 😕 😖 😗 😘 "
+                     "😙 😚 😛 😜 😝 "
+                     "😞 😟 😠 😡 😢 "
+                     "😣 😦 😧 😨 😩 "
+                     "😭 😮 😯 😰 😱 "
+                     "😲 😸 😹 😺 😻 "
+                     "😼 😽 🙁 🙂 🙃";
+
+  const char expected[] = "<3 <3 <3 <3 <3 "
+                          "</3 <3 <3 <3 <3 "
+                          "<3 <3 <3 <3 <3 "
+                          ":-D :-D :'D :-D :-D "
+                          ":-D :-D O:-) >:) ;-) "
+                          ":-) :-P :-) :-* B-) "
+                          ";-) :-| :-| :-| :'-| "
+                          ":-| :-/ :-S :-* :-* "
+                          ":-* :-* :-P ;-P X-P "
+                          ":-( :-( >:-( :-( :'-( "
+                          "X-( :-O :-O :-O :-O "
+                          ":\"-( :-O :-O :'-O :-O "
+                          ":-O :-3 :'-3 :-3 :-3 "
+                          ";-3 :-3 :-( :-) (-:";
+
+  char *inptr = (char *) str;
+  size_t inlen = strlen (str) + 1;
+  char outbuf[500];
+  char *outptr = outbuf;
+  size_t outlen = sizeof (outbuf);
+  int result = 0;
+  size_t n;
+
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    FAIL_EXIT1 ("setlocale failed");
+
+  cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+  if (cd == (iconv_t) -1)
+    FAIL_EXIT1 ("iconv_open failed");
+
+  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  if (n != num_emojis)
+    {
+      if (n == (size_t) -1)
+        printf ("iconv() returned error: %m\n");
+      else
+        printf ("iconv() returned %zd, expected %d\n", n, num_emojis);
+      result = 1;
+    }
+  if (inlen != 0)
+    {
+      puts ("not all input consumed");
+      result = 1;
+    }
+  else if (inptr - str != strlen (str) + 1)
+    {
+      printf ("inptr wrong, advanced by %td\n", inptr - str);
+      result = 1;
+    }
+  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
+    {
+      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
+              (int) (sizeof (outbuf) - outlen), outbuf, expected);
+      result = 1;
+    }
+  else if (outlen != sizeof (outbuf) - sizeof (expected))
+    {
+      printf ("outlen wrong: %zd, expected %zd\n", outlen,
+              sizeof (outbuf) - sizeof (expected));
+      result = 1;
+    }
+  else
+    printf ("output is \"%s\" which is OK\n", outbuf);
+
+  return result;
+}
+
+#include <support/test-driver.c>
-- 
2.39.2

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4] localedata: Translit common emojis to smileys [BZ #30649]
  2023-08-29  6:02       ` [PATCH v4] " Colin Leroy-Mira
@ 2023-08-29  7:26         ` Florian Weimer
  2023-08-29  7:50         ` Colin Leroy-Mira
  1 sibling, 0 replies; 18+ messages in thread
From: Florian Weimer @ 2023-08-29  7:26 UTC (permalink / raw)
  To: Colin Leroy-Mira; +Cc: libc-alpha

* Colin Leroy-Mira:

> Add common emojis to the translit-able characters (mostly
> faces and hearts), and translit them to old-fashioned
> smileys.
>
> Author: Colin Leroy-Mira <colin@colino.net>
> Signed-off-by: Colin Leroy-Mira <colin@colino.net>

Thanks, applied.

Florian


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v4] localedata: Translit common emojis to smileys [BZ #30649]
  2023-08-29  6:02       ` [PATCH v4] " Colin Leroy-Mira
  2023-08-29  7:26         ` Florian Weimer
@ 2023-08-29  7:50         ` Colin Leroy-Mira
  1 sibling, 0 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-08-29  7:50 UTC (permalink / raw)
  To: Florian Weimer, libc-alpha

Hi Florian,

> Thanks, applied.

Thank you !

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] localedata: Translit common emojis to smileys [BZ #30649]
  2023-07-21 13:25 ` Colin Leroy-Mira
  2023-07-21 14:04   ` Adhemerval Zanella Netto
@ 2023-07-21 14:20   ` Colin Leroy-Mira
  1 sibling, 0 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-07-21 14:20 UTC (permalink / raw)
  To: libc-alpha

July 21, 2023 at 4:04 PM, "Adhemerval Zanella Netto" <adhemerval.zanella@linaro.org> wrote:

Hi again,

> I am not sure, but it could be this indeed. In any case I would recommend you to
> remove the comments; it is not clear they would be correctly displayed in all
> environments and it can be confusing.
> Could you resend another version without the comment?

That was it :) 
Also I've been able to automatically mark the very first send of that
patch "Superseded" using in-reply-to. Sadly I failed setting in-reply-to
in the second sending, so it has not been marked Superseded by this last send.

I hope that's not going to be an annoyance to Patchwork reviewers and 
I'm sorry for the mess-ups! I'll do better as I learn the system.

Thanks,
-- 
Colin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] localedata: Translit common emojis to smileys [BZ #30649]
  2023-07-21 13:25 ` Colin Leroy-Mira
@ 2023-07-21 14:04   ` Adhemerval Zanella Netto
  2023-07-21 14:20   ` Colin Leroy-Mira
  1 sibling, 0 replies; 18+ messages in thread
From: Adhemerval Zanella Netto @ 2023-07-21 14:04 UTC (permalink / raw)
  To: Colin Leroy-Mira, libc-alpha



On 21/07/23 10:25, Colin Leroy-Mira via Libc-alpha wrote:
> Hello,
> 
> And sorry for spamming - I've seen that v2 of the patch does not show up in the Patchwork list at https://patchwork.sourceware.org/project/glibc/list/ and thought my patch tag in the subject was wrong, but apparently that's not it.
> 
> Could the raw UTF-8 in localedata/tst-iconv-emojis-trans.c be the problem? 
> 
> Should I remove that commented declaration? I added it because a similar one is included in localedata/tst-iconv-math-trans.c.

I am not sure, but it could be this indeed.  In any case I would recommend you to
remove the comments; it is not clear they would be correctly displayed in all
environments and it can be confusing.

Could you resend another version without the comment? 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2] localedata: Translit common emojis to smileys [BZ #30649]
  2023-07-21  6:52 [PATCH v2] " Colin Leroy-Mira
@ 2023-07-21 13:25 ` Colin Leroy-Mira
  2023-07-21 14:04   ` Adhemerval Zanella Netto
  2023-07-21 14:20   ` Colin Leroy-Mira
  0 siblings, 2 replies; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-07-21 13:25 UTC (permalink / raw)
  To: libc-alpha

Hello,

And sorry for spamming - I've seen that v2 of the patch does not show up in the Patchwork list at https://patchwork.sourceware.org/project/glibc/list/ and thought my patch tag in the subject was wrong, but apparently that's not it.

Could the raw UTF-8 in localedata/tst-iconv-emojis-trans.c be the problem? 

Should I remove that commented declaration? I added it because a similar one is included in localedata/tst-iconv-math-trans.c.

Thanks in advance!
-- 
Colin

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2] localedata: Translit common emojis to smileys [BZ #30649]
@ 2023-07-21  6:52 Colin Leroy-Mira
  2023-07-21 13:25 ` Colin Leroy-Mira
  0 siblings, 1 reply; 18+ messages in thread
From: Colin Leroy-Mira @ 2023-07-21  6:52 UTC (permalink / raw)
  To: libc-alpha; +Cc: Colin Leroy-Mira

Add common emojis to the translit-able characters (mostly
faces and hearts), and translit them to old-fashioned
smileys.
v2: fix a wrong smiley, add unit test

Author: Colin Leroy-Mira <colin@colino.net>
Signed-off-by: Colin Leroy-Mira <colin@colino.net>
---
 localedata/Makefile                 |   3 +
 localedata/locales/translit_emojis  |  91 ++++++++++++++++++
 localedata/locales/translit_neutral |   1 +
 localedata/tst-iconv-emojis-trans.c | 139 ++++++++++++++++++++++++++++
 4 files changed, 234 insertions(+)
 create mode 100644 localedata/locales/translit_emojis
 create mode 100644 localedata/tst-iconv-emojis-trans.c

diff --git a/localedata/Makefile b/localedata/Makefile
index 3619b6d47e..5b6d10e33f 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -164,6 +164,7 @@ tests = \
   bug-usesetlocale \
   tst-c-utf8-consistency \
   tst-digits \
+  tst-iconv-emojis-trans \
   tst-iconv-math-trans \
   tst-leaks \
   tst-mbswcs1 \
@@ -320,6 +321,8 @@ LOCALES := \
 
 include ../gen-locales.mk
 
+$(objpfx)tst-iconv-emojis-trans.out: $(gen-locales)
+
 $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
 endif
 
diff --git a/localedata/locales/translit_emojis b/localedata/locales/translit_emojis
new file mode 100644
index 0000000000..260aeedc35
--- /dev/null
+++ b/localedata/locales/translit_emojis
@@ -0,0 +1,91 @@
+escape_char /
+comment_char %
+
+% This file is part of the GNU C Library and contains locale data.
+% The Free Software Foundation does not claim any copyright interest
+% in the locale data contained in this file.  The foregoing does not
+% affect the license of the GNU C Library as a whole.  It does not
+% exempt you from the conditions of the license if your use would
+% otherwise be governed by that license.
+
+% Transliterations of emojis to ASCII smileys.
+% Generated algorithmically.
+
+LC_CTYPE
+
+translit_start
+
+<U2661> "<U003C><U0033>" % WHITE HEART SUIT
+<U2665> "<U003C><U0033>" % BLACK HEART SUIT
+<U2764> "<U003C><U0033>" % HEAVY BLACK HEART
+<U0001F499> "<U003C><U0033>" % BLUE HEART
+<U0001F493> "<U003C><U0033>" % BEATING HEART
+<U0001F494> "<U003C><U002F><U0033>" % BROKEN HEART
+<U0001F496> "<U003C><U0033>" % SPARKLING HEART
+<U0001F497> "<U003C><U0033>" % GROWING HEART
+<U0001F49A> "<U003C><U0033>" % GREEN HEART
+<U0001F49B> "<U003C><U0033>" % YELLOW HEART
+<U0001F49C> "<U003C><U0033>" % PURPLE HEART
+<U0001F5A4> "<U003C><U0033>" % BLACK HEART
+<U0001F9E1> "<U003C><U0033>" % ORANGE HEART
+<U0001F90D> "<U003C><U0033>" % WHITE HEART
+<U0001F90E> "<U003C><U0033>" % BROWN HEART
+<U0001F600> "<U003A><U002D><U0044>" % GRINNING FACE
+<U0001F601> "<U003A><U002D><U0044>" % GRINNING FACE WITH SMILING EYES
+<U0001F602> "<U003A><U0027><U0044>" % FACE WITH TEARS OF JOY
+<U0001F603> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH (C.F. ☺)
+<U0001F604> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND SMILING EYES
+<U0001F605> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND COLD SWEAT
+<U0001F606> "<U003A><U002D><U0044>" % SMILING FACE WITH OPEN MOUTH AND TIGHTLY-CLOSED EYES
+<U0001F607> "<U004F><U003A><U002D><U0029>" % SMILING FACE WITH HALO
+<U0001F608> "<U003E><U003A><U0029>" % SMILING FACE WITH HORNS
+<U0001F609> "<U003B><U002D><U0029>" % WINKING FACE
+<U0001F60A> "<U003A><U002D><U0029>" % SMILING FACE WITH SMILING EYES
+<U0001F60B> "<U003A><U002D><U0050>" % FACE SAVOURING DELICIOUS FOOD
+<U0001F60C> "<U003A><U002D><U0029>" % RELIEVED FACE
+<U0001F60D> "<U003A><U002D><U002A>" % SMILING FACE WITH HEART-SHAPED EYES
+<U0001F60E> "<U0042><U002D><U0029>" % SMILING FACE WITH SUNGLASSES
+<U0001F60F> "<U003B><U002D><U0029>" % SMIRKING FACE
+<U0001F610> "<U003A><U002D><U007C>" % NEUTRAL FACE
+<U0001F611> "<U003A><U002D><U007C>" % EXPRESSIONLESS FACE
+<U0001F612> "<U003A><U002D><U007C>" % UNAMUSED FACE
+<U0001F613> "<U003A><U0027><U002D><U007C>" % FACE WITH COLD SWEAT
+<U0001F614> "<U003A><U002D><U007C>" % PENSIVE FACE
+<U0001F615> "<U003A><U002D><U002F>" % CONFUSED FACE
+<U0001F616> "<U003A><U002D><U0053>" % CONFOUNDED FACE
+<U0001F617> "<U003A><U002D><U002A>" % KISSING FACE
+<U0001F618> "<U003A><U002D><U002A>" % FACE THROWING A KISS
+<U0001F619> "<U003A><U002D><U002A>" % KISSING FACE WITH SMILING EYES
+<U0001F61A> "<U003A><U002D><U002A>" % KISSING FACE WITH CLOSED EYES
+<U0001F61B> "<U003A><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE
+<U0001F61C> "<U003B><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND WINKING EYE
+<U0001F61D> "<U0058><U002D><U0050>" % FACE WITH STUCK-OUT TONGUE AND TIGHTLY-CLOSED EYES
+<U0001F61E> "<U003A><U002D><U0028>" % DISAPPOINTED FACE
+<U0001F61F> "<U003A><U002D><U0028>" % WORRIED FACE
+<U0001F620> "<U003E><U003A><U002D><U0028>" % ANGRY FACE
+<U0001F621> "<U003A><U002D><U0028>" % POUTING FACE
+<U0001F622> "<U003A><U0027><U002D><U0028>" % CRYING FACE
+<U0001F623> "<U0058><U002D><U0028>" % PERSEVERING FACE
+<U0001F626> "<U003A><U002D><U004F>" % FROWNING FACE WITH OPEN MOUTH
+<U0001F627> "<U003A><U002D><U004F>" % ANGUISHED FACE
+<U0001F628> "<U003A><U002D><U004F>" % FEARFUL FACE
+<U0001F629> "<U003A><U002D><U004F>" % WEARY FACE
+<U0001F62D> "<U003A><U0022><U002D><U0028>" % LOUDLY CRYING FACE
+<U0001F62E> "<U003A><U002D><U004F>" % FACE WITH OPEN MOUTH
+<U0001F62F> "<U003A><U002D><U004F>" % HUSHED FACE
+<U0001F630> "<U003A><U0027><U002D><U004F>" % FACE WITH OPEN MOUTH AND COLD SWEAT
+<U0001F631> "<U003A><U002D><U004F>" % FACE SCREAMING IN FEAR
+<U0001F632> "<U003A><U002D><U004F>" % ASTONISHED FACE
+<U0001F638> "<U003A><U002D><U0033>" % GRINNING CAT FACE WITH SMILING EYES
+<U0001F639> "<U003A><U0027><U002D><U0033>" % CAT FACE WITH TEARS OF JOY
+<U0001F63A> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH OPEN MOUTH
+<U0001F63B> "<U003A><U002D><U0033>" % SMILING CAT FACE WITH HEART-SHAPED EYES
+<U0001F63C> "<U003B><U002D><U0033>" % CAT FACE WITH WRY SMILE
+<U0001F63D> "<U003A><U002D><U0033>" % KISSING CAT FACE WITH CLOSED EYES
+<U0001F641> "<U003A><U002D><U0028>" % SLIGHTLY FROWNING FACE
+<U0001F642> "<U003A><U002D><U0029>" % SLIGHTLY SMILING FACE
+<U0001F643> "<U0028><U002D><U003A>" % UPSIDE-DOWN FACE
+
+translit_end
+
+END LC_CTYPE
diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral
index 72f66220b7..57412ae565 100644
--- a/localedata/locales/translit_neutral
+++ b/localedata/locales/translit_neutral
@@ -17,6 +17,7 @@ translit_start
 include "translit_circle";""
 include "translit_cjk_compat";""
 include "translit_compat";""
+include "translit_emojis";""
 include "translit_font";""
 include "translit_fraction";""
 include "translit_narrow";""
diff --git a/localedata/tst-iconv-emojis-trans.c b/localedata/tst-iconv-emojis-trans.c
new file mode 100644
index 0000000000..cc8b2a8bba
--- /dev/null
+++ b/localedata/tst-iconv-emojis-trans.c
@@ -0,0 +1,139 @@
+/* Test some emoji transliterations
+
+   Copyright (C) 2019-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+
+/* Check that emojis are transliterated to the expected ASCII smileys.  */
+static int
+do_test (void)
+{
+  /* str[] = "♡ ♥ ❤ 💙 💓 "
+             "💔 💖 💗 💚 💛 "
+             "💜 🖤 🧡 🤍 🤎 "
+             "😀 😁 😂 😃 😄 "
+             "😅 😆 😇 😈 😉 "
+             "😊 😋 😌 😍 😎 "
+             "😏 😐 😑 😒 😓 "
+             "😔 😕 😖 😗 😘 "
+             "😙 😚 😛 😜 😝 "
+             "😞 😟 😠 😡 😢 "
+             "😣 😦 😧 😨 😩 "
+             "😭 😮 😯 😰 😱 "
+             "😲 😸 😹 😺 😻 "
+             "😼 😽 🙁 🙂 🙃";  */
+
+  const int num_emojis = 70;
+
+  const char str[] = "\u2661 \u2665 \u2764 \U0001F499 "
+                      "\U0001F493 \U0001F494 \U0001F496 "
+                      "\U0001F497 \U0001F49A \U0001F49B "
+                      "\U0001F49C \U0001F5A4 \U0001F9E1 "
+                      "\U0001F90D \U0001F90E \U0001F600 "
+                      "\U0001F601 \U0001F602 \U0001F603 "
+                      "\U0001F604 \U0001F605 \U0001F606 "
+                      "\U0001F607 \U0001F608 \U0001F609 "
+                      "\U0001F60A \U0001F60B \U0001F60C "
+                      "\U0001F60D \U0001F60E \U0001F60F "
+                      "\U0001F610 \U0001F611 \U0001F612 "
+                      "\U0001F613 \U0001F614 \U0001F615 "
+                      "\U0001F616 \U0001F617 \U0001F618 "
+                      "\U0001F619 \U0001F61A \U0001F61B "
+                      "\U0001F61C \U0001F61D \U0001F61E "
+                      "\U0001F61F \U0001F620 \U0001F621 "
+                      "\U0001F622 \U0001F623 \U0001F626 "
+                      "\U0001F627 \U0001F628 \U0001F629 "
+                      "\U0001F62D \U0001F62E \U0001F62F "
+                      "\U0001F630 \U0001F631 \U0001F632 "
+                      "\U0001F638 \U0001F639 \U0001F63A "
+                      "\U0001F63B \U0001F63C \U0001F63D "
+                      "\U0001F641 \U0001F642 \U0001F643";
+
+  const char expected[] = "<3 <3 <3 <3 <3 "
+                          "</3 <3 <3 <3 <3 "
+                          "<3 <3 <3 <3 <3 "
+                          ":-D :-D :'D :-D :-D "
+                          ":-D :-D O:-) >:) ;-) "
+                          ":-) :-P :-) :-* B-) "
+                          ";-) :-| :-| :-| :'-| "
+                          ":-| :-/ :-S :-* :-* "
+                          ":-* :-* :-P ;-P X-P "
+                          ":-( :-( >:-( :-( :'-( "
+                          "X-( :-O :-O :-O :-O "
+                          ":\"-( :-O :-O :'-O :-O "
+                          ":-O :-3 :'-3 :-3 :-3 "
+                          ";-3 :-3 :-( :-) (-:";
+
+  char *inptr = (char *) str;
+  size_t inlen = strlen (str) + 1;
+  char outbuf[500];
+  char *outptr = outbuf;
+  size_t outlen = sizeof (outbuf);
+  int result = 0;
+
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    FAIL_EXIT1 ("setlocale failed");
+
+  iconv_t cd = iconv_open ("ASCII//TRANSLIT", "UTF-8");
+  if (cd == (iconv_t) -1)
+    FAIL_EXIT1 ("iconv_open failed");
+
+  /* iconv returns the number of irreversible conversions, one per emoji.  */
+  size_t n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  if (n != num_emojis)
+    {
+      if (n == (size_t) -1)
+        printf ("iconv() returned error: %m\n");
+      else
+        printf ("iconv() returned %zu, expected %d\n", n, num_emojis);
+      result = 1;
+    }
+  if (inlen != 0)
+    {
+      puts ("not all input consumed");
+      result = 1;
+    }
+  else if (inptr - str != strlen (str) + 1)
+    {
+      printf ("inptr wrong, advanced by %td\n", inptr - str);
+      result = 1;
+    }
+  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
+    {
+      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
+              (int) (sizeof (outbuf) - outlen), outbuf, expected);
+      result = 1;
+    }
+  else if (outlen != sizeof (outbuf) - sizeof (expected))
+    {
+      printf ("outlen wrong: %zu, expected %zu\n", outlen,
+              sizeof (outbuf) - sizeof (expected));
+      result = 1;
+    }
+  else
+    printf ("output is \"%s\" which is OK\n", outbuf);
+
+  iconv_close (cd);
+  return result;
+}
+
+#include <support/test-driver.c>
-- 
2.39.2


^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2023-08-29  7:50 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-19 16:17 [PATCH] localedata: Translit common emojis to smileys [BZ #30649] Colin Leroy-Mira
2023-07-20 22:27 ` [PATCH][v2] " Colin Leroy-Mira
2023-07-21 14:11 ` [PATCH v2] " Colin Leroy-Mira
2023-08-08  7:07   ` Colin Leroy-Mira
2023-08-08  9:20   ` Florian Weimer
2023-08-08 10:02   ` Colin Leroy-Mira
2023-08-08 11:50     ` Florian Weimer
2023-08-08 13:52   ` [PATCH v3] " Colin Leroy-Mira
2023-08-16  8:33     ` [PING] " Colin Leroy-Mira
2023-08-23 17:42     ` [ping] " Colin Leroy-Mira
2023-08-28 13:10     ` Florian Weimer
2023-08-29  6:02       ` [PATCH v4] " Colin Leroy-Mira
2023-08-29  7:26         ` Florian Weimer
2023-08-29  7:50         ` Colin Leroy-Mira
2023-07-21  6:52 [PATCH v2] " Colin Leroy-Mira
2023-07-21 13:25 ` Colin Leroy-Mira
2023-07-21 14:04   ` Adhemerval Zanella Netto
2023-07-21 14:20   ` Colin Leroy-Mira

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).