public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] add tests for tzset(3)
@ 2022-04-07 15:58 jdoubleu
  2022-04-08 21:21 ` Jeff Johnston
  2022-04-10  8:43 ` Dimitar Dimitrov
  0 siblings, 2 replies; 35+ messages in thread
From: jdoubleu @ 2022-04-07 15:58 UTC (permalink / raw)
  To: newlib

[-- Attachment #1: Type: text/plain, Size: 478 bytes --]

Hi,

I've finally created tests for tzset(3). They test the POSIX timezone 
string compliance.

This patch is intended to be applied after Brian's tzset changes have 
been pushed (see other discussion).

You can also find the test vectors online 
(https://github.com/jdoubleu/newlib-posix-tzset-tests/blob/main/timezones.h) 
and run the tests on linux with glibc 
(https://github.com/jdoubleu/newlib-posix-tzset-tests/tree/main/host_test).

Cheers
---
🙎🏻‍♂️ jdoubleu

[-- Attachment #2: 0001-add-tests-for-tzset-3.patch --]
[-- Type: text/plain, Size: 8909 bytes --]

From 97fa4a760e67f4f553dbe18a3a75fa14e855916d Mon Sep 17 00:00:00 2001
From: jdoubleu <hi@jdoubleu.de>
Date: Thu, 7 Apr 2022 17:30:01 +0200
Subject: [PATCH] add tests for tzset(3)

---
 newlib/testsuite/newlib.time/time.exp |  12 ++
 newlib/testsuite/newlib.time/tzset.c  | 163 ++++++++++++++++++++++++++
 2 files changed, 175 insertions(+)
 create mode 100644 newlib/testsuite/newlib.time/time.exp
 create mode 100644 newlib/testsuite/newlib.time/tzset.c

diff --git a/newlib/testsuite/newlib.time/time.exp b/newlib/testsuite/newlib.time/time.exp
new file mode 100644
index 000000000..3fce2306e
--- /dev/null
+++ b/newlib/testsuite/newlib.time/time.exp
@@ -0,0 +1,12 @@
+# Copyright (C) 2022 jdoubleu. All rights reserved.
+#
+# Permission to use, copy, modify, and distribute this software
+# is freely granted, provided that this notice is preserved.
+#
+
+load_lib passfail.exp
+
+set exclude_list {
+}
+
+newlib_pass_fail_all -x $exclude_list
diff --git a/newlib/testsuite/newlib.time/tzset.c b/newlib/testsuite/newlib.time/tzset.c
new file mode 100644
index 000000000..0e5b196c6
--- /dev/null
+++ b/newlib/testsuite/newlib.time/tzset.c
@@ -0,0 +1,163 @@
+/* Test that valid POSIX timezone strings are correctly parsed by tzset(3). */
+#include <stdio.h>
+#include <stdlib.h>
+
+// BEGIN test vectors
+#include <time.h>
+#include <limits.h>
+
+#define IN_SECONDS(h, m, s) ((h) * 3600 + (m) * 60 + (s))
+#define NO_TIME INT_MIN
+
+struct tz_test {
+    const char* tzstr;
+    int offset_seconds;
+    int dst_offset_seconds;
+};
+
+extern struct tm winter_tm;
+extern struct tm summer_tm;
+extern const time_t winter_time;
+extern const time_t summer_time;
+extern struct tz_test test_timezones[];
+
+// winter time is March, 21st 2022 at 8:15pm and 20 seconds
+struct tm winter_tm = {
+    .tm_sec     = 20,
+    .tm_min     = 15,
+    .tm_hour    = 20,
+    .tm_mday    = 21,
+    .tm_mon     = 3 - 1,
+    .tm_year    = 2022 - 1900,
+    .tm_isdst   = 0
+};
+
+// summer time is July, 15th 2022 at 10:50am and 40 seconds
+struct tm summer_tm = {
+    .tm_sec     = 40,
+    .tm_min     = 50,
+    .tm_hour    = 10,
+    .tm_mday    = 15,
+    .tm_mon     = 7 - 1,
+    .tm_year    = 2022 - 1900,
+    .tm_isdst   = 1
+};
+
+// UTC unix time for the winter time
+const time_t winter_time = 1647893720;
+const time_t summer_time = 1657882240;
+
+struct tz_test test_timezones[] = {
+    /*
+     * creating test vectors based on the POSIX spec (https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03)
+     */
+    // normal std names
+    {"MAR1",         IN_SECONDS(1, 0, 0),    NO_TIME},
+    {"MAR-1",       -IN_SECONDS(1, 0, 0),    NO_TIME},
+    {"MAR+2",        IN_SECONDS(2, 0, 0),    NO_TIME},
+    {"MAR7",         IN_SECONDS(7, 0, 0),    NO_TIME},
+    {"MAR-7",       -IN_SECONDS(7, 0, 0),    NO_TIME},
+    {"MARS5",        IN_SECONDS(5, 0, 0),    NO_TIME},
+    {"MARSM5",       IN_SECONDS(5, 0, 0),    NO_TIME},
+    {"MARSMOON5",    IN_SECONDS(5, 0, 0),    NO_TIME},   // assuming TZNAME_MAX >= 8
+    {"MARS5:23:42",  IN_SECONDS(5, 23, 42),  NO_TIME},
+    {"SUN-7:14:24", -IN_SECONDS(7, 14, 24),  NO_TIME},
+    // with DST
+    {"MAR5SMAR",                IN_SECONDS(5, 0, 0), IN_SECONDS(4, 0, 0)},  // only DST name
+    {"MAR5SMAR2",               IN_SECONDS(5, 0, 0), IN_SECONDS(2, 0, 0)},  // DST name with offset
+    {"MAR3SMAR-3",              IN_SECONDS(3, 0, 0), -IN_SECONDS(3, 0, 0)},
+    {"MARSWINTER4MARSUMMER",    IN_SECONDS(4, 0, 0), IN_SECONDS(3, 0, 0)},
+    {"MARSWINTER4MARSUMMER3",   IN_SECONDS(4, 0, 0), IN_SECONDS(3, 0, 0)},
+    // with DST IN_SECONDSs
+    {"WMARS3SMARS,J80",                                 IN_SECONDS(3, 0, 0), IN_SECONDS(2, 0, 0)},
+    {"WMARS3SMARS,J80,J134",                            IN_SECONDS(3, 0, 0), IN_SECONDS(2, 0, 0)},
+    {"WMARS3SMARS,79",                                  IN_SECONDS(3, 0, 0), IN_SECONDS(2, 0, 0)},
+    {"WMARS3SMARS,76,134",                              IN_SECONDS(3, 0, 0), IN_SECONDS(2, 0, 0)},
+    {"WMARS3SMARS,76/02,134/03",                        IN_SECONDS(3, 0, 0), IN_SECONDS(2, 0, 0)},
+    {"WMARS3SMARS,76/02:15:45,134/03:40:20",            IN_SECONDS(3, 0, 0), IN_SECONDS(2, 0, 0)},
+    {"WMARS3SMARS,M3.4.1/02:15:45,M8.3.1/03:40:20",     IN_SECONDS(3, 0, 0), IN_SECONDS(2, 0, 0)},
+
+    // special std names
+    {"<UNK>-1",                                 -IN_SECONDS(1, 0, 0),     NO_TIME},
+    {"<UNKNOWN>-2",                             -IN_SECONDS(2, 0, 0),     NO_TIME},                  // require TZNAME_MAX >= 7 + 1
+    {"<003>3",                                   IN_SECONDS(3, 0, 0),     NO_TIME},
+    {"<+04>4",                                   IN_SECONDS(4, 0, 0),     NO_TIME},
+    {"<-05>-5",                                 -IN_SECONDS(5, 0, 0),     NO_TIME},
+    {"<A-5>6",                                   IN_SECONDS(6, 0, 0),     NO_TIME},
+    {"<+A5>-7",                                 -IN_SECONDS(7, 0, 0),     NO_TIME},
+    {"<0123456>8",                               IN_SECONDS(8, 0, 0),     NO_TIME},
+    {"<0A1B2C3>9",                               IN_SECONDS(9, 0, 0),     NO_TIME},
+    {"<RD-04>-4<RD+005>5",                      -IN_SECONDS(4, 0, 0),     IN_SECONDS(5, 0, 0)},
+    {"<WINT+03>3<SUM+02>",                       IN_SECONDS(3, 0, 0),     IN_SECONDS(2, 0, 0)},
+    {"<WINT+03>3<SUM+02>2",                      IN_SECONDS(3, 0, 0),     IN_SECONDS(2, 0, 0)},
+    {"<WINT+03>3:15<SUM+02>2:30:15",             IN_SECONDS(3, 15, 0),    IN_SECONDS(2, 30, 15)},
+    {"<H3M15>3:15<H2M30S15>2:30:15",             IN_SECONDS(3, 15, 0),    IN_SECONDS(2, 30, 15)},   // requires TZNAME_MAX >= 8 + 1
+    {"<+H6M20S12>6:20:12<-H4M40S14>-4:40:14",    IN_SECONDS(6, 20, 12),  -IN_SECONDS(4, 40, 14)},   // requires TZNAME_MAX >= 9 + 1
+    {"<+0123456789ABCDEF>3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
+
+    /* 
+     * real-world test vectors.
+     * IN_SECONDSzones extracted from the tzdb (https://github.com/eggert/tz#2019e).
+     * The IN_SECONDSzone strings can also be obtained from https://raw.githubusercontent.com/nayarsystems/posix_tz_db/master/zones.csv.
+     */
+    { /* Etc/GMT-14 */              "<+14>-14",                        -IN_SECONDS(14, 0, 0),     NO_TIME},
+    { /* Etc/GMT+12 */              "<-12>12",                          IN_SECONDS(12, 0, 0),     NO_TIME},
+    { /* Africa/Casablanca */       "<+01>-1",                         -IN_SECONDS(1, 0, 0),      NO_TIME},
+    { /* America/Araguaina */       "<-03>3",                           IN_SECONDS(3, 0, 0),      NO_TIME},
+    { /* America/Asuncion */        "<-04>4<-03>,M10.1.0/0,M3.4.0/0",   IN_SECONDS(4, 0, 0),      IN_SECONDS(3, 0, 0)},
+    { /* America/Los_Angeles */     "PST8PDT,M3.2.0,M11.1.0",           IN_SECONDS(8, 0, 0),      IN_SECONDS(7, 0, 0)},
+    { /* America/New_York */        "EST5EDT,M3.2.0,M11.1.0",           IN_SECONDS(5, 0, 0),      IN_SECONDS(4, 0, 0)},
+    { /* America/Scoresbysund */    "<-01>1<+00>,M3.5.0/0,M10.5.0/1",   IN_SECONDS(1, 0, 0),      IN_SECONDS(0, 0, 0)},
+    { /* Asia/Colombo */            "<+0530>-5:30",                    -IN_SECONDS(5, 30, 0),     NO_TIME},
+    { /* Europe/Berlin */           "CET-1CEST,M3.5.0,M10.5.0/3",      -IN_SECONDS(1, 0, 0),     -IN_SECONDS(2, 0, 0)},
+
+    // END of list
+    {NULL, NO_TIME, NO_TIME}
+};
+
+// helper macros
+#define FOR_TIMEZONES(iter_name) for (struct tz_test* iter_name = test_timezones; iter_name->tzstr != NULL; ++iter_name)
+
+// END test vectors
+
+static int failed = 0;
+
+#define TEST_ASSERT_EQUAL_INT_MESSAGE(...) assert_equal(__VA_ARGS__)
+void assert_equal(int lhs, int rhs, const char* msg)
+{
+    if (lhs != rhs)
+    {
+        printf("Assertion failed! Expected %d to equal %d. %s\n", lhs, rhs, msg);
+        ++failed;
+    }
+}
+
+void test_TimezoneStrings(void)
+{
+    char buffer[128];
+
+    FOR_TIMEZONES(ptr)
+    {
+        setenv("TZ", ptr->tzstr, 1);
+        tzset();
+
+        snprintf(buffer, 128, "winter time, timezone = \"%s\"", ptr->tzstr);
+
+        struct tm winter_tm_copy = winter_tm; // copy
+        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr->offset_seconds, mktime(&winter_tm_copy), buffer);
+
+        if (ptr->dst_offset_seconds != NO_TIME)
+        {
+            snprintf(buffer, 128, "summer time, timezone = \"%s\"", ptr->tzstr);
+
+            struct tm summer_tm_copy = summer_tm; // copy
+            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time + ptr->dst_offset_seconds, mktime(&summer_tm_copy), buffer);
+        }
+    }
+}
+
+int main()
+{
+    test_TimezoneStrings();
+    exit (failed);
+}
\ No newline at end of file
-- 
2.35.1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var
@ 2022-04-07 23:34               ` Brian Inglis
  2022-04-07 23:34                 ` [PATCH v2 1/2] newlib/libc/time/tzset.c: doc update POSIX angle bracket <> support Brian Inglis
                                   ` (3 more replies)
  0 siblings, 4 replies; 35+ messages in thread
From: Brian Inglis @ 2022-04-07 23:34 UTC (permalink / raw)
  To: Newlib

Brian Inglis (2):
  newlib/libc/time/tzset.c: doc update POSIX angle bracket <> support
  newlib/libc/time/tzset_r.c(_tzset_unlocked_r): POSIX angle bracket <> support

 newlib/libc/time/tzset.c   | 110 ++++++++++++++++++++++++++-----------
 newlib/libc/time/tzset_r.c |  65 ++++++++++++++++++----
 2 files changed, 132 insertions(+), 43 deletions(-)

-- 
2.35.1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 1/2] newlib/libc/time/tzset.c: doc update POSIX angle bracket <> support
  2022-04-07 23:34               ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Brian Inglis
@ 2022-04-07 23:34                 ` Brian Inglis
  2022-04-07 23:34                 ` [PATCH v2 2/2] newlib/libc/time/tzset_r.c(_tzset_unlocked_r): " Brian Inglis
                                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 35+ messages in thread
From: Brian Inglis @ 2022-04-07 23:34 UTC (permalink / raw)
  To: Newlib

[-- Attachment #1: Type: text/plain, Size: 161 bytes --]


add format and structure to doc
---
 newlib/libc/time/tzset.c | 110 ++++++++++++++++++++++++++++-----------
 1 file changed, 79 insertions(+), 31 deletions(-)


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-newlib-libc-time-tzset.c-doc-update-POSIX-angle-bracket-support.patch --]
[-- Type: text/x-patch; name="0001-newlib-libc-time-tzset.c-doc-update-POSIX-angle-bracket-support.patch", Size: 5371 bytes --]

diff --git a/newlib/libc/time/tzset.c b/newlib/libc/time/tzset.c
index 3b4c01c660a4..77accbfb0485 100644
--- a/newlib/libc/time/tzset.c
+++ b/newlib/libc/time/tzset.c
@@ -1,6 +1,6 @@
 /*
 FUNCTION
-<<tzset>>---set timezone characteristics from TZ environment variable
+<<tzset>>---set timezone characteristics from <[TZ]> environment variable
 
 INDEX
 	tzset
@@ -13,36 +13,84 @@ SYNOPSIS
 	void _tzset_r (struct _reent *<[reent_ptr]>);
 
 DESCRIPTION
-<<tzset>> examines the TZ environment variable and sets up the three
-external variables: <<_timezone>>, <<_daylight>>, and <<tzname>>.  The
-value of <<_timezone>> shall be the offset from the current time zone
-to GMT.  The value of <<_daylight>> shall be 0 if there is no daylight
-savings time for the current time zone, otherwise it will be non-zero.
-The <<tzname>> array has two entries: the first is the name of the
-standard time zone, the second is the name of the daylight-savings time
-zone.
-
-The TZ environment variable is expected to be in the following POSIX
-format:
-
-  stdoffset1[dst[offset2][,start[/time1],end[/time2]]]
-
-where: std is the name of the standard time-zone (minimum 3 chars)
-       offset1 is the value to add to local time to arrive at Universal time
-         it has the form:  hh[:mm[:ss]]
-       dst is the name of the alternate (daylight-savings) time-zone (min 3 chars)
-       offset2 is the value to add to local time to arrive at Universal time
-         it has the same format as the std offset
-       start is the day that the alternate time-zone starts
-       time1 is the optional time that the alternate time-zone starts
-         (this is in local time and defaults to 02:00:00 if not specified)
-       end is the day that the alternate time-zone ends
-       time2 is the time that the alternate time-zone ends
-         (it is in local time and defaults to 02:00:00 if not specified)
-
-Note that there is no white-space padding between fields.  Also note that
-if TZ is null, the default is Universal GMT which has no daylight-savings
-time.  If TZ is empty, the default EST5EDT is used.
+<<tzset>> examines the <[TZ]> environment variable and sets up the three
+external variables: <<_timezone>>, <<_daylight>>, and <<tzname>>.
+The value of <<_timezone>> shall be the offset from the current time
+to Universal Time.
+The value of <<_daylight>> shall be 0 if there is no daylight savings
+time for the current time zone, otherwise it will be non-zero.
+The <<tzname>> array has two entries: the first is the designation of the
+standard time period, the second is the designation of the alternate time
+period.
+
+The <[TZ]> environment variable is expected to be in the following POSIX
+format (spaces inserted for clarity):
+
+    <[std]> <[offset1]> [<[dst]> [<[offset2]>] [,<[start]> [/<[time1]>], <[end]> [/<[time2]>]]]
+
+where:
+
+<[std]> is the designation for the standard time period (minimum 3,
+maximum <<TZNAME_MAX>> bytes) in one of two forms:
+
+*- quoted within angle bracket '<' '>' characters: portable numeric
+sign or alphanumeric characters in the current locale; the
+quoting characters are not included in the designation
+
+*- unquoted: portable alphabetic characters in the current locale
+
+<[offset1]> is the value to add to local standard time to get Universal Time;
+it has the format:
+
+    [<[S]>]<[hh]>[:<[mm]>[:<[ss]>]]
+
+    where:
+
+    <[S]> is an optional numeric sign character; if negative '-', the
+    time zone is East of the International Reference
+    Meridian; else it is positive and West, and '+' may be used
+
+    <[hh]> is the required numeric hour between 0 and 24
+
+    <[mm]> is the optional numeric minute between 0 and 59
+
+    <[ss]> is the optional numeric second between 0 and 59
+
+<[dst]> is the designation of the alternate (daylight saving or
+summer) time period, with the same limits and forms as
+the standard time period designation
+
+<[offset2]> is the value to add to local alternate time to get
+Universal Time; it has the same format as <[offset1]>
+
+<[start]> is the date in the year that alternate time starts;
+the form may be one of:
+(quotes "'" around characters below are used only to distinguish literals)
+
+    <[n]>	zero based Julian day (0-365), counting February 29 Leap days
+
+    'J'<[n]>	one based Julian day (1-365), not counting February 29 Leap
+    days; in all years day 59 is February 28 and day 60 is March 1
+
+    'M'<[m]>'.'<[w]>'.'<[d]>
+    month <[m]> (1-12) week <[w]> (1-5) day <[d]> (0-6) where week 1 is
+    the first week in month <[m]> with day <[d]>; week 5 is the last
+    week in the month; day 0 is Sunday
+
+<[time1]> is the optional local time that alternate time starts, in
+the same format as <[offset1]> without any sign, and defaults
+to 02:00:00
+
+<[end]> is the date in the year that alternate time ends, in the same
+forms as <[start]>
+
+<[time2]> is the optional local time that alternate time ends, in
+the same format as <[offset1]> without any sign, and
+defaults to 02:00:00
+
+Note that there is no white-space padding between fields. Also note that
+if <[TZ]> is null, the default is Universal Time which has no daylight saving
+time. If <[TZ]> is empty, the default EST5EDT is used.
 
 The function <<_tzset_r>> is identical to <<tzset>> only it is reentrant
 and is used for applications that use multiple threads.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2 2/2] newlib/libc/time/tzset_r.c(_tzset_unlocked_r): POSIX angle bracket <> support
  2022-04-07 23:34               ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Brian Inglis
  2022-04-07 23:34                 ` [PATCH v2 1/2] newlib/libc/time/tzset.c: doc update POSIX angle bracket <> support Brian Inglis
@ 2022-04-07 23:34                 ` Brian Inglis
  2022-04-08 19:11                 ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Jeff Johnston
  2022-04-13 17:53                 ` [PATCH] add tests for tzset(3) Brian Inglis
  3 siblings, 0 replies; 35+ messages in thread
From: Brian Inglis @ 2022-04-07 23:34 UTC (permalink / raw)
  To: Newlib

[-- Attachment #1: Type: text/plain, Size: 403 bytes --]


local defines for POSIX minimum TZ abbr size 3 TZNAME_MIN and
maximum TZ abbr size supported 10 TZNAME_MAX
allow POSIX angle bracket < > quoted signed alphanumeric tz abbr e.g. <MESZ+0330>
allow POSIX unquoted alphabetic tz abbr e.g. MESZ
allow same suuport for DST tz abbr
---
 newlib/libc/time/tzset_r.c | 65 +++++++++++++++++++++++++++++++-------
 1 file changed, 53 insertions(+), 12 deletions(-)


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0002-newlib-libc-time-tzset_r.c-_tzset_unlocked_r-POSIX-angle-bracket-support.patch --]
[-- Type: text/x-patch; name="0002-newlib-libc-time-tzset_r.c-_tzset_unlocked_r-POSIX-angle-bracket-support.patch", Size: 2772 bytes --]

diff --git a/newlib/libc/time/tzset_r.c b/newlib/libc/time/tzset_r.c
index 9e0cf834bd6b..9cb30b188f98 100644
--- a/newlib/libc/time/tzset_r.c
+++ b/newlib/libc/time/tzset_r.c
@@ -9,8 +9,11 @@
 
 #define sscanf siscanf	/* avoid to pull in FP functions. */
 
-static char __tzname_std[11];
-static char __tzname_dst[11];
+#define TZNAME_MIN	3	/* POSIX min TZ abbr size local def */
+#define TZNAME_MAX	10	/* POSIX max TZ abbr size local def */
+
+static char __tzname_std[TZNAME_MAX + 1];
+static char __tzname_dst[TZNAME_MAX + 1];
 static char *prev_tzenv = NULL;
 
 void
@@ -45,8 +48,25 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   if (*tzenv == ':')
     ++tzenv;  
 
-  if (sscanf (tzenv, "%10[^0-9,+-]%n", __tzname_std, &n) <= 0)
-    return;
+  /* allow POSIX angle bracket < > quoted signed alphanumeric tz abbr e.g. <MESZ+0330> */
+  if (*tzenv == '<')
+    {
+      ++tzenv;
+
+      /* quit if no items, too few or too many chars, or no close quote '>' */
+      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
+		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
+        return;
+
+      ++tzenv;	/* bump for close quote '>' */
+    }
+  else
+    {
+      /* allow POSIX unquoted alphabetic tz abbr e.g. MESZ */
+      if (sscanf (tzenv, "%10[A-Za-z]%n", __tzname_std, &n) <= 0
+				|| n < TZNAME_MIN || TZNAME_MAX < n)
+        return;
+    }
  
   tzenv += n;
 
@@ -68,17 +88,38 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   tz->__tzrule[0].offset = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
   _tzname[0] = __tzname_std;
   tzenv += n;
-  
-  if (sscanf (tzenv, "%10[^0-9,+-]%n", __tzname_dst, &n) <= 0)
-    { /* No dst */
-      _tzname[1] = _tzname[0];
-      _timezone = tz->__tzrule[0].offset;
-      _daylight = 0;
-      return;
+
+  /* allow POSIX angle bracket < > quoted signed alphanumeric tz abbr e.g. <MESZ+0330> */
+  if (*tzenv == '<')
+    {
+      ++tzenv;
+
+      /* quit if no items, too few or too many chars, or no close quote '>' */
+      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_dst, &n) <= 0
+		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
+	{ /* No dst */
+	  _tzname[1] = _tzname[0];
+	  _timezone = tz->__tzrule[0].offset;
+	  _daylight = 0;
+	  return;
+	}
+
+      ++tzenv;	/* bump for close quote '>' */
     }
   else
-    _tzname[1] = __tzname_dst;
+    {
+      /* allow POSIX unquoted alphabetic tz abbr e.g. MESZ */
+      if (sscanf (tzenv, "%10[A-Za-z]%n", __tzname_dst, &n) <= 0
+				|| n < TZNAME_MIN || TZNAME_MAX < n)
+	{ /* No dst */
+	  _tzname[1] = _tzname[0];
+	  _timezone = tz->__tzrule[0].offset;
+	  _daylight = 0;
+	  return;
+	}
+    }
 
+  _tzname[1] = __tzname_dst;
   tzenv += n;
 
   /* otherwise we have a dst name, look for the offset */

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var
  2022-04-07 23:34               ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Brian Inglis
  2022-04-07 23:34                 ` [PATCH v2 1/2] newlib/libc/time/tzset.c: doc update POSIX angle bracket <> support Brian Inglis
  2022-04-07 23:34                 ` [PATCH v2 2/2] newlib/libc/time/tzset_r.c(_tzset_unlocked_r): " Brian Inglis
@ 2022-04-08 19:11                 ` Jeff Johnston
  2022-04-13 17:53                 ` [PATCH] add tests for tzset(3) Brian Inglis
  3 siblings, 0 replies; 35+ messages in thread
From: Jeff Johnston @ 2022-04-08 19:11 UTC (permalink / raw)
  To: Brian Inglis; +Cc: Newlib

Patches merged to master.  Thanks Brian.

-- Jeff J.

On Thu, Apr 7, 2022 at 7:34 PM Brian Inglis <Brian.Inglis@systematicsw.ab.ca>
wrote:

> Brian Inglis (2):
>   newlib/libc/time/tzset.c: doc update POSIX angle bracket <> support
>   newlib/libc/time/tzset_r.c(_tzset_unlocked_r): POSIX angle bracket <>
> support
>
>  newlib/libc/time/tzset.c   | 110 ++++++++++++++++++++++++++-----------
>  newlib/libc/time/tzset_r.c |  65 ++++++++++++++++++----
>  2 files changed, 132 insertions(+), 43 deletions(-)
>
> --
> 2.35.1
>
>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-07 15:58 [PATCH] add tests for tzset(3) jdoubleu
@ 2022-04-08 21:21 ` Jeff Johnston
  2022-04-10  8:43 ` Dimitar Dimitrov
  1 sibling, 0 replies; 35+ messages in thread
From: Jeff Johnston @ 2022-04-08 21:21 UTC (permalink / raw)
  To: jdoubleu; +Cc: Newlib

Patch applied.  Thanks.

-- Jeff J.

On Thu, Apr 7, 2022 at 11:58 AM jdoubleu <hi@jdoubleu.de> wrote:

> Hi,
>
> I've finally created tests for tzset(3). They test the POSIX timezone
> string compliance.
>
> This patch is intended to be applied after Brian's tzset changes have
> been pushed (see other discussion).
>
> You can also find the test vectors online
> (
> https://github.com/jdoubleu/newlib-posix-tzset-tests/blob/main/timezones.h)
>
> and run the tests on linux with glibc
> (https://github.com/jdoubleu/newlib-posix-tzset-tests/tree/main/host_test
> ).
>
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-07 15:58 [PATCH] add tests for tzset(3) jdoubleu
  2022-04-08 21:21 ` Jeff Johnston
@ 2022-04-10  8:43 ` Dimitar Dimitrov
  2022-04-10 17:55   ` jdoubleu
  1 sibling, 1 reply; 35+ messages in thread
From: Dimitar Dimitrov @ 2022-04-10  8:43 UTC (permalink / raw)
  To: jdoubleu; +Cc: newlib

On Thu, Apr 07, 2022 at 05:58:27PM +0200, jdoubleu wrote:
> Hi,
> 
> I've finally created tests for tzset(3). They test the POSIX timezone string
> compliance.
> 
> This patch is intended to be applied after Brian's tzset changes have been
> pushed (see other discussion).
> 
> You can also find the test vectors online
> (https://github.com/jdoubleu/newlib-posix-tzset-tests/blob/main/timezones.h)
> and run the tests on linux with glibc
> (https://github.com/jdoubleu/newlib-posix-tzset-tests/tree/main/host_test).
> 
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu

> From 97fa4a760e67f4f553dbe18a3a75fa14e855916d Mon Sep 17 00:00:00 2001
> From: jdoubleu <hi@jdoubleu.de>
> Date: Thu, 7 Apr 2022 17:30:01 +0200
> Subject: [PATCH] add tests for tzset(3)
> 
> ---
>  newlib/testsuite/newlib.time/time.exp |  12 ++
>  newlib/testsuite/newlib.time/tzset.c  | 163 ++++++++++++++++++++++++++
>  2 files changed, 175 insertions(+)
>  create mode 100644 newlib/testsuite/newlib.time/time.exp
>  create mode 100644 newlib/testsuite/newlib.time/tzset.c

Hi,

I get the following failure on pru-none-elf and arm-none-eabi targets:

Assertion failed! Expected 1647906533 to equal 1647916532. winter time, timezone = "<+0123456789ABCDEF>3:33:33"
FAIL: newlib.time/tzset.c execution

Regards,
Dimitar

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-10  8:43 ` Dimitar Dimitrov
@ 2022-04-10 17:55   ` jdoubleu
  2022-04-10 21:00     ` Dimitar Dimitrov
  0 siblings, 1 reply; 35+ messages in thread
From: jdoubleu @ 2022-04-10 17:55 UTC (permalink / raw)
  To: Dimitar Dimitrov; +Cc: newlib

Hi Dimitar,

On 4/10/2022 10:43 AM, Dimitar Dimitrov wrote:
> I get the following failure on pru-none-elf and arm-none-eabi targets:

that is interesting! I just checked and they are still passing on 
x86_64-pc-linux-gnu. Which begs the question: What is the difference to 
your environment/target platform?

Are you using any additional compilation flags? Have you 
enabled/disabled optimizations? Have you enabled the nano I/O API[1]?
What compiler (and version) are you using? How/Where are you running the 
tests?

> [..] timezone = "<+0123456789ABCDEF>3:33:33"

The given test case aims to check for tzset's parsing behavior when the 
name (inside the angle brackets) exceeds the TZNAME_MAX var, i.e. is too 
long to be fully stored. The default behavior is to truncate the name 
and ignore all remaining chars[2]. The time should still be parsed.

> Assertion failed! Expected 1647906533 to equal 1647916532.

Now, the interesting part is, that the diff is 9,999 seconds. The time 
is off by 6 hours, 20 minutes and 12 seconds. That is 2 hours, 46 
minutes and 38 seconds too far in the future. I am not sure where this 
is coming from.

Neither does it look like it bailed early[3], because it couldn't parse 
the name.

Please try and remove the "<+0123456789ABCDEF>" part from the string in 
the tests[4] and re-run the tests.

Could you maybe also post the assembly listing and intermediate output 
of the compiler? If you're using gcc, just (re)configure newlib and 
rebuild libc:

	CFLAGS=-save-temps=obj ../newlib/configure
	make clean # get rid of stamp files
	make

then copy and attach the following files:
	
	libc/time/libc_a-tzset_r.i
	libc/time/libc_a-tzset_r.s


[1]: 
https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/README;h=97890b9d20ca0ac9f206d7a2334d55a3d7052289;hb=HEAD#l403
[2]: https://sourceware.org/pipermail/newlib/2022/019535.html
[3]: 
https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
[4]: 
https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/testsuite/newlib.time/tzset.c;h=0e5b196c629bcbecae6148c09bbeba82079c5367;hb=HEAD#l96


Cheers
---
🙎🏻‍♂️ jdoubleu

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-10 17:55   ` jdoubleu
@ 2022-04-10 21:00     ` Dimitar Dimitrov
  2022-04-11 11:17       ` jdoubleu
  0 siblings, 1 reply; 35+ messages in thread
From: Dimitar Dimitrov @ 2022-04-10 21:00 UTC (permalink / raw)
  To: jdoubleu; +Cc: newlib

On Sun, Apr 10, 2022 at 07:55:56PM +0200, jdoubleu wrote:
> Hi Dimitar,
> 
> On 4/10/2022 10:43 AM, Dimitar Dimitrov wrote:
> > I get the following failure on pru-none-elf and arm-none-eabi targets:
> 
> that is interesting! I just checked and they are still passing on
> x86_64-pc-linux-gnu. Which begs the question: What is the difference to your
> environment/target platform?

Here is how I build a cross toolchain for arm:
  https://github.com/dinuxbg/gnupru/blob/master/testing/manual-build-arm.sh
Then I build and execute the test case using qemu:
  $ ./arm-opt/bin/arm-none-eabi-gcc -O2 newlib/newlib/testsuite/newlib.time/tzset.c -o tzset-arm.elf
  $ qemu-arm tzset-arm.elf
Assertion failed! Expected 1647906533 to equal 1647916532. winter time, timezone = "3:33:33"

For PRU target I'm running a daily regression test for Newlib, using the
GNU simulator:
  https://github.com/dinuxbg/gnupru/blob/master/testing/buildbot-pru.sh

> 
> Are you using any additional compilation flags? Have you enabled/disabled
> optimizations? Have you enabled the nano I/O API[1]?
> What compiler (and version) are you using? How/Where are you running the
> tests?

These are the newlib configure options I use for arm:
   --target=arm-none-eabi --enable-newlib-io-long-long --enable-newlib-io-long-double --enable-newlib-io-c99-formats

I use GCC 12 from GIT master branch for both PRU and ARM cross
toolchains.

> 
> > [..] timezone = "<+0123456789ABCDEF>3:33:33"
> 
> The given test case aims to check for tzset's parsing behavior when the name
> (inside the angle brackets) exceeds the TZNAME_MAX var, i.e. is too long to
> be fully stored. The default behavior is to truncate the name and ignore all
> remaining chars[2]. The time should still be parsed.
> 
> > Assertion failed! Expected 1647906533 to equal 1647916532.
> 
> Now, the interesting part is, that the diff is 9,999 seconds. The time is
> off by 6 hours, 20 minutes and 12 seconds. That is 2 hours, 46 minutes and
> 38 seconds too far in the future. I am not sure where this is coming from.

6:20:12 is the timezone string of the previous test case, whose tzset
call was successful. Looking at the current code, this is expected
behaviour. Perhaps TZ should be reset to UTC before the bail out?

      /* quit if no items, too few or too many chars, or no close quote '>' */
      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
                || n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
      {
+       _timezone = 0;
+       _daylight = 0;
+       _tzname[0] = "GMT";
+       _tzname[1] = "GMT";
+       free(prev_tzenv);
+       prev_tzenv = NULL;
        return;
      }

> 
> Neither does it look like it bailed early[3], because it couldn't parse the
> name.
> 
> Please try and remove the "<+0123456789ABCDEF>" part from the string in the
> tests[4] and re-run the tests.

With that chunk removed, as shown below:
    {"3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
I still get:
  Assertion failed! Expected 1647906533 to equal 1647916532. winter time, timezone = "3:33:33"

Test passes with the following modifcation:
     {"<+00>3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)

> 
> Could you maybe also post the assembly listing and intermediate output of
> the compiler? If you're using gcc, just (re)configure newlib and rebuild
> libc:
> 
> 	CFLAGS=-save-temps=obj ../newlib/configure
> 	make clean # get rid of stamp files
> 	make
> 
> then copy and attach the following files:
> 	
> 	libc/time/libc_a-tzset_r.i
> 	libc/time/libc_a-tzset_r.s

Sure. Give me a day or two to spin the builds.

> 
> 
> [1]: https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/README;h=97890b9d20ca0ac9f206d7a2334d55a3d7052289;hb=HEAD#l403
> [2]: https://sourceware.org/pipermail/newlib/2022/019535.html
> [3]: https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
> [4]: https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/testsuite/newlib.time/tzset.c;h=0e5b196c629bcbecae6148c09bbeba82079c5367;hb=HEAD#l96
> 
> 
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-10 21:00     ` Dimitar Dimitrov
@ 2022-04-11 11:17       ` jdoubleu
  2022-04-11 17:27         ` Dimitar Dimitrov
  0 siblings, 1 reply; 35+ messages in thread
From: jdoubleu @ 2022-04-11 11:17 UTC (permalink / raw)
  To: Dimitar Dimitrov; +Cc: newlib

[-- Attachment #1: Type: text/plain, Size: 1969 bytes --]

Hi,

looks like I'm running the testsuite against glibc and not newlib (for 
target x86_64-pc-linux-gnu). I'm not even sure whether there's a backend 
for linux.

I'm currently trying to run only the tzset code against the test vectors 
(like Brian Inglis did[1]).

At least it show, that the newlib implementation differs from glibc. 
Maybe the test case is flawed and it should indeed fail.

> 6:20:12 is the timezone string of the previous test case, whose tzset
> call was successful. Looking at the current code, this is expected
> behaviour.

Okay. Looks like the condition[2] fails. The question is, which part of 
it does?

I've appended a patch, which prints all variables when the condition 
fails. Could you please apply the patch and then recompile and re-run 
the tests again?

I've previously noticed something with the sscanf format[3].

> Perhaps TZ should be reset to UTC before the bail out?

I don't think the implementation should fall back to UTC whenever 
parsing failed. It apparently doesn't in glibc. I'm not sure if the 
behavior is specified somewhere.

Maybe resetting it before each test case is a good idea, though. That 
makes it clearer, why the test case failed.

> With that chunk removed, as shown below:
>     {"3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
> I still get:
>   Assertion failed! Expected 1647906533 to equal 1647916532. winter time, timezone = "3:33:33"

My bad; "3:33:33" isn't a valid timezone string. It has to be prefixed 
by a name e.g. "MESZ" or "<+00>", as you tried. That explains why it is 
also failing.


Thanks for your effort so far!


[1]: https://sourceware.org/pipermail/newlib/2022/019529.html
[2]: 
https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
[3]: https://sourceware.org/pipermail/newlib/2022/019535.html


Cheers
---
🙎🏻‍♂️ jdoubleu

[-- Attachment #2: 0001-debug-print-condition-values-of-tz-string-name-parsi.patch --]
[-- Type: text/plain, Size: 1116 bytes --]

From 8c698dc63f765d4a5b3a49a25850c1738279d68d Mon Sep 17 00:00:00 2001
From: jdoubleu <hi@jdoubleu.de>
Date: Mon, 11 Apr 2022 13:10:38 +0200
Subject: [PATCH] debug print condition values of tz string name parsing

---
 newlib/libc/time/tzset_r.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/newlib/libc/time/tzset_r.c b/newlib/libc/time/tzset_r.c
index 9cb30b188..e20a32a62 100644
--- a/newlib/libc/time/tzset_r.c
+++ b/newlib/libc/time/tzset_r.c
@@ -54,9 +54,11 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
       ++tzenv;
 
       /* quit if no items, too few or too many chars, or no close quote '>' */
-      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
-		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
+      int res = sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n);
+      if (res <= 0 || n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n]) {
+        printf("parsing name: tzenv=\"%s\", res=%d, n=%d, tzenv[n] = %c\n", tzenv, res, n, tzenv[n]);
         return;
+      }
 
       ++tzenv;	/* bump for close quote '>' */
     }
-- 
2.35.1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-11 11:17       ` jdoubleu
@ 2022-04-11 17:27         ` Dimitar Dimitrov
  2022-04-12 11:19           ` jdoubleu
  0 siblings, 1 reply; 35+ messages in thread
From: Dimitar Dimitrov @ 2022-04-11 17:27 UTC (permalink / raw)
  To: jdoubleu; +Cc: newlib

On Mon, Apr 11, 2022 at 01:17:16PM +0200, jdoubleu wrote:
> Hi,
> 
> looks like I'm running the testsuite against glibc and not newlib (for
> target x86_64-pc-linux-gnu). I'm not even sure whether there's a backend for
> linux.
> 
> I'm currently trying to run only the tzset code against the test vectors
> (like Brian Inglis did[1]).
> 
> At least it show, that the newlib implementation differs from glibc. Maybe
> the test case is flawed and it should indeed fail.
> 
> > 6:20:12 is the timezone string of the previous test case, whose tzset
> > call was successful. Looking at the current code, this is expected
> > behaviour.
> 
> Okay. Looks like the condition[2] fails. The question is, which part of it
> does?

I believe it is the ('>' != tzenv[n]) check that fails, because the
maximum parsed string limit of 10 has been reached and n=10.

> 
> I've appended a patch, which prints all variables when the condition fails.
> Could you please apply the patch and then recompile and re-run the tests
> again?

Here is the result for arm-none-eabi:
parsing name: tzenv="+0123456789ABCDEF>3:33:33", res=1, n=10, tzenv[n] = 9
Assertion failed! Expected 1647906533 to equal 1647916532. winter time, timezone = "<+0123456789ABCDEF>3:33:33"

I assume you no longer need assembly output from compiler?
> 
> I've previously noticed something with the sscanf format[3].
> 
> > Perhaps TZ should be reset to UTC before the bail out?
> 
> I don't think the implementation should fall back to UTC whenever parsing
> failed. It apparently doesn't in glibc. I'm not sure if the behavior is
> specified somewhere.

The tzset manual page says that UTC is used if TZ cannot be parsed:
https://man7.org/linux/man-pages/man3/tzset.3.html

> 
> Maybe resetting it before each test case is a good idea, though. That makes
> it clearer, why the test case failed.
> 
> > With that chunk removed, as shown below:
> >     {"3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
> > I still get:
> >   Assertion failed! Expected 1647906533 to equal 1647916532. winter time, timezone = "3:33:33"
> 
> My bad; "3:33:33" isn't a valid timezone string. It has to be prefixed by a
> name e.g. "MESZ" or "<+00>", as you tried. That explains why it is also
> failing.
> 
> 
> Thanks for your effort so far!
> 
> 
> [1]: https://sourceware.org/pipermail/newlib/2022/019529.html
> [2]: https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
> [3]: https://sourceware.org/pipermail/newlib/2022/019535.html
> 
> 
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu

> From 8c698dc63f765d4a5b3a49a25850c1738279d68d Mon Sep 17 00:00:00 2001
> From: jdoubleu <hi@jdoubleu.de>
> Date: Mon, 11 Apr 2022 13:10:38 +0200
> Subject: [PATCH] debug print condition values of tz string name parsing
> 
> ---
>  newlib/libc/time/tzset_r.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/newlib/libc/time/tzset_r.c b/newlib/libc/time/tzset_r.c
> index 9cb30b188..e20a32a62 100644
> --- a/newlib/libc/time/tzset_r.c
> +++ b/newlib/libc/time/tzset_r.c
> @@ -54,9 +54,11 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
>        ++tzenv;
>  
>        /* quit if no items, too few or too many chars, or no close quote '>' */
> -      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
> -		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
> +      int res = sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n);
> +      if (res <= 0 || n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n]) {
> +        printf("parsing name: tzenv=\"%s\", res=%d, n=%d, tzenv[n] = %c\n", tzenv, res, n, tzenv[n]);
>          return;
> +      }
>  
>        ++tzenv;	/* bump for close quote '>' */
>      }
> -- 
> 2.35.1
> 


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-11 17:27         ` Dimitar Dimitrov
@ 2022-04-12 11:19           ` jdoubleu
  2022-04-12 18:33             ` Brian Inglis
  0 siblings, 1 reply; 35+ messages in thread
From: jdoubleu @ 2022-04-12 11:19 UTC (permalink / raw)
  To: Dimitar Dimitrov; +Cc: newlib, Jeff Johnston, Brian Inglis

Hi,

> I believe it is the ('>' != tzenv[n]) check that fails, because the
> maximum parsed string limit of 10 has been reached and n=10.

Okay, so the newlib implementation actually fails, when the name is too 
long.

The POSIX standard leaves it up to the implementation how to handle 
names[1]:
> The interpretation of these fields is unspecified if either field is less than three bytes (except for the case when dst is missing), more than {TZNAME_MAX} bytes, or if they contain characters other than those specified.

I'm not sure where to go with this. In both cases, the current 
implementation needs a fix. Looping in Brian Inglis, the writer of the 
current implementation and Jeff Johnston, a maintainer.

As I see it, there are two solutions:
1. Treat long names as an error case. The failed condition[2] needs to 
apply UTC as current time, like Dimitar Dimitrov previously suggested. 
The test needs to be changed to check that it actually fails.
2. Mimic glibc's behavior and ignore the remaining characters from the 
name. In that case, the condition[2] and following code needs to be 
updated as well.

I am happy to provide a patch, when we agreed on one solution.

> I assume you no longer need assembly output from compiler?

No, thanks. We already found the issue. It would only be helpful, if 
different configurations generate different code (through macros, 
optimization, etc.).

[1]: 
https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
[2]: 
https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57

Cheers
---
🙎🏻‍♂️ jdoubleu
On 4/11/2022 7:27 PM, Dimitar Dimitrov wrote:
> On Mon, Apr 11, 2022 at 01:17:16PM +0200, jdoubleu wrote:
>> Hi,
>>
>> looks like I'm running the testsuite against glibc and not newlib (for
>> target x86_64-pc-linux-gnu). I'm not even sure whether there's a backend for
>> linux.
>>
>> I'm currently trying to run only the tzset code against the test vectors
>> (like Brian Inglis did[1]).
>>
>> At least it show, that the newlib implementation differs from glibc. Maybe
>> the test case is flawed and it should indeed fail.
>>
>>> 6:20:12 is the timezone string of the previous test case, whose tzset
>>> call was successful. Looking at the current code, this is expected
>>> behaviour.
>>
>> Okay. Looks like the condition[2] fails. The question is, which part of it
>> does?
> 
> I believe it is the ('>' != tzenv[n]) check that fails, because the
> maximum parsed string limit of 10 has been reached and n=10.
> 
>>
>> I've appended a patch, which prints all variables when the condition fails.
>> Could you please apply the patch and then recompile and re-run the tests
>> again?
> 
> Here is the result for arm-none-eabi:
> parsing name: tzenv="+0123456789ABCDEF>3:33:33", res=1, n=10, tzenv[n] = 9
> Assertion failed! Expected 1647906533 to equal 1647916532. winter time, timezone = "<+0123456789ABCDEF>3:33:33"
> 
> I assume you no longer need assembly output from compiler?
>>
>> I've previously noticed something with the sscanf format[3].
>>
>>> Perhaps TZ should be reset to UTC before the bail out?
>>
>> I don't think the implementation should fall back to UTC whenever parsing
>> failed. It apparently doesn't in glibc. I'm not sure if the behavior is
>> specified somewhere.
> 
> The tzset manual page says that UTC is used if TZ cannot be parsed:
> https://man7.org/linux/man-pages/man3/tzset.3.html
> 
>>
>> Maybe resetting it before each test case is a good idea, though. That makes
>> it clearer, why the test case failed.
>>
>>> With that chunk removed, as shown below:
>>>      {"3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
>>> I still get:
>>>    Assertion failed! Expected 1647906533 to equal 1647916532. winter time, timezone = "3:33:33"
>>
>> My bad; "3:33:33" isn't a valid timezone string. It has to be prefixed by a
>> name e.g. "MESZ" or "<+00>", as you tried. That explains why it is also
>> failing.
>>
>>
>> Thanks for your effort so far!
>>
>>
>> [1]: https://sourceware.org/pipermail/newlib/2022/019529.html
>> [2]: https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
>> [3]: https://sourceware.org/pipermail/newlib/2022/019535.html
>>
>>
>> Cheers
>> ---
>> 🙎🏻‍♂️ jdoubleu
> 
>>  From 8c698dc63f765d4a5b3a49a25850c1738279d68d Mon Sep 17 00:00:00 2001
>> From: jdoubleu <hi@jdoubleu.de>
>> Date: Mon, 11 Apr 2022 13:10:38 +0200
>> Subject: [PATCH] debug print condition values of tz string name parsing
>>
>> ---
>>   newlib/libc/time/tzset_r.c | 6 ++++--
>>   1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/newlib/libc/time/tzset_r.c b/newlib/libc/time/tzset_r.c
>> index 9cb30b188..e20a32a62 100644
>> --- a/newlib/libc/time/tzset_r.c
>> +++ b/newlib/libc/time/tzset_r.c
>> @@ -54,9 +54,11 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
>>         ++tzenv;
>>   
>>         /* quit if no items, too few or too many chars, or no close quote '>' */
>> -      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
>> -		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
>> +      int res = sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n);
>> +      if (res <= 0 || n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n]) {
>> +        printf("parsing name: tzenv=\"%s\", res=%d, n=%d, tzenv[n] = %c\n", tzenv, res, n, tzenv[n]);
>>           return;
>> +      }
>>   
>>         ++tzenv;	/* bump for close quote '>' */
>>       }
>> -- 
>> 2.35.1
>>
> 

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-12 11:19           ` jdoubleu
@ 2022-04-12 18:33             ` Brian Inglis
  2022-04-07 23:34               ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Brian Inglis
  2022-04-13 22:21               ` [PATCH] add tests for tzset(3) Brian Inglis
  0 siblings, 2 replies; 35+ messages in thread
From: Brian Inglis @ 2022-04-12 18:33 UTC (permalink / raw)
  To: Newlib

On 2022-04-12 05:19, jdoubleu wrote:
> On 4/11/2022 7:27 PM, Dimitar Dimitrov wrote:
>> On Mon, Apr 11, 2022 at 01:17:16PM +0200, jdoubleu wrote:
>>> looks like I'm running the testsuite against glibc and not newlib
>>> (for target x86_64-pc-linux-gnu). I'm not even sure whether
>>> there's a backend for linux.
>>> I'm currently trying to run only the tzset code against the test
>>> vectors (like Brian Inglis did[1]).
>>> At least it show, that the newlib implementation differs from glibc. 
>>> Maybe the test case is flawed and it should indeed fail.

>>>> 6:20:12 is the timezone string of the previous test case, whose tzset
>>>> call was successful. Looking at the current code, this is expected
>>>> behaviour.

>>> Okay. Looks like the condition[2] fails. The question is, which
>>> part of it does?
>> I believe it is the ('>' != tzenv[n]) check that fails, because the
>> maximum parsed string limit of 10 has been reached and n=10.

>>> I've appended a patch, which prints all variables when the condition 
>>> fails.
>>> Could you please apply the patch and then recompile and re-run the tests
>>> again?

>> Here is the result for arm-none-eabi:
>> parsing name: tzenv="+0123456789ABCDEF>3:33:33", res=1, n=10, tzenv[n] 
>> = 9
>> Assertion failed! Expected 1647906533 to equal 1647916532. winter 
>> time, timezone = "<+0123456789ABCDEF>3:33:33"
>> I assume you no longer need assembly output from compiler?

>>> I've previously noticed something with the sscanf format[3].
>>>> Perhaps TZ should be reset to UTC before the bail out?

>>> I don't think the implementation should fall back to UTC
>>> whenever parsing failed. It apparently doesn't in glibc. I'm not
>>> sure if the behavior is specified somewhere.

>> The tzset manual page says that UTC is used if TZ cannot be parsed:
>> https://man7.org/linux/man-pages/man3/tzset.3.html

>>> Maybe resetting it before each test case is a good idea, though.
>>> That makes it clearer, why the test case failed.
>>>> With that chunk removed, as shown below:
>>>>      {"3:33:33",               IN_SECONDS(3, 33, 33),   
>>>> NO_TIME},                 // truncates the name (17 + 1)
>>>> I still get:
>>>>    Assertion failed! Expected 1647906533 to equal 1647916532. winter 
>>>> time, timezone = "3:33:33"

>>> My bad; "3:33:33" isn't a valid timezone string. It has to be 
>>> prefixed by a
>>> name e.g. "MESZ" or "<+00>", as you tried. That explains why it is also
>>> failing.

>>> [1]: https://sourceware.org/pipermail/newlib/2022/019529.html
>>> [2]: 
>>> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57 
>>>
>>> [3]: https://sourceware.org/pipermail/newlib/2022/019535.html

>> I believe it is the ('>' != tzenv[n]) check that fails, because the
>> maximum parsed string limit of 10 has been reached and n=10.

> Okay, so the newlib implementation actually fails, when the name is
> too long.
> The POSIX standard leaves it up to the implementation how to handle 
> names[1]:

>> The interpretation of these fields is unspecified if either field is 
>> less than three bytes (except for the case when dst is missing), more 
>> than {TZNAME_MAX} bytes, or if they contain characters other than 
>> those specified.

> I'm not sure where to go with this. In both cases, the current 
> implementation needs a fix. Looping in Brian Inglis, the writer of the 
> current implementation and Jeff Johnston, a maintainer.
> As I see it, there are two solutions:
> 1. Treat long names as an error case. The failed condition[2] needs to 
> apply UTC as current time, like Dimitar Dimitrov previously suggested. 
> The test needs to be changed to check that it actually fails.
> 2. Mimic glibc's behavior and ignore the remaining characters from the 
> name. In that case, the condition[2] and following code needs to be 
> updated as well.
> I am happy to provide a patch, when we agreed on one solution.

>> I assume you no longer need assembly output from compiler?

> No, thanks. We already found the issue. It would only be helpful, if 
> different configurations generate different code (through macros, 
> optimization, etc.).
> [1]:https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03 
> [2]:https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;hb=HEAD#l57 

Does anyone know what glibc or the BSDs do on short < 3 char 
abbreviations, or the BSDs do on > LIMIT abbreviations?
What do they do on errors - nothing or revert to UTC?

Should tzset_r allow 1...2 char abbreviations or treat them as errors?

Should tzset_r allow > 10 char abbreviations, ignore and skip extra 
chars, or treat them as errors?

If it ignores and skips extra or accepts too few characters, then 
presumably it should process the remaining fields if present?

POSIX tzset_r says only that it overrides the default time zone, so is 
there a spec for the default time zone, or is it implementation defined 
(e.g. /etc/timezone /etc/localtime)?

Should tzset_r revert to UTC on errors, or just not change the TZ, and 
leave it up to the application to handle the error?

-- 
Take care. Thanks, Brian Inglis, Calgary, Alberta, Canada

This email may be disturbing to some readers as it contains
too much technical detail. Reader discretion is advised.
[Data in binary units and prefixes, physical quantities in SI.]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-07 23:34               ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Brian Inglis
                                   ` (2 preceding siblings ...)
  2022-04-08 19:11                 ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Jeff Johnston
@ 2022-04-13 17:53                 ` Brian Inglis
  2022-04-13 20:33                   ` Jeff Johnston
  3 siblings, 1 reply; 35+ messages in thread
From: Brian Inglis @ 2022-04-13 17:53 UTC (permalink / raw)
  To: Newlib

On 2022-04-12 12:33, Brian Inglis wrote:
> On 2022-04-12 05:19, jdoubleu wrote:
>> On 4/11/2022 7:27 PM, Dimitar Dimitrov wrote:
>>> On Mon, Apr 11, 2022 at 01:17:16PM +0200, jdoubleu wrote:
>>>> looks like I'm running the testsuite against glibc and not newlib
>>>> (for target x86_64-pc-linux-gnu). I'm not even sure whether
>>>> there's a backend for linux.
>>>> I'm currently trying to run only the tzset code against the test
>>>> vectors (like Brian Inglis did[1]).
>>>> At least it show, that the newlib implementation differs from glibc. 
>>>> Maybe the test case is flawed and it should indeed fail.
> 
>>>>> 6:20:12 is the timezone string of the previous test case, whose tzset
>>>>> call was successful. Looking at the current code, this is expected
>>>>> behaviour.
> 
>>>> Okay. Looks like the condition[2] fails. The question is, which
>>>> part of it does?
>>> I believe it is the ('>' != tzenv[n]) check that fails, because the
>>> maximum parsed string limit of 10 has been reached and n=10.
> 
>>>> I've appended a patch, which prints all variables when the condition 
>>>> fails.
>>>> Could you please apply the patch and then recompile and re-run the 
>>>> tests
>>>> again?
> 
>>> Here is the result for arm-none-eabi:
>>> parsing name: tzenv="+0123456789ABCDEF>3:33:33", res=1, n=10, 
>>> tzenv[n] = 9
>>> Assertion failed! Expected 1647906533 to equal 1647916532. winter 
>>> time, timezone = "<+0123456789ABCDEF>3:33:33"
>>> I assume you no longer need assembly output from compiler?
> 
>>>> I've previously noticed something with the sscanf format[3].
>>>>> Perhaps TZ should be reset to UTC before the bail out?
> 
>>>> I don't think the implementation should fall back to UTC
>>>> whenever parsing failed. It apparently doesn't in glibc. I'm not
>>>> sure if the behavior is specified somewhere.
> 
>>> The tzset manual page says that UTC is used if TZ cannot be parsed:
>>> https://man7.org/linux/man-pages/man3/tzset.3.html
> 
>>>> Maybe resetting it before each test case is a good idea, though.
>>>> That makes it clearer, why the test case failed.
>>>>> With that chunk removed, as shown below:
>>>>>      {"3:33:33",               IN_SECONDS(3, 33, 33), 
>>>>> NO_TIME},                 // truncates the name (17 + 1)
>>>>> I still get:
>>>>>    Assertion failed! Expected 1647906533 to equal 1647916532. 
>>>>> winter time, timezone = "3:33:33"
> 
>>>> My bad; "3:33:33" isn't a valid timezone string. It has to be 
>>>> prefixed by a
>>>> name e.g. "MESZ" or "<+00>", as you tried. That explains why it is also
>>>> failing.
> 
>>>> [1]: https://sourceware.org/pipermail/newlib/2022/019529.html
>>>> [2]: 
>>>> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57 
>>>>
>>>> [3]: https://sourceware.org/pipermail/newlib/2022/019535.html
> 
>>> I believe it is the ('>' != tzenv[n]) check that fails, because the
>>> maximum parsed string limit of 10 has been reached and n=10.
> 
>> Okay, so the newlib implementation actually fails, when the name is
>> too long.
>> The POSIX standard leaves it up to the implementation how to handle 
>> names[1]:
> 
>>> The interpretation of these fields is unspecified if either field is 
>>> less than three bytes (except for the case when dst is missing), more 
>>> than {TZNAME_MAX} bytes, or if they contain characters other than 
>>> those specified.
> 
>> I'm not sure where to go with this. In both cases, the current 
>> implementation needs a fix. Looping in Brian Inglis, the writer of the 
>> current implementation and Jeff Johnston, a maintainer.
>> As I see it, there are two solutions:
>> 1. Treat long names as an error case. The failed condition[2] needs to 
>> apply UTC as current time, like Dimitar Dimitrov previously suggested. 
>> The test needs to be changed to check that it actually fails.
>> 2. Mimic glibc's behavior and ignore the remaining characters from the 
>> name. In that case, the condition[2] and following code needs to be 
>> updated as well.
>> I am happy to provide a patch, when we agreed on one solution.
> 
>>> I assume you no longer need assembly output from compiler?
> 
>> No, thanks. We already found the issue. It would only be helpful, if 
>> different configurations generate different code (through macros, 
>> optimization, etc.).
>> [1]:https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03 
>> [2]:https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;hb=HEAD#l57 

> Does anyone know what glibc or the BSDs do on short < 3 char 
> abbreviations, or the BSDs do on > LIMIT abbreviations?
> What do they do on errors - nothing or revert to UTC?
> 
> Should tzset_r allow 1...2 char abbreviations or treat them as errors?
> 
> Should tzset_r allow > 10 char abbreviations, ignore and skip extra 
> chars, or treat them as errors?

Should tzset_r accept characters within quotes not in the allowed 
character set, ignore and skip them, or reject such abbreviations?

> If it ignores and skips extra or accepts too few characters, then 
> presumably it should process the remaining fields if present?
> 
> POSIX tzset_r says only that it overrides the default time zone, so is 
> there a spec for the default time zone, or is it implementation defined 
> (e.g. /etc/timezone /etc/localtime)?
> 
> Should tzset_r revert to UTC on errors, or just not change the TZ, and 
> leave it up to the application to handle the error?

-- 
Take care. Thanks, Brian Inglis, Calgary, Alberta, Canada

This email may be disturbing to some readers as it contains
too much technical detail. Reader discretion is advised.
[Data in binary units and prefixes, physical quantities in SI.]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-13 17:53                 ` [PATCH] add tests for tzset(3) Brian Inglis
@ 2022-04-13 20:33                   ` Jeff Johnston
  2022-04-13 22:19                     ` Brian Inglis
  0 siblings, 1 reply; 35+ messages in thread
From: Jeff Johnston @ 2022-04-13 20:33 UTC (permalink / raw)
  To: Newlib

Looking at the glibc tzset code I have locally (not latest/greatest, but
does support angle brackets):

If there any parse failures, UTC is defaulted.

Extraneous characters inside brackets or less than 3 characters is a parse
failure.

Glibc parses the tz name string char by char and allocates space for the
name strings so there is no max size.  I am fine if you want to mandate a
maximum,
but if you do, then too many chars should be treated as a failure.  If you
aren't certain of the limit, make the limit higher than you expect.
If people run into max limit with reasonable timezone format strings, then
we can up the limit.

-- Jeff J.

On Wed, Apr 13, 2022 at 1:53 PM Brian Inglis <
Brian.Inglis@systematicsw.ab.ca> wrote:

> On 2022-04-12 12:33, Brian Inglis wrote:
> > On 2022-04-12 05:19, jdoubleu wrote:
> >> On 4/11/2022 7:27 PM, Dimitar Dimitrov wrote:
> >>> On Mon, Apr 11, 2022 at 01:17:16PM +0200, jdoubleu wrote:
> >>>> looks like I'm running the testsuite against glibc and not newlib
> >>>> (for target x86_64-pc-linux-gnu). I'm not even sure whether
> >>>> there's a backend for linux.
> >>>> I'm currently trying to run only the tzset code against the test
> >>>> vectors (like Brian Inglis did[1]).
> >>>> At least it show, that the newlib implementation differs from glibc.
> >>>> Maybe the test case is flawed and it should indeed fail.
> >
> >>>>> 6:20:12 is the timezone string of the previous test case, whose tzset
> >>>>> call was successful. Looking at the current code, this is expected
> >>>>> behaviour.
> >
> >>>> Okay. Looks like the condition[2] fails. The question is, which
> >>>> part of it does?
> >>> I believe it is the ('>' != tzenv[n]) check that fails, because the
> >>> maximum parsed string limit of 10 has been reached and n=10.
> >
> >>>> I've appended a patch, which prints all variables when the condition
> >>>> fails.
> >>>> Could you please apply the patch and then recompile and re-run the
> >>>> tests
> >>>> again?
> >
> >>> Here is the result for arm-none-eabi:
> >>> parsing name: tzenv="+0123456789ABCDEF>3:33:33", res=1, n=10,
> >>> tzenv[n] = 9
> >>> Assertion failed! Expected 1647906533 to equal 1647916532. winter
> >>> time, timezone = "<+0123456789ABCDEF>3:33:33"
> >>> I assume you no longer need assembly output from compiler?
> >
> >>>> I've previously noticed something with the sscanf format[3].
> >>>>> Perhaps TZ should be reset to UTC before the bail out?
> >
> >>>> I don't think the implementation should fall back to UTC
> >>>> whenever parsing failed. It apparently doesn't in glibc. I'm not
> >>>> sure if the behavior is specified somewhere.
> >
> >>> The tzset manual page says that UTC is used if TZ cannot be parsed:
> >>> https://man7.org/linux/man-pages/man3/tzset.3.html
> >
> >>>> Maybe resetting it before each test case is a good idea, though.
> >>>> That makes it clearer, why the test case failed.
> >>>>> With that chunk removed, as shown below:
> >>>>>      {"3:33:33",               IN_SECONDS(3, 33, 33),
> >>>>> NO_TIME},                 // truncates the name (17 + 1)
> >>>>> I still get:
> >>>>>    Assertion failed! Expected 1647906533 to equal 1647916532.
> >>>>> winter time, timezone = "3:33:33"
> >
> >>>> My bad; "3:33:33" isn't a valid timezone string. It has to be
> >>>> prefixed by a
> >>>> name e.g. "MESZ" or "<+00>", as you tried. That explains why it is
> also
> >>>> failing.
> >
> >>>> [1]: https://sourceware.org/pipermail/newlib/2022/019529.html
> >>>> [2]:
> >>>>
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
> >>>>
> >>>> [3]: https://sourceware.org/pipermail/newlib/2022/019535.html
> >
> >>> I believe it is the ('>' != tzenv[n]) check that fails, because the
> >>> maximum parsed string limit of 10 has been reached and n=10.
> >
> >> Okay, so the newlib implementation actually fails, when the name is
> >> too long.
> >> The POSIX standard leaves it up to the implementation how to handle
> >> names[1]:
> >
> >>> The interpretation of these fields is unspecified if either field is
> >>> less than three bytes (except for the case when dst is missing), more
> >>> than {TZNAME_MAX} bytes, or if they contain characters other than
> >>> those specified.
> >
> >> I'm not sure where to go with this. In both cases, the current
> >> implementation needs a fix. Looping in Brian Inglis, the writer of the
> >> current implementation and Jeff Johnston, a maintainer.
> >> As I see it, there are two solutions:
> >> 1. Treat long names as an error case. The failed condition[2] needs to
> >> apply UTC as current time, like Dimitar Dimitrov previously suggested.
> >> The test needs to be changed to check that it actually fails.
> >> 2. Mimic glibc's behavior and ignore the remaining characters from the
> >> name. In that case, the condition[2] and following code needs to be
> >> updated as well.
> >> I am happy to provide a patch, when we agreed on one solution.
> >
> >>> I assume you no longer need assembly output from compiler?
> >
> >> No, thanks. We already found the issue. It would only be helpful, if
> >> different configurations generate different code (through macros,
> >> optimization, etc.).
> >> [1]:
> https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
> >> [2]:
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;hb=HEAD#l57
>
> > Does anyone know what glibc or the BSDs do on short < 3 char
> > abbreviations, or the BSDs do on > LIMIT abbreviations?
> > What do they do on errors - nothing or revert to UTC?
> >
> > Should tzset_r allow 1...2 char abbreviations or treat them as errors?
> >
> > Should tzset_r allow > 10 char abbreviations, ignore and skip extra
> > chars, or treat them as errors?
>
> Should tzset_r accept characters within quotes not in the allowed
> character set, ignore and skip them, or reject such abbreviations?
>
> > If it ignores and skips extra or accepts too few characters, then
> > presumably it should process the remaining fields if present?
> >
> > POSIX tzset_r says only that it overrides the default time zone, so is
> > there a spec for the default time zone, or is it implementation defined
> > (e.g. /etc/timezone /etc/localtime)?
> >
> > Should tzset_r revert to UTC on errors, or just not change the TZ, and
> > leave it up to the application to handle the error?
>
> --
> Take care. Thanks, Brian Inglis, Calgary, Alberta, Canada
>
> This email may be disturbing to some readers as it contains
> too much technical detail. Reader discretion is advised.
> [Data in binary units and prefixes, physical quantities in SI.]
>
>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-13 20:33                   ` Jeff Johnston
@ 2022-04-13 22:19                     ` Brian Inglis
  2022-04-14  8:59                       ` jdoubleu
  0 siblings, 1 reply; 35+ messages in thread
From: Brian Inglis @ 2022-04-13 22:19 UTC (permalink / raw)
  To: newlib

On 2022-04-13 14:33, Jeff Johnston wrote:
> Looking at the glibc tzset code I have locally (not latest/greatest, but
> does support angle brackets):
> 
> If there any parse failures, UTC is defaulted.

We currently leave the time zone info unchanged.

> Extraneous characters inside brackets or less than 3 characters is a
> parse failure.
✔ Check	✔ Check

> Glibc parses the tz name string char by char and allocates space for
> the name strings so there is no max size.

The suggestion was that glibc ignores the remaining characters, but you 
imply that glibc in fact uses the equivalent of the scanf "%m[...]" 
(malloc) modifier, and I think using that would be against the newlib 
philosophy to keep things limited and under control to support small 
targets.  Larger targets like Cygwin (do our own thing including 
zoneinfo), and perhaps RTEMS, can supply their own enhancements.

> the name strings so there is no max size.  I am fine if you want to 
> mandate a maximum, but if you do, then too many chars should be 
> treated as a failure.  If you aren't certain of the limit, make the 
> limit higher than you expect.
Current limits are 3-10 allowing for e.g. <MESZ+03:30> which is the most 
ever likely to be used. It might be reasonable to bump it up to say 15.

> If people run into max limit with reasonable timezone format strings, then
> we can up the limit.

The conditions are more or less what is implemented, but we could do 
with a couple more tweaks to improve things, like check for more or 
extraneous chars within the bracket quotes, and that no characters 
remain unconsumed at the end of the parse.

Context below are latest posts under thread "add test for tzset(3)".


> On Wed, Apr 13, 2022 at 1:53 PM Brian Inglis wrote:
>> On 2022-04-12 12:33, Brian Inglis wrote:
>>> On 2022-04-12 05:19, jdoubleu wrote:
>>>> On 4/11/2022 7:27 PM, Dimitar Dimitrov wrote:
>>>>> On Mon, Apr 11, 2022 at 01:17:16PM +0200, jdoubleu wrote:
>>>>>> looks like I'm running the testsuite against glibc and not newlib
>>>>>> (for target x86_64-pc-linux-gnu). I'm not even sure whether
>>>>>> there's a backend for linux.
>>>>>> I'm currently trying to run only the tzset code against the test
>>>>>> vectors (like Brian Inglis did[1]).
>>>>>> At least it show, that the newlib implementation differs from glibc.
>>>>>> Maybe the test case is flawed and it should indeed fail.
>>>
>>>>>>> 6:20:12 is the timezone string of the previous test case, whose tzset
>>>>>>> call was successful. Looking at the current code, this is expected
>>>>>>> behaviour.
>>>
>>>>>> Okay. Looks like the condition[2] fails. The question is, which
>>>>>> part of it does?
>>>>> I believe it is the ('>' != tzenv[n]) check that fails, because the
>>>>> maximum parsed string limit of 10 has been reached and n=10.
>>>
>>>>>> I've appended a patch, which prints all variables when the condition
>>>>>> fails.
>>>>>> Could you please apply the patch and then recompile and re-run the
>>>>>> tests
>>>>>> again?
>>>
>>>>> Here is the result for arm-none-eabi:
>>>>> parsing name: tzenv="+0123456789ABCDEF>3:33:33", res=1, n=10,
>>>>> tzenv[n] = 9
>>>>> Assertion failed! Expected 1647906533 to equal 1647916532. winter
>>>>> time, timezone = "<+0123456789ABCDEF>3:33:33"
>>>>> I assume you no longer need assembly output from compiler?
>>>
>>>>>> I've previously noticed something with the sscanf format[3].
>>>>>>> Perhaps TZ should be reset to UTC before the bail out?
>>>
>>>>>> I don't think the implementation should fall back to UTC
>>>>>> whenever parsing failed. It apparently doesn't in glibc. I'm not
>>>>>> sure if the behavior is specified somewhere.
>>>
>>>>> The tzset manual page says that UTC is used if TZ cannot be parsed:
>>>>> https://man7.org/linux/man-pages/man3/tzset.3.html
>>>
>>>>>> Maybe resetting it before each test case is a good idea, though.
>>>>>> That makes it clearer, why the test case failed.
>>>>>>> With that chunk removed, as shown below:
>>>>>>>       {"3:33:33",               IN_SECONDS(3, 33, 33),
>>>>>>> NO_TIME},                 // truncates the name (17 + 1)
>>>>>>> I still get:
>>>>>>>     Assertion failed! Expected 1647906533 to equal 1647916532.
>>>>>>> winter time, timezone = "3:33:33"
>>>
>>>>>> My bad; "3:33:33" isn't a valid timezone string. It has to be
>>>>>> prefixed by a
>>>>>> name e.g. "MESZ" or "<+00>", as you tried. That explains why it is
>> also
>>>>>> failing.
>>>
>>>>>> [1]: https://sourceware.org/pipermail/newlib/2022/019529.html
>>>>>> [2]:
>>>>>>
>> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
>>>>>>
>>>>>> [3]: https://sourceware.org/pipermail/newlib/2022/019535.html
>>>
>>>>> I believe it is the ('>' != tzenv[n]) check that fails, because the
>>>>> maximum parsed string limit of 10 has been reached and n=10.
>>>
>>>> Okay, so the newlib implementation actually fails, when the name is
>>>> too long.
>>>> The POSIX standard leaves it up to the implementation how to handle
>>>> names[1]:
>>>
>>>>> The interpretation of these fields is unspecified if either field is
>>>>> less than three bytes (except for the case when dst is missing), more
>>>>> than {TZNAME_MAX} bytes, or if they contain characters other than
>>>>> those specified.
>>>
>>>> I'm not sure where to go with this. In both cases, the current
>>>> implementation needs a fix. Looping in Brian Inglis, the writer of the
>>>> current implementation and Jeff Johnston, a maintainer.
>>>> As I see it, there are two solutions:
>>>> 1. Treat long names as an error case. The failed condition[2] needs to
>>>> apply UTC as current time, like Dimitar Dimitrov previously suggested.
>>>> The test needs to be changed to check that it actually fails.
>>>> 2. Mimic glibc's behavior and ignore the remaining characters from the
>>>> name. In that case, the condition[2] and following code needs to be
>>>> updated as well.
>>>> I am happy to provide a patch, when we agreed on one solution.
>>>
>>>>> I assume you no longer need assembly output from compiler?
>>>
>>>> No, thanks. We already found the issue. It would only be helpful, if
>>>> different configurations generate different code (through macros,
>>>> optimization, etc.).
>>>> [1]:
>> https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
>>>> [2]:
>> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;hb=HEAD#l57
>>
>>> Does anyone know what glibc or the BSDs do on short < 3 char
>>> abbreviations, or the BSDs do on > LIMIT abbreviations?
>>> What do they do on errors - nothing or revert to UTC?
>>>
>>> Should tzset_r allow 1...2 char abbreviations or treat them as errors?
>>>
>>> Should tzset_r allow > 10 char abbreviations, ignore and skip extra
>>> chars, or treat them as errors?
>>
>> Should tzset_r accept characters within quotes not in the allowed
>> character set, ignore and skip them, or reject such abbreviations?
>>
>>> If it ignores and skips extra or accepts too few characters, then
>>> presumably it should process the remaining fields if present?
>>>
>>> POSIX tzset_r says only that it overrides the default time zone, so is
>>> there a spec for the default time zone, or is it implementation defined
>>> (e.g. /etc/timezone /etc/localtime)?
>>>
>>> Should tzset_r revert to UTC on errors, or just not change the TZ, and
>>> leave it up to the application to handle the error?

-- 
Take care. Thanks, Brian Inglis, Calgary, Alberta, Canada

This email may be disturbing to some readers as it contains
too much technical detail. Reader discretion is advised.
[Data in binary units and prefixes, physical quantities in SI.]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-12 18:33             ` Brian Inglis
  2022-04-07 23:34               ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Brian Inglis
@ 2022-04-13 22:21               ` Brian Inglis
  1 sibling, 0 replies; 35+ messages in thread
From: Brian Inglis @ 2022-04-13 22:21 UTC (permalink / raw)
  To: newlib

See [PATCH v2 ...] thread for JJ response and my reply!

On 2022-04-12 12:33, Brian Inglis wrote:
> On 2022-04-12 05:19, jdoubleu wrote:
>> On 4/11/2022 7:27 PM, Dimitar Dimitrov wrote:
>>> On Mon, Apr 11, 2022 at 01:17:16PM +0200, jdoubleu wrote:
>>>> looks like I'm running the testsuite against glibc and not newlib
>>>> (for target x86_64-pc-linux-gnu). I'm not even sure whether
>>>> there's a backend for linux.
>>>> I'm currently trying to run only the tzset code against the test
>>>> vectors (like Brian Inglis did[1]).
>>>> At least it show, that the newlib implementation differs from glibc. 
>>>> Maybe the test case is flawed and it should indeed fail.
> 
>>>>> 6:20:12 is the timezone string of the previous test case, whose tzset
>>>>> call was successful. Looking at the current code, this is expected
>>>>> behaviour.
> 
>>>> Okay. Looks like the condition[2] fails. The question is, which
>>>> part of it does?
>>> I believe it is the ('>' != tzenv[n]) check that fails, because the
>>> maximum parsed string limit of 10 has been reached and n=10.
> 
>>>> I've appended a patch, which prints all variables when the condition 
>>>> fails.
>>>> Could you please apply the patch and then recompile and re-run the 
>>>> tests
>>>> again?
> 
>>> Here is the result for arm-none-eabi:
>>> parsing name: tzenv="+0123456789ABCDEF>3:33:33", res=1, n=10, 
>>> tzenv[n] = 9
>>> Assertion failed! Expected 1647906533 to equal 1647916532. winter 
>>> time, timezone = "<+0123456789ABCDEF>3:33:33"
>>> I assume you no longer need assembly output from compiler?
> 
>>>> I've previously noticed something with the sscanf format[3].
>>>>> Perhaps TZ should be reset to UTC before the bail out?
> 
>>>> I don't think the implementation should fall back to UTC
>>>> whenever parsing failed. It apparently doesn't in glibc. I'm not
>>>> sure if the behavior is specified somewhere.
> 
>>> The tzset manual page says that UTC is used if TZ cannot be parsed:
>>> https://man7.org/linux/man-pages/man3/tzset.3.html
> 
>>>> Maybe resetting it before each test case is a good idea, though.
>>>> That makes it clearer, why the test case failed.
>>>>> With that chunk removed, as shown below:
>>>>>      {"3:33:33",               IN_SECONDS(3, 33, 33), 
>>>>> NO_TIME},                 // truncates the name (17 + 1)
>>>>> I still get:
>>>>>    Assertion failed! Expected 1647906533 to equal 1647916532. 
>>>>> winter time, timezone = "3:33:33"
> 
>>>> My bad; "3:33:33" isn't a valid timezone string. It has to be 
>>>> prefixed by a
>>>> name e.g. "MESZ" or "<+00>", as you tried. That explains why it is also
>>>> failing.
> 
>>>> [1]: https://sourceware.org/pipermail/newlib/2022/019529.html
>>>> [2]: 
>>>> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57 
>>>>
>>>> [3]: https://sourceware.org/pipermail/newlib/2022/019535.html
> 
>>> I believe it is the ('>' != tzenv[n]) check that fails, because the
>>> maximum parsed string limit of 10 has been reached and n=10.
> 
>> Okay, so the newlib implementation actually fails, when the name is
>> too long.
>> The POSIX standard leaves it up to the implementation how to handle 
>> names[1]:
> 
>>> The interpretation of these fields is unspecified if either field is 
>>> less than three bytes (except for the case when dst is missing), more 
>>> than {TZNAME_MAX} bytes, or if they contain characters other than 
>>> those specified.
> 
>> I'm not sure where to go with this. In both cases, the current 
>> implementation needs a fix. Looping in Brian Inglis, the writer of the 
>> current implementation and Jeff Johnston, a maintainer.
>> As I see it, there are two solutions:
>> 1. Treat long names as an error case. The failed condition[2] needs to 
>> apply UTC as current time, like Dimitar Dimitrov previously suggested. 
>> The test needs to be changed to check that it actually fails.
>> 2. Mimic glibc's behavior and ignore the remaining characters from the 
>> name. In that case, the condition[2] and following code needs to be 
>> updated as well.
>> I am happy to provide a patch, when we agreed on one solution.
> 
>>> I assume you no longer need assembly output from compiler?
> 
>> No, thanks. We already found the issue. It would only be helpful, if 
>> different configurations generate different code (through macros, 
>> optimization, etc.).
>> [1]:https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03 
>> [2]:https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;hb=HEAD#l57 
> 
> 
> Does anyone know what glibc or the BSDs do on short < 3 char 
> abbreviations, or the BSDs do on > LIMIT abbreviations?
> What do they do on errors - nothing or revert to UTC?
> 
> Should tzset_r allow 1...2 char abbreviations or treat them as errors?
> 
> Should tzset_r allow > 10 char abbreviations, ignore and skip extra 
> chars, or treat them as errors?
> 
> If it ignores and skips extra or accepts too few characters, then 
> presumably it should process the remaining fields if present?
> 
> POSIX tzset_r says only that it overrides the default time zone, so is 
> there a spec for the default time zone, or is it implementation defined 
> (e.g. /etc/timezone /etc/localtime)?
> 
> Should tzset_r revert to UTC on errors, or just not change the TZ, and 
> leave it up to the application to handle the error?
> 


-- 
Take care. Thanks, Brian Inglis, Calgary, Alberta, Canada

This email may be disturbing to some readers as it contains
too much technical detail. Reader discretion is advised.
[Data in binary units and prefixes, physical quantities in SI.]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-13 22:19                     ` Brian Inglis
@ 2022-04-14  8:59                       ` jdoubleu
  2022-04-14 16:31                         ` Brian Inglis
  2022-05-14 14:39                         ` jdoubleu
  0 siblings, 2 replies; 35+ messages in thread
From: jdoubleu @ 2022-04-14  8:59 UTC (permalink / raw)
  To: newlib

On 2022-04-13 14:33, Jeff Johnston wrote:
> Looking at the glibc tzset code I have locally (not latest/greatest, but
> does support angle brackets): 

I can confirm the behavior with glibc[1]. As it turns out, glibc does 
not directly impose a character limit on the timezone name, but requires 
at least 3 characters. From the man page[2]:

> The std string specifies an abbreviation for the timezone and must be
> three or more alphabetic characters.

To my misunderstanding, they don't even ignore remaining characters, but 
keep all of them, as you can see in the output[1] and Jeff Johnston 
explained.

> but you imply that glibc in fact uses the equivalent of the scanf "%m[...]" (malloc) > modifier, and I think using that would be against the newlib 
philosophy to keep things
> limited and under control to support small targets.

I agree, newlib SHOULD impose a limit. Especially, since the POSIX 
standard[3] already introduces an upper limit, though unspecified.

The current limit is 11 characters, if I'm not mistaken. The longest 
name from the tzdb[4] is "<+1030>" i.e. 5 chars (see all extracted 
names[5]). All others usually are 3 or 4 chars long.

That said, I think 11 is reasonably large enough.

However, it could be helpful to get the limit from user-code, because 
there is no error reporting mechanism used. Right now, the limit is only 
defined in tzset_r.c[6]. So maybe move it to limits.h? One thing to not 
forget here is to keep limit in sync with the sscanf format's maximum 
field width[7].


To summarize, the following cases are errors:
1. name is too short (less than 3 chars)
2. name is too long (more than TZNAME_MAX)
3. name includes arbitrary chars (not <>+-ALPHANUM)
In all of these error cases, the time should be set back to UTC, right?


I'm going to prepare some test cases for the test suite to check for the 
errors as well.


[1]: https://godbolt.org/z/o93zo3qxv
[2]: https://www.man7.org/linux/man-pages/man3/tzset.3.html
[3]: 
https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
[4]: https://github.com/eggert/tz
[5]: 
https://raw.githubusercontent.com/nayarsystems/posix_tz_db/master/zones.csv
[6]: 
https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l13
[7]: 
https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57



Cheers
---
🙎🏻‍♂️ jdoubleu
On 4/14/2022 12:19 AM, Brian Inglis wrote:
> On 2022-04-13 14:33, Jeff Johnston wrote:
>> Looking at the glibc tzset code I have locally (not latest/greatest, but
>> does support angle brackets):
>>
>> If there any parse failures, UTC is defaulted.
> 
> We currently leave the time zone info unchanged.
> 
>> Extraneous characters inside brackets or less than 3 characters is a
>> parse failure.
> ✔ Check    ✔ Check
> 
>> Glibc parses the tz name string char by char and allocates space for
>> the name strings so there is no max size.
> 
> The suggestion was that glibc ignores the remaining characters, but you 
> imply that glibc in fact uses the equivalent of the scanf "%m[...]" 
> (malloc) modifier, and I think using that would be against the newlib 
> philosophy to keep things limited and under control to support small 
> targets.  Larger targets like Cygwin (do our own thing including 
> zoneinfo), and perhaps RTEMS, can supply their own enhancements.
> 
>> the name strings so there is no max size.  I am fine if you want to 
>> mandate a maximum, but if you do, then too many chars should be 
>> treated as a failure.  If you aren't certain of the limit, make the 
>> limit higher than you expect.
> Current limits are 3-10 allowing for e.g. <MESZ+03:30> which is the most 
> ever likely to be used. It might be reasonable to bump it up to say 15.
> 
>> If people run into max limit with reasonable timezone format strings, 
>> then
>> we can up the limit.
> 
> The conditions are more or less what is implemented, but we could do 
> with a couple more tweaks to improve things, like check for more or 
> extraneous chars within the bracket quotes, and that no characters 
> remain unconsumed at the end of the parse.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-14  8:59                       ` jdoubleu
@ 2022-04-14 16:31                         ` Brian Inglis
  2022-04-14 19:23                           ` Jeff Johnston
  2022-05-14 14:39                         ` jdoubleu
  1 sibling, 1 reply; 35+ messages in thread
From: Brian Inglis @ 2022-04-14 16:31 UTC (permalink / raw)
  To: newlib

I am still not hearing from where the requirement originates to set 
UTC/GMT/etc or do anything other than leave everything as is.
Is this glibc behaviour, and why not /etc/localtime or /etc/timezone?

-- 
Take care. Thanks, Brian Inglis, Calgary, Alberta, Canada

This email may be disturbing to some readers as it contains
too much technical detail. Reader discretion is advised.
[Data in binary units and prefixes, physical quantities in SI.]


On 2022-04-14 02:59, jdoubleu wrote:
> On 2022-04-13 14:33, Jeff Johnston wrote:
>> Looking at the glibc tzset code I have locally (not latest/greatest, but
>> does support angle brackets): 
> 
> I can confirm the behavior with glibc[1]. As it turns out, glibc does 
> not directly impose a character limit on the timezone name, but requires 
> at least 3 characters. From the man page[2]:
> 
>> The std string specifies an abbreviation for the timezone and must be
>> three or more alphabetic characters.
> 
> To my misunderstanding, they don't even ignore remaining characters, but 
> keep all of them, as you can see in the output[1] and Jeff Johnston 
> explained.
> 
>> but you imply that glibc in fact uses the equivalent of the scanf 
>> "%m[...]" (malloc) > modifier, and I think using that would be against 
>> the newlib 
> philosophy to keep things
>> limited and under control to support small targets.
> 
> I agree, newlib SHOULD impose a limit. Especially, since the POSIX 
> standard[3] already introduces an upper limit, though unspecified.
> 
> The current limit is 11 characters, if I'm not mistaken. The longest 
> name from the tzdb[4] is "<+1030>" i.e. 5 chars (see all extracted 
> names[5]). All others usually are 3 or 4 chars long.
> 
> That said, I think 11 is reasonably large enough.
> 
> However, it could be helpful to get the limit from user-code, because 
> there is no error reporting mechanism used. Right now, the limit is only 
> defined in tzset_r.c[6]. So maybe move it to limits.h? One thing to not 
> forget here is to keep limit in sync with the sscanf format's maximum 
> field width[7].
> 
> 
> To summarize, the following cases are errors:
> 1. name is too short (less than 3 chars)
> 2. name is too long (more than TZNAME_MAX)
> 3. name includes arbitrary chars (not <>+-ALPHANUM)
> In all of these error cases, the time should be set back to UTC, right?
> 
> 
> I'm going to prepare some test cases for the test suite to check for the 
> errors as well.
> 
> 
> [1]: https://godbolt.org/z/o93zo3qxv
> [2]: https://www.man7.org/linux/man-pages/man3/tzset.3.html
> [3]: 
> https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03 
> 
> [4]: https://github.com/eggert/tz
> [5]: 
> https://raw.githubusercontent.com/nayarsystems/posix_tz_db/master/zones.csv
> [6]: 
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l13 
> 
> [7]: 
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57 
> 
> 
> 
> 
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu
> On 4/14/2022 12:19 AM, Brian Inglis wrote:
>> On 2022-04-13 14:33, Jeff Johnston wrote:
>>> Looking at the glibc tzset code I have locally (not latest/greatest, but
>>> does support angle brackets):
>>>
>>> If there any parse failures, UTC is defaulted.
>>
>> We currently leave the time zone info unchanged.
>>
>>> Extraneous characters inside brackets or less than 3 characters is a
>>> parse failure.
>> ✔ Check    ✔ Check
>>
>>> Glibc parses the tz name string char by char and allocates space for
>>> the name strings so there is no max size.
>>
>> The suggestion was that glibc ignores the remaining characters, but 
>> you imply that glibc in fact uses the equivalent of the scanf 
>> "%m[...]" (malloc) modifier, and I think using that would be against 
>> the newlib philosophy to keep things limited and under control to 
>> support small targets.  Larger targets like Cygwin (do our own thing 
>> including zoneinfo), and perhaps RTEMS, can supply their own 
>> enhancements.
>>
>>> the name strings so there is no max size.  I am fine if you want to 
>>> mandate a maximum, but if you do, then too many chars should be 
>>> treated as a failure.  If you aren't certain of the limit, make the 
>>> limit higher than you expect.
>> Current limits are 3-10 allowing for e.g. <MESZ+03:30> which is the 
>> most ever likely to be used. It might be reasonable to bump it up to 
>> say 15.
>>
>>> If people run into max limit with reasonable timezone format strings, 
>>> then
>>> we can up the limit.
>>
>> The conditions are more or less what is implemented, but we could do 
>> with a couple more tweaks to improve things, like check for more or 
>> extraneous chars within the bracket quotes, and that no characters 
>> remain unconsumed at the end of the parse.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-14 16:31                         ` Brian Inglis
@ 2022-04-14 19:23                           ` Jeff Johnston
  2022-04-15 10:10                             ` jdoubleu
  0 siblings, 1 reply; 35+ messages in thread
From: Jeff Johnston @ 2022-04-14 19:23 UTC (permalink / raw)
  To: Newlib

On Thu, Apr 14, 2022 at 12:31 PM Brian Inglis <
Brian.Inglis@systematicsw.ab.ca> wrote:

> I am still not hearing from where the requirement originates to set
> UTC/GMT/etc or do anything other than leave everything as is.
> Is this glibc behaviour, and why not /etc/localtime or /etc/timezone?
>
>
It is glibc behaviour as I mentioned in my note.  The following is also
from man tzset

      If the TZ variable does not appear in the environment, the tzname
vari‐
       able  is  initialized  with  the best approximation of local wall
clock
       time, as specified by the tzfile(5)-format file localtime found in
 the
       system   timezone   directory   (see  below).   (One  also  often
 sees
       /etc/localtime used here, a symlink to the right  file  in  the
 system
       timezone directory.)

       If  the  TZ  variable  does  appear in the environment but its value
is
       empty or its value cannot be interpreted using any of the formats
spec‐
       ified below, Coordinated Universal Time (UTC) is used.

Note about if its value is specified and cannot be interpreted using any of
the formats specified.
If there is an error, then that clause would apply.  In glibc's case, it is
less than 3 chars and invalid
chars.  In our case, exceeding the max limit would also apply.

From glibc: tzset.c

 /* Clear out old state and reset to unnamed UTC.  */
  memset (tz_rules, '\0', sizeof tz_rules);
  tz_rules[0].name = tz_rules[1].name = "";

  /* Get the standard timezone name.  */
  if (parse_tzname (&tz, 0) && parse_offset (&tz, 0))

If the parse_tzname fails or parsing the dst name fails, unnamed UTC is
used.

-- Jeff J.

-- 
> Take care. Thanks, Brian Inglis, Calgary, Alberta, Canada
>
> This email may be disturbing to some readers as it contains
> too much technical detail. Reader discretion is advised.
> [Data in binary units and prefixes, physical quantities in SI.]
>
>
> On 2022-04-14 02:59, jdoubleu wrote:
> > On 2022-04-13 14:33, Jeff Johnston wrote:
> >> Looking at the glibc tzset code I have locally (not latest/greatest, but
> >> does support angle brackets):
> >
> > I can confirm the behavior with glibc[1]. As it turns out, glibc does
> > not directly impose a character limit on the timezone name, but requires
> > at least 3 characters. From the man page[2]:
> >
> >> The std string specifies an abbreviation for the timezone and must be
> >> three or more alphabetic characters.
> >
> > To my misunderstanding, they don't even ignore remaining characters, but
> > keep all of them, as you can see in the output[1] and Jeff Johnston
> > explained.
> >
> >> but you imply that glibc in fact uses the equivalent of the scanf
> >> "%m[...]" (malloc) > modifier, and I think using that would be against
> >> the newlib
> > philosophy to keep things
> >> limited and under control to support small targets.
> >
> > I agree, newlib SHOULD impose a limit. Especially, since the POSIX
> > standard[3] already introduces an upper limit, though unspecified.
> >
> > The current limit is 11 characters, if I'm not mistaken. The longest
> > name from the tzdb[4] is "<+1030>" i.e. 5 chars (see all extracted
> > names[5]). All others usually are 3 or 4 chars long.
> >
> > That said, I think 11 is reasonably large enough.
> >
> > However, it could be helpful to get the limit from user-code, because
> > there is no error reporting mechanism used. Right now, the limit is only
> > defined in tzset_r.c[6]. So maybe move it to limits.h? One thing to not
> > forget here is to keep limit in sync with the sscanf format's maximum
> > field width[7].
> >
> >
> > To summarize, the following cases are errors:
> > 1. name is too short (less than 3 chars)
> > 2. name is too long (more than TZNAME_MAX)
> > 3. name includes arbitrary chars (not <>+-ALPHANUM)
> > In all of these error cases, the time should be set back to UTC, right?
> >
> >
> > I'm going to prepare some test cases for the test suite to check for the
> > errors as well.
> >
> >
> > [1]: https://godbolt.org/z/o93zo3qxv
> > [2]: https://www.man7.org/linux/man-pages/man3/tzset.3.html
> > [3]:
> >
> https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
> >
> > [4]: https://github.com/eggert/tz
> > [5]:
> >
> https://raw.githubusercontent.com/nayarsystems/posix_tz_db/master/zones.csv
> > [6]:
> >
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l13
> >
> > [7]:
> >
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
> >
> >
> >
> >
> > Cheers
> > ---
> > 🙎🏻‍♂️ jdoubleu
> > On 4/14/2022 12:19 AM, Brian Inglis wrote:
> >> On 2022-04-13 14:33, Jeff Johnston wrote:
> >>> Looking at the glibc tzset code I have locally (not latest/greatest,
> but
> >>> does support angle brackets):
> >>>
> >>> If there any parse failures, UTC is defaulted.
> >>
> >> We currently leave the time zone info unchanged.
> >>
> >>> Extraneous characters inside brackets or less than 3 characters is a
> >>> parse failure.
> >> ✔ Check    ✔ Check
> >>
> >>> Glibc parses the tz name string char by char and allocates space for
> >>> the name strings so there is no max size.
> >>
> >> The suggestion was that glibc ignores the remaining characters, but
> >> you imply that glibc in fact uses the equivalent of the scanf
> >> "%m[...]" (malloc) modifier, and I think using that would be against
> >> the newlib philosophy to keep things limited and under control to
> >> support small targets.  Larger targets like Cygwin (do our own thing
> >> including zoneinfo), and perhaps RTEMS, can supply their own
> >> enhancements.
> >>
> >>> the name strings so there is no max size.  I am fine if you want to
> >>> mandate a maximum, but if you do, then too many chars should be
> >>> treated as a failure.  If you aren't certain of the limit, make the
> >>> limit higher than you expect.
> >> Current limits are 3-10 allowing for e.g. <MESZ+03:30> which is the
> >> most ever likely to be used. It might be reasonable to bump it up to
> >> say 15.
> >>
> >>> If people run into max limit with reasonable timezone format strings,
> >>> then
> >>> we can up the limit.
> >>
> >> The conditions are more or less what is implemented, but we could do
> >> with a couple more tweaks to improve things, like check for more or
> >> extraneous chars within the bracket quotes, and that no characters
> >> remain unconsumed at the end of the parse.
>
>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-14 19:23                           ` Jeff Johnston
@ 2022-04-15 10:10                             ` jdoubleu
  2022-04-27 19:30                               ` Jeff Johnston
  0 siblings, 1 reply; 35+ messages in thread
From: jdoubleu @ 2022-04-15 10:10 UTC (permalink / raw)
  To: newlib

On Thu, Apr 14, 2022 at 12:31 PM Brian Inglis wrote:
> I am still not hearing from where the requirement originates to set
> UTC/GMT/etc or do anything other than leave everything as is.
> Is this glibc behaviour, and why not /etc/localtime or /etc/timezone?

On 4/14/2022 9:23 PM, Jeff Johnston wrote:
> It is glibc behaviour as I mentioned in my note. The following is also
> from man tzset

Aside from glibc, the BSD man page[1] also states:
> If the TZ environment variable [..] cannot be interpreted as a direct specification, UTC is used.
Where "direct specification" is the formatted string.

The POSIX standard does not seem to explicitly state what happens in an 
error case, besides[2]:
> The interpretation of these fields is unspecified if either field is less than three bytes [..],  > more than {TZNAME_MAX} bytes, or if they contain characters other 
than those specified.and[3]
> If TZ is absent from the environment, implementation-defined default timezone information shall be used.

[1]: https://man.openbsd.org/tzset
[2]: 
https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
[2]: https://pubs.opengroup.org/onlinepubs/9699919799/functions/tzset.html#


Cheers
---
🙎🏻‍♂️ jdoubleu
On 4/14/2022 9:23 PM, Jeff Johnston wrote:
> On Thu, Apr 14, 2022 at 12:31 PM Brian Inglis <
> Brian.Inglis@systematicsw.ab.ca> wrote:
> 
>> I am still not hearing from where the requirement originates to set
>> UTC/GMT/etc or do anything other than leave everything as is.
>> Is this glibc behaviour, and why not /etc/localtime or /etc/timezone?
>>
>>
> It is glibc behaviour as I mentioned in my note.  The following is also
> from man tzset
> 
>        If the TZ variable does not appear in the environment, the tzname
> vari‐
>         able  is  initialized  with  the best approximation of local wall
> clock
>         time, as specified by the tzfile(5)-format file localtime found in
>   the
>         system   timezone   directory   (see  below).   (One  also  often
>   sees
>         /etc/localtime used here, a symlink to the right  file  in  the
>   system
>         timezone directory.)
> 
>         If  the  TZ  variable  does  appear in the environment but its value
> is
>         empty or its value cannot be interpreted using any of the formats
> spec‐
>         ified below, Coordinated Universal Time (UTC) is used.
> 
> Note about if its value is specified and cannot be interpreted using any of
> the formats specified.
> If there is an error, then that clause would apply.  In glibc's case, it is
> less than 3 chars and invalid
> chars.  In our case, exceeding the max limit would also apply.
> 
>  From glibc: tzset.c
> 
>   /* Clear out old state and reset to unnamed UTC.  */
>    memset (tz_rules, '\0', sizeof tz_rules);
>    tz_rules[0].name = tz_rules[1].name = "";
> 
>    /* Get the standard timezone name.  */
>    if (parse_tzname (&tz, 0) && parse_offset (&tz, 0))
> 
> If the parse_tzname fails or parsing the dst name fails, unnamed UTC is
> used.
> 
> -- Jeff J.
> 

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-15 10:10                             ` jdoubleu
@ 2022-04-27 19:30                               ` Jeff Johnston
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Johnston @ 2022-04-27 19:30 UTC (permalink / raw)
  To: jdoubleu; +Cc: Newlib

[-- Attachment #1: Type: text/plain, Size: 3528 bytes --]

I have taken a stab at creating a tzset_r.c that handles errors by setting
timezone to unnamed UTC.

Let me know if there are any issues.

-- Jeff J.

On Fri, Apr 15, 2022 at 6:10 AM jdoubleu <hi@jdoubleu.de> wrote:

> On Thu, Apr 14, 2022 at 12:31 PM Brian Inglis wrote:
> > I am still not hearing from where the requirement originates to set
> > UTC/GMT/etc or do anything other than leave everything as is.
> > Is this glibc behaviour, and why not /etc/localtime or /etc/timezone?
>
> On 4/14/2022 9:23 PM, Jeff Johnston wrote:
> > It is glibc behaviour as I mentioned in my note. The following is also
> > from man tzset
>
> Aside from glibc, the BSD man page[1] also states:
> > If the TZ environment variable [..] cannot be interpreted as a direct
> specification, UTC is used.
> Where "direct specification" is the formatted string.
>
> The POSIX standard does not seem to explicitly state what happens in an
> error case, besides[2]:
> > The interpretation of these fields is unspecified if either field is
> less than three bytes [..],  > more than {TZNAME_MAX} bytes, or if they
> contain characters other
> than those specified.and[3]
> > If TZ is absent from the environment, implementation-defined default
> timezone information shall be used.
>
> [1]: https://man.openbsd.org/tzset
> [2]:
>
> https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
> [2]:
> https://pubs.opengroup.org/onlinepubs/9699919799/functions/tzset.html#
>
>
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu
> On 4/14/2022 9:23 PM, Jeff Johnston wrote:
> > On Thu, Apr 14, 2022 at 12:31 PM Brian Inglis <
> > Brian.Inglis@systematicsw.ab.ca> wrote:
> >
> >> I am still not hearing from where the requirement originates to set
> >> UTC/GMT/etc or do anything other than leave everything as is.
> >> Is this glibc behaviour, and why not /etc/localtime or /etc/timezone?
> >>
> >>
> > It is glibc behaviour as I mentioned in my note.  The following is also
> > from man tzset
> >
> >        If the TZ variable does not appear in the environment, the tzname
> > vari‐
> >         able  is  initialized  with  the best approximation of local wall
> > clock
> >         time, as specified by the tzfile(5)-format file localtime found
> in
> >   the
> >         system   timezone   directory   (see  below).   (One  also  often
> >   sees
> >         /etc/localtime used here, a symlink to the right  file  in  the
> >   system
> >         timezone directory.)
> >
> >         If  the  TZ  variable  does  appear in the environment but its
> value
> > is
> >         empty or its value cannot be interpreted using any of the formats
> > spec‐
> >         ified below, Coordinated Universal Time (UTC) is used.
> >
> > Note about if its value is specified and cannot be interpreted using any
> of
> > the formats specified.
> > If there is an error, then that clause would apply.  In glibc's case, it
> is
> > less than 3 chars and invalid
> > chars.  In our case, exceeding the max limit would also apply.
> >
> >  From glibc: tzset.c
> >
> >   /* Clear out old state and reset to unnamed UTC.  */
> >    memset (tz_rules, '\0', sizeof tz_rules);
> >    tz_rules[0].name = tz_rules[1].name = "";
> >
> >    /* Get the standard timezone name.  */
> >    if (parse_tzname (&tz, 0) && parse_offset (&tz, 0))
> >
> > If the parse_tzname fails or parsing the dst name fails, unnamed UTC is
> > used.
> >
> > -- Jeff J.
> >
>
>

[-- Attachment #2: 0001-Modify-tzset_r.c-to-handle-errors.patch --]
[-- Type: text/x-patch, Size: 4523 bytes --]

From 0b5e905696ec12e6408e9330ef4e87ac6a57751d Mon Sep 17 00:00:00 2001
From: Jeff Johnston <jjohnstn@redhat.com>
Date: Wed, 27 Apr 2022 15:27:00 -0400
Subject: [PATCH] Modify tzset_r.c to handle errors

- change __tzset_r so errors end up setting the timezone to
  unnamed UTC
---
 newlib/libc/time/tzset_r.c | 56 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 16 deletions(-)

diff --git a/newlib/libc/time/tzset_r.c b/newlib/libc/time/tzset_r.c
index 9cb30b1..4d4baff 100644
--- a/newlib/libc/time/tzset_r.c
+++ b/newlib/libc/time/tzset_r.c
@@ -23,6 +23,7 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   unsigned short hh, mm, ss, m, w, d;
   int sign, n;
   int i, ch;
+  long offset0, offset1;
   __tzinfo_type *tz = __gettzinfo ();
 
   if ((tzenv = _getenv_r (reent_ptr, "TZ")) == NULL)
@@ -44,6 +45,12 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   if (prev_tzenv != NULL)
     strcpy (prev_tzenv, tzenv);
 
+  /* default to unnamed UTC in case of error */
+  _timezone = 0;
+  _daylight = 0;
+  _tzname[0] = "";
+  _tzname[1] = "";
+
   /* ignore implementation-specific format specifier */
   if (*tzenv == ':')
     ++tzenv;  
@@ -85,8 +92,7 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   if (sscanf (tzenv, "%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n) < 1)
     return;
   
-  tz->__tzrule[0].offset = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
-  _tzname[0] = __tzname_std;
+  offset0 = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
   tzenv += n;
 
   /* allow POSIX angle bracket < > quoted signed alphanumeric tz abbr e.g. <MESZ+0330> */
@@ -95,12 +101,16 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
       ++tzenv;
 
       /* quit if no items, too few or too many chars, or no close quote '>' */
-      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_dst, &n) <= 0
-		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
+      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_dst, &n) <= 0 && tzenv[0] == '>')
 	{ /* No dst */
-	  _tzname[1] = _tzname[0];
-	  _timezone = tz->__tzrule[0].offset;
-	  _daylight = 0;
+          _tzname[0] = __tzname_std;
+          _tzname[1] = _tzname[0];
+          tz->__tzrule[0].offset = offset0;
+          _timezone = offset0;
+	  return;
+        }
+      else if (n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
+	{ /* error */
 	  return;
 	}
 
@@ -109,17 +119,20 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   else
     {
       /* allow POSIX unquoted alphabetic tz abbr e.g. MESZ */
-      if (sscanf (tzenv, "%10[A-Za-z]%n", __tzname_dst, &n) <= 0
-				|| n < TZNAME_MIN || TZNAME_MAX < n)
+      if (sscanf (tzenv, "%10[A-Za-z]%n", __tzname_dst, &n) <= 0)
 	{ /* No dst */
-	  _tzname[1] = _tzname[0];
-	  _timezone = tz->__tzrule[0].offset;
-	  _daylight = 0;
+          _tzname[0] = __tzname_std;
+          _tzname[1] = _tzname[0];
+          tz->__tzrule[0].offset = offset0;
+          _timezone = offset0;
+	  return;
+        }
+      else if (n < TZNAME_MIN || TZNAME_MAX < n)
+	{ /* error */
 	  return;
 	}
     }
 
-  _tzname[1] = __tzname_dst;
   tzenv += n;
 
   /* otherwise we have a dst name, look for the offset */
@@ -138,9 +151,9 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   
   n  = 0;
   if (sscanf (tzenv, "%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n) <= 0)
-    tz->__tzrule[1].offset = tz->__tzrule[0].offset - 3600;
+    offset1 = offset0 - 3600;
   else
-    tz->__tzrule[1].offset = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
+    offset1 = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
 
   tzenv += n;
 
@@ -211,13 +224,24 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
       n = 0;
       
       if (*tzenv == '/')
-	sscanf (tzenv, "/%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n);
+	if (sscanf (tzenv, "/%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n) <= 0)
+	  {
+	    /* error in time format, restore tz rules to default and return */
+	    struct __tzrule_struct default_tzrule = {'J', 0, 0, 0, 0, (time_t)0, 0L };
+	    tz->__tzrule[0] = default_tzrule;
+	    tz->__tzrule[1] = default_tzrule;
+            return;
+          }
 
       tz->__tzrule[i].s = ss + SECSPERMIN * mm + SECSPERHOUR  * hh;
       
       tzenv += n;
     }
 
+  tz->__tzrule[0].offset = offset0;
+  tz->__tzrule[1].offset = offset1;
+  _tzname[0] = __tzname_std;
+  _tzname[1] = __tzname_dst;
   __tzcalc_limits (tz->__tzyear);
   _timezone = tz->__tzrule[0].offset;  
   _daylight = tz->__tzrule[0].offset != tz->__tzrule[1].offset;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-14  8:59                       ` jdoubleu
  2022-04-14 16:31                         ` Brian Inglis
@ 2022-05-14 14:39                         ` jdoubleu
  2022-05-16 16:05                           ` Dimitar Dimitrov
  2022-05-17  8:45                           ` [PATCH] update tzset tests jdoubleu
  1 sibling, 2 replies; 35+ messages in thread
From: jdoubleu @ 2022-05-14 14:39 UTC (permalink / raw)
  To: newlib

> To summarize, the following cases are errors:
> 1. name is too short (less than 3 chars)
> 2. name is too long (more than TZNAME_MAX)
> 3. name includes arbitrary chars (not <>+-ALPHANUM)
> In all of these error cases, the time should be set back to UTC, right?

Here's my patch which adds test cases for the above error cases.

It's based on the latest (pending) changes of tzset[1]. Please apply 
them first.

I wasn't able to run the tests, yet. With glibc they're obviously failing.

Could you please test them?



[1]: https://sourceware.org/pipermail/newlib/2022/019581.html


Cheers
---
🙎🏻‍♂️ jdoubleu
On 4/14/2022 10:59 AM, jdoubleu wrote:
> On 2022-04-13 14:33, Jeff Johnston wrote:
>> Looking at the glibc tzset code I have locally (not latest/greatest, but
>> does support angle brackets): 
> 
> I can confirm the behavior with glibc[1]. As it turns out, glibc does 
> not directly impose a character limit on the timezone name, but requires 
> at least 3 characters. From the man page[2]:
> 
>> The std string specifies an abbreviation for the timezone and must be
>> three or more alphabetic characters.
> 
> To my misunderstanding, they don't even ignore remaining characters, but 
> keep all of them, as you can see in the output[1] and Jeff Johnston 
> explained.
> 
>> but you imply that glibc in fact uses the equivalent of the scanf 
>> "%m[...]" (malloc) > modifier, and I think using that would be against 
>> the newlib 
> philosophy to keep things
>> limited and under control to support small targets.
> 
> I agree, newlib SHOULD impose a limit. Especially, since the POSIX 
> standard[3] already introduces an upper limit, though unspecified.
> 
> The current limit is 11 characters, if I'm not mistaken. The longest 
> name from the tzdb[4] is "<+1030>" i.e. 5 chars (see all extracted 
> names[5]). All others usually are 3 or 4 chars long.
> 
> That said, I think 11 is reasonably large enough.
> 
> However, it could be helpful to get the limit from user-code, because 
> there is no error reporting mechanism used. Right now, the limit is only 
> defined in tzset_r.c[6]. So maybe move it to limits.h? One thing to not 
> forget here is to keep limit in sync with the sscanf format's maximum 
> field width[7].
> 
> 
> To summarize, the following cases are errors:
> 1. name is too short (less than 3 chars)
> 2. name is too long (more than TZNAME_MAX)
> 3. name includes arbitrary chars (not <>+-ALPHANUM)
> In all of these error cases, the time should be set back to UTC, right?
> 
> 
> I'm going to prepare some test cases for the test suite to check for the 
> errors as well.
> 
> 
> [1]: https://godbolt.org/z/o93zo3qxv
> [2]: https://www.man7.org/linux/man-pages/man3/tzset.3.html
> [3]: 
> https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03 
> 
> [4]: https://github.com/eggert/tz
> [5]: 
> https://raw.githubusercontent.com/nayarsystems/posix_tz_db/master/zones.csv
> [6]: 
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l13 
> 
> [7]: 
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57 
> 
> 
> 
> 
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu
> On 4/14/2022 12:19 AM, Brian Inglis wrote:
>> On 2022-04-13 14:33, Jeff Johnston wrote:
>>> Looking at the glibc tzset code I have locally (not latest/greatest, but
>>> does support angle brackets):
>>>
>>> If there any parse failures, UTC is defaulted.
>>
>> We currently leave the time zone info unchanged.
>>
>>> Extraneous characters inside brackets or less than 3 characters is a
>>> parse failure.
>> ✔ Check    ✔ Check
>>
>>> Glibc parses the tz name string char by char and allocates space for
>>> the name strings so there is no max size.
>>
>> The suggestion was that glibc ignores the remaining characters, but 
>> you imply that glibc in fact uses the equivalent of the scanf 
>> "%m[...]" (malloc) modifier, and I think using that would be against 
>> the newlib philosophy to keep things limited and under control to 
>> support small targets.  Larger targets like Cygwin (do our own thing 
>> including zoneinfo), and perhaps RTEMS, can supply their own 
>> enhancements.
>>
>>> the name strings so there is no max size.  I am fine if you want to 
>>> mandate a maximum, but if you do, then too many chars should be 
>>> treated as a failure.  If you aren't certain of the limit, make the 
>>> limit higher than you expect.
>> Current limits are 3-10 allowing for e.g. <MESZ+03:30> which is the 
>> most ever likely to be used. It might be reasonable to bump it up to 
>> say 15.
>>
>>> If people run into max limit with reasonable timezone format strings, 
>>> then
>>> we can up the limit.
>>
>> The conditions are more or less what is implemented, but we could do 
>> with a couple more tweaks to improve things, like check for more or 
>> extraneous chars within the bracket quotes, and that no characters 
>> remain unconsumed at the end of the parse.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-05-14 14:39                         ` jdoubleu
@ 2022-05-16 16:05                           ` Dimitar Dimitrov
  2022-05-16 17:38                             ` Jeff Johnston
  2022-05-17  8:45                           ` [PATCH] update tzset tests jdoubleu
  1 sibling, 1 reply; 35+ messages in thread
From: Dimitar Dimitrov @ 2022-05-16 16:05 UTC (permalink / raw)
  To: jdoubleu; +Cc: newlib, Jeff Johnston

On Sat, May 14, 2022 at 04:39:16PM +0200, jdoubleu wrote:
> > To summarize, the following cases are errors:
> > 1. name is too short (less than 3 chars)
> > 2. name is too long (more than TZNAME_MAX)
> > 3. name includes arbitrary chars (not <>+-ALPHANUM)
> > In all of these error cases, the time should be set back to UTC, right?
> 
> Here's my patch which adds test cases for the above error cases.

Did you forget to attach the actual patch with updated test cases? I
can't find it.

Regards,
Dimitar

> 
> It's based on the latest (pending) changes of tzset[1]. Please apply them
> first.
> 
> I wasn't able to run the tests, yet. With glibc they're obviously failing.
> 
> Could you please test them?
> 
> 
> 
> [1]: https://sourceware.org/pipermail/newlib/2022/019581.html
> 
> 
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu
> On 4/14/2022 10:59 AM, jdoubleu wrote:
> > On 2022-04-13 14:33, Jeff Johnston wrote:
> > > Looking at the glibc tzset code I have locally (not latest/greatest, but
> > > does support angle brackets):
> > 
> > I can confirm the behavior with glibc[1]. As it turns out, glibc does
> > not directly impose a character limit on the timezone name, but requires
> > at least 3 characters. From the man page[2]:
> > 
> > > The std string specifies an abbreviation for the timezone and must be
> > > three or more alphabetic characters.
> > 
> > To my misunderstanding, they don't even ignore remaining characters, but
> > keep all of them, as you can see in the output[1] and Jeff Johnston
> > explained.
> > 
> > > but you imply that glibc in fact uses the equivalent of the scanf
> > > "%m[...]" (malloc) > modifier, and I think using that would be
> > > against the newlib
> > philosophy to keep things
> > > limited and under control to support small targets.
> > 
> > I agree, newlib SHOULD impose a limit. Especially, since the POSIX
> > standard[3] already introduces an upper limit, though unspecified.
> > 
> > The current limit is 11 characters, if I'm not mistaken. The longest
> > name from the tzdb[4] is "<+1030>" i.e. 5 chars (see all extracted
> > names[5]). All others usually are 3 or 4 chars long.
> > 
> > That said, I think 11 is reasonably large enough.
> > 
> > However, it could be helpful to get the limit from user-code, because
> > there is no error reporting mechanism used. Right now, the limit is only
> > defined in tzset_r.c[6]. So maybe move it to limits.h? One thing to not
> > forget here is to keep limit in sync with the sscanf format's maximum
> > field width[7].
> > 
> > 
> > To summarize, the following cases are errors:
> > 1. name is too short (less than 3 chars)
> > 2. name is too long (more than TZNAME_MAX)
> > 3. name includes arbitrary chars (not <>+-ALPHANUM)
> > In all of these error cases, the time should be set back to UTC, right?
> > 
> > 
> > I'm going to prepare some test cases for the test suite to check for the
> > errors as well.
> > 
> > 
> > [1]: https://godbolt.org/z/o93zo3qxv
> > [2]: https://www.man7.org/linux/man-pages/man3/tzset.3.html
> > [3]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
> > 
> > [4]: https://github.com/eggert/tz
> > [5]:
> > https://raw.githubusercontent.com/nayarsystems/posix_tz_db/master/zones.csv
> > [6]: https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l13
> > 
> > [7]: https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
> > 
> > 
> > 
> > 
> > Cheers
> > ---
> > 🙎🏻‍♂️ jdoubleu
> > On 4/14/2022 12:19 AM, Brian Inglis wrote:
> > > On 2022-04-13 14:33, Jeff Johnston wrote:
> > > > Looking at the glibc tzset code I have locally (not latest/greatest, but
> > > > does support angle brackets):
> > > > 
> > > > If there any parse failures, UTC is defaulted.
> > > 
> > > We currently leave the time zone info unchanged.
> > > 
> > > > Extraneous characters inside brackets or less than 3 characters is a
> > > > parse failure.
> > > ✔ Check    ✔ Check
> > > 
> > > > Glibc parses the tz name string char by char and allocates space for
> > > > the name strings so there is no max size.
> > > 
> > > The suggestion was that glibc ignores the remaining characters, but
> > > you imply that glibc in fact uses the equivalent of the scanf
> > > "%m[...]" (malloc) modifier, and I think using that would be against
> > > the newlib philosophy to keep things limited and under control to
> > > support small targets.  Larger targets like Cygwin (do our own thing
> > > including zoneinfo), and perhaps RTEMS, can supply their own
> > > enhancements.
> > > 
> > > > the name strings so there is no max size.  I am fine if you want
> > > > to mandate a maximum, but if you do, then too many chars should
> > > > be treated as a failure.  If you aren't certain of the limit,
> > > > make the limit higher than you expect.
> > > Current limits are 3-10 allowing for e.g. <MESZ+03:30> which is the
> > > most ever likely to be used. It might be reasonable to bump it up to
> > > say 15.
> > > 
> > > > If people run into max limit with reasonable timezone format
> > > > strings, then
> > > > we can up the limit.
> > > 
> > > The conditions are more or less what is implemented, but we could do
> > > with a couple more tweaks to improve things, like check for more or
> > > extraneous chars within the bracket quotes, and that no characters
> > > remain unconsumed at the end of the parse.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-05-16 16:05                           ` Dimitar Dimitrov
@ 2022-05-16 17:38                             ` Jeff Johnston
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Johnston @ 2022-05-16 17:38 UTC (permalink / raw)
  To: Dimitar Dimitrov; +Cc: jdoubleu, Newlib

On Mon, May 16, 2022 at 12:06 PM Dimitar Dimitrov <dimitar@dinux.eu> wrote:

> On Sat, May 14, 2022 at 04:39:16PM +0200, jdoubleu wrote:
> > > To summarize, the following cases are errors:
> > > 1. name is too short (less than 3 chars)
> > > 2. name is too long (more than TZNAME_MAX)
> > > 3. name includes arbitrary chars (not <>+-ALPHANUM)
> > > In all of these error cases, the time should be set back to UTC, right?
>

Yes.

>
> > Here's my patch which adds test cases for the above error cases.
>
> Did you forget to attach the actual patch with updated test cases? I
> can't find it.
>
> Regards,
> Dimitar
>
> >
> > It's based on the latest (pending) changes of tzset[1]. Please apply them
> > first.
> >
> > I wasn't able to run the tests, yet. With glibc they're obviously
> failing.
> >
> > Could you please test them?
> >
> >
> >
> > [1]: https://sourceware.org/pipermail/newlib/2022/019581.html
> >
> >
> > Cheers
> > ---
> > 🙎🏻‍♂️ jdoubleu
> > On 4/14/2022 10:59 AM, jdoubleu wrote:
> > > On 2022-04-13 14:33, Jeff Johnston wrote:
> > > > Looking at the glibc tzset code I have locally (not latest/greatest,
> but
> > > > does support angle brackets):
> > >
> > > I can confirm the behavior with glibc[1]. As it turns out, glibc does
> > > not directly impose a character limit on the timezone name, but
> requires
> > > at least 3 characters. From the man page[2]:
> > >
> > > > The std string specifies an abbreviation for the timezone and must be
> > > > three or more alphabetic characters.
> > >
> > > To my misunderstanding, they don't even ignore remaining characters,
> but
> > > keep all of them, as you can see in the output[1] and Jeff Johnston
> > > explained.
> > >
> > > > but you imply that glibc in fact uses the equivalent of the scanf
> > > > "%m[...]" (malloc) > modifier, and I think using that would be
> > > > against the newlib
> > > philosophy to keep things
> > > > limited and under control to support small targets.
> > >
> > > I agree, newlib SHOULD impose a limit. Especially, since the POSIX
> > > standard[3] already introduces an upper limit, though unspecified.
> > >
> > > The current limit is 11 characters, if I'm not mistaken. The longest
> > > name from the tzdb[4] is "<+1030>" i.e. 5 chars (see all extracted
> > > names[5]). All others usually are 3 or 4 chars long.
> > >
> > > That said, I think 11 is reasonably large enough.
> > >
> > > However, it could be helpful to get the limit from user-code, because
> > > there is no error reporting mechanism used. Right now, the limit is
> only
> > > defined in tzset_r.c[6]. So maybe move it to limits.h? One thing to not
> > > forget here is to keep limit in sync with the sscanf format's maximum
> > > field width[7].
> > >
> > >
> > > To summarize, the following cases are errors:
> > > 1. name is too short (less than 3 chars)
> > > 2. name is too long (more than TZNAME_MAX)
> > > 3. name includes arbitrary chars (not <>+-ALPHANUM)
> > > In all of these error cases, the time should be set back to UTC, right?
> > >
> > >
> > > I'm going to prepare some test cases for the test suite to check for
> the
> > > errors as well.
> > >
> > >
> > > [1]: https://godbolt.org/z/o93zo3qxv
> > > [2]: https://www.man7.org/linux/man-pages/man3/tzset.3.html
> > > [3]:
> https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
> > >
> > > [4]: https://github.com/eggert/tz
> > > [5]:
> > >
> https://raw.githubusercontent.com/nayarsystems/posix_tz_db/master/zones.csv
> > > [6]:
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l13
> > >
> > > [7]:
> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
> > >
> > >
> > >
> > >
> > > Cheers
> > > ---
> > > 🙎🏻‍♂️ jdoubleu
> > > On 4/14/2022 12:19 AM, Brian Inglis wrote:
> > > > On 2022-04-13 14:33, Jeff Johnston wrote:
> > > > > Looking at the glibc tzset code I have locally (not
> latest/greatest, but
> > > > > does support angle brackets):
> > > > >
> > > > > If there any parse failures, UTC is defaulted.
> > > >
> > > > We currently leave the time zone info unchanged.
> > > >
> > > > > Extraneous characters inside brackets or less than 3 characters is
> a
> > > > > parse failure.
> > > > ✔ Check    ✔ Check
> > > >
> > > > > Glibc parses the tz name string char by char and allocates space
> for
> > > > > the name strings so there is no max size.
> > > >
> > > > The suggestion was that glibc ignores the remaining characters, but
> > > > you imply that glibc in fact uses the equivalent of the scanf
> > > > "%m[...]" (malloc) modifier, and I think using that would be against
> > > > the newlib philosophy to keep things limited and under control to
> > > > support small targets.  Larger targets like Cygwin (do our own thing
> > > > including zoneinfo), and perhaps RTEMS, can supply their own
> > > > enhancements.
> > > >
> > > > > the name strings so there is no max size.  I am fine if you want
> > > > > to mandate a maximum, but if you do, then too many chars should
> > > > > be treated as a failure.  If you aren't certain of the limit,
> > > > > make the limit higher than you expect.
> > > > Current limits are 3-10 allowing for e.g. <MESZ+03:30> which is the
> > > > most ever likely to be used. It might be reasonable to bump it up to
> > > > say 15.
> > > >
> > > > > If people run into max limit with reasonable timezone format
> > > > > strings, then
> > > > > we can up the limit.
> > > >
> > > > The conditions are more or less what is implemented, but we could do
> > > > with a couple more tweaks to improve things, like check for more or
> > > > extraneous chars within the bracket quotes, and that no characters
> > > > remain unconsumed at the end of the parse.
>
>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH] update tzset tests
  2022-05-14 14:39                         ` jdoubleu
  2022-05-16 16:05                           ` Dimitar Dimitrov
@ 2022-05-17  8:45                           ` jdoubleu
  2022-05-18 18:48                             ` Dimitar Dimitrov
  1 sibling, 1 reply; 35+ messages in thread
From: jdoubleu @ 2022-05-17  8:45 UTC (permalink / raw)
  To: newlib

[-- Attachment #1: Type: text/plain, Size: 5337 bytes --]

Sorry, here's the patch.


Cheers
---
🙎🏻‍♂️ jdoubleu
On 5/14/2022 4:39 PM, jdoubleu wrote:
>> To summarize, the following cases are errors:
>> 1. name is too short (less than 3 chars)
>> 2. name is too long (more than TZNAME_MAX)
>> 3. name includes arbitrary chars (not <>+-ALPHANUM)
>> In all of these error cases, the time should be set back to UTC, right?
> 
> Here's my patch which adds test cases for the above error cases.
> 
> It's based on the latest (pending) changes of tzset[1]. Please apply 
> them first.
> 
> I wasn't able to run the tests, yet. With glibc they're obviously failing.
> 
> Could you please test them?
> 
> 
> 
> [1]: https://sourceware.org/pipermail/newlib/2022/019581.html
> 
> 
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu
> On 4/14/2022 10:59 AM, jdoubleu wrote:
>> On 2022-04-13 14:33, Jeff Johnston wrote:
>>> Looking at the glibc tzset code I have locally (not latest/greatest, but
>>> does support angle brackets): 
>>
>> I can confirm the behavior with glibc[1]. As it turns out, glibc does 
>> not directly impose a character limit on the timezone name, but 
>> requires at least 3 characters. From the man page[2]:
>>
>>> The std string specifies an abbreviation for the timezone and must be
>>> three or more alphabetic characters.
>>
>> To my misunderstanding, they don't even ignore remaining characters, 
>> but keep all of them, as you can see in the output[1] and Jeff 
>> Johnston explained.
>>
>>> but you imply that glibc in fact uses the equivalent of the scanf 
>>> "%m[...]" (malloc) > modifier, and I think using that would be 
>>> against the newlib 
>> philosophy to keep things
>>> limited and under control to support small targets.
>>
>> I agree, newlib SHOULD impose a limit. Especially, since the POSIX 
>> standard[3] already introduces an upper limit, though unspecified.
>>
>> The current limit is 11 characters, if I'm not mistaken. The longest 
>> name from the tzdb[4] is "<+1030>" i.e. 5 chars (see all extracted 
>> names[5]). All others usually are 3 or 4 chars long.
>>
>> That said, I think 11 is reasonably large enough.
>>
>> However, it could be helpful to get the limit from user-code, because 
>> there is no error reporting mechanism used. Right now, the limit is 
>> only defined in tzset_r.c[6]. So maybe move it to limits.h? One thing 
>> to not forget here is to keep limit in sync with the sscanf format's 
>> maximum field width[7].
>>
>>
>> To summarize, the following cases are errors:
>> 1. name is too short (less than 3 chars)
>> 2. name is too long (more than TZNAME_MAX)
>> 3. name includes arbitrary chars (not <>+-ALPHANUM)
>> In all of these error cases, the time should be set back to UTC, right?
>>
>>
>> I'm going to prepare some test cases for the test suite to check for 
>> the errors as well.
>>
>>
>> [1]: https://godbolt.org/z/o93zo3qxv
>> [2]: https://www.man7.org/linux/man-pages/man3/tzset.3.html
>> [3]: 
>> https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03 
>>
>> [4]: https://github.com/eggert/tz
>> [5]: 
>> https://raw.githubusercontent.com/nayarsystems/posix_tz_db/master/zones.csv 
>>
>> [6]: 
>> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l13 
>>
>> [7]: 
>> https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57 
>>
>>
>>
>>
>> Cheers
>> ---
>> 🙎🏻‍♂️ jdoubleu
>> On 4/14/2022 12:19 AM, Brian Inglis wrote:
>>> On 2022-04-13 14:33, Jeff Johnston wrote:
>>>> Looking at the glibc tzset code I have locally (not latest/greatest, 
>>>> but
>>>> does support angle brackets):
>>>>
>>>> If there any parse failures, UTC is defaulted.
>>>
>>> We currently leave the time zone info unchanged.
>>>
>>>> Extraneous characters inside brackets or less than 3 characters is a
>>>> parse failure.
>>> ✔ Check    ✔ Check
>>>
>>>> Glibc parses the tz name string char by char and allocates space for
>>>> the name strings so there is no max size.
>>>
>>> The suggestion was that glibc ignores the remaining characters, but 
>>> you imply that glibc in fact uses the equivalent of the scanf 
>>> "%m[...]" (malloc) modifier, and I think using that would be against 
>>> the newlib philosophy to keep things limited and under control to 
>>> support small targets.  Larger targets like Cygwin (do our own thing 
>>> including zoneinfo), and perhaps RTEMS, can supply their own 
>>> enhancements.
>>>
>>>> the name strings so there is no max size.  I am fine if you want to 
>>>> mandate a maximum, but if you do, then too many chars should be 
>>>> treated as a failure.  If you aren't certain of the limit, make the 
>>>> limit higher than you expect.
>>> Current limits are 3-10 allowing for e.g. <MESZ+03:30> which is the 
>>> most ever likely to be used. It might be reasonable to bump it up to 
>>> say 15.
>>>
>>>> If people run into max limit with reasonable timezone format 
>>>> strings, then
>>>> we can up the limit.
>>>
>>> The conditions are more or less what is implemented, but we could do 
>>> with a couple more tweaks to improve things, like check for more or 
>>> extraneous chars within the bracket quotes, and that no characters 
>>> remain unconsumed at the end of the parse.

[-- Attachment #2: 0001-update-tzset-tests.patch --]
[-- Type: text/plain, Size: 4076 bytes --]

From 4423c43be6a730144b776ba4ec4204cf71b52348 Mon Sep 17 00:00:00 2001
From: jdoubleu <hi@jdoubleu.de>
Date: Sat, 14 May 2022 15:41:22 +0200
Subject: [PATCH] update tzset tests

Add test cases for parser errors after reworked parsing behavior.
---
 newlib/testsuite/newlib.time/tzset.c | 56 ++++++++++++++++++++++------
 1 file changed, 44 insertions(+), 12 deletions(-)

diff --git a/newlib/testsuite/newlib.time/tzset.c b/newlib/testsuite/newlib.time/tzset.c
index 0e5b196c6..8702b58db 100644
--- a/newlib/testsuite/newlib.time/tzset.c
+++ b/newlib/testsuite/newlib.time/tzset.c
@@ -111,13 +111,43 @@ struct tz_test test_timezones[] = {
     { /* Asia/Colombo */            "<+0530>-5:30",                    -IN_SECONDS(5, 30, 0),     NO_TIME},
     { /* Europe/Berlin */           "CET-1CEST,M3.5.0,M10.5.0/3",      -IN_SECONDS(1, 0, 0),     -IN_SECONDS(2, 0, 0)},
 
-    // END of list
-    {NULL, NO_TIME, NO_TIME}
+    /// test parsing errors
+    // 1. names are too long
+    {"JUSTEXCEEDI1:11:11",                                      0,   NO_TIME},
+    {"AVERYLONGNAMEWHICHEXCEEDSTZNAMEMAX2:22:22",               0,   NO_TIME},
+    {"FIRSTVERYLONGNAME3:33:33SECONDVERYLONGNAME4:44:44",       0,   0},
+    {"<JUSTEXCEEDI>5:55:55",                                    0,   NO_TIME},
+    {"<FIRSTVERYLONGNAME>3:33:33<SECONDVERYLONGNAME>4:44:44",   0,   0},
+    {"<+JUSTEXCEED>5:55:55",                                    0,   NO_TIME},
+
+    // 2. names are too short
+    {"JU6:34:47",               0,   NO_TIME},
+    {"HE6:34:47LO3:34:47",      0,   0},
+    {"<AB>2:34:47",             0,   NO_TIME},
+    {"<AB>2:34:47<CD>3:34:47",  0,   0},
+    
+    // 3. names contain invalid chars
+    {"N?ME2:10:56",     0,   NO_TIME},
+    {"N!ME2:10:56",     0,   NO_TIME},
+    {"N/ME2:10:56",     0,   NO_TIME},
+    {"N$ME2:10:56",     0,   NO_TIME},
+    {"NAME?2:10:56",    0,   NO_TIME},
+    {"?NAME2:10:56",    0,   NO_TIME},
+    {"NAME?UNK4:21:15",                 0,   NO_TIME},
+    {"NAME!UNK4:22:15NEXT/NAME4:23:15", 0,   NO_TIME},
+
+    // 4. bogus strings
+    {"NOINFO",          0,  NO_TIME},
+    {"HOUR:16:18",      0,  NO_TIME},
+    {"<BEGIN",          0,  NO_TIME},
+    {"<NEXT:55",        0,  NO_TIME},
+    {">WRONG<2:15:00",  0,  NO_TIME},
+    {"ST<ART4:30:00",   0,  NO_TIME},
+    //{"MANY8:00:00:00",  0,  NO_TIME},
+    {"\0",              0,  NO_TIME},
+    {"M\0STR7:30:36",   0,  NO_TIME}
 };
 
-// helper macros
-#define FOR_TIMEZONES(iter_name) for (struct tz_test* iter_name = test_timezones; iter_name->tzstr != NULL; ++iter_name)
-
 // END test vectors
 
 static int failed = 0;
@@ -136,22 +166,24 @@ void test_TimezoneStrings(void)
 {
     char buffer[128];
 
-    FOR_TIMEZONES(ptr)
+    for (int i = 0; i < (sizeof(test_timezones) / sizeof(struct tz_test)); ++i)
     {
-        setenv("TZ", ptr->tzstr, 1);
+        struct tz_test ptr = test_timezones[i];
+
+        setenv("TZ", ptr.tzstr, 1);
         tzset();
 
-        snprintf(buffer, 128, "winter time, timezone = \"%s\"", ptr->tzstr);
+        snprintf(buffer, 128, "winter time, timezone = \"%s\"", ptr.tzstr);
 
         struct tm winter_tm_copy = winter_tm; // copy
-        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr->offset_seconds, mktime(&winter_tm_copy), buffer);
+        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr.offset_seconds, mktime(&winter_tm_copy), buffer);
 
-        if (ptr->dst_offset_seconds != NO_TIME)
+        if (ptr.dst_offset_seconds != NO_TIME)
         {
-            snprintf(buffer, 128, "summer time, timezone = \"%s\"", ptr->tzstr);
+            snprintf(buffer, 128, "summer time, timezone = \"%s\"", ptr.tzstr);
 
             struct tm summer_tm_copy = summer_tm; // copy
-            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time + ptr->dst_offset_seconds, mktime(&summer_tm_copy), buffer);
+            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time + ptr.dst_offset_seconds, mktime(&summer_tm_copy), buffer);
         }
     }
 }
-- 
2.35.1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] update tzset tests
  2022-05-17  8:45                           ` [PATCH] update tzset tests jdoubleu
@ 2022-05-18 18:48                             ` Dimitar Dimitrov
  2022-05-18 20:56                               ` Keith Packard
  0 siblings, 1 reply; 35+ messages in thread
From: Dimitar Dimitrov @ 2022-05-18 18:48 UTC (permalink / raw)
  To: jdoubleu; +Cc: newlib, Jeff Johnston

On Tue, May 17, 2022 at 10:45:11AM +0200, jdoubleu wrote:
> Sorry, here's the patch.

Hi jdoubleu,

I managed to test your change with https://sourceware.org/pipermail/newlib/2022/019710.html

Only the following test case fails in tzset.c:
     {"<+0123456789ABCDEF>3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
Failure message is:
  Assertion failed! Expected 1647906533 to equal 1647893720. winter time, timezone = "<+0123456789ABCDEF>3:33:33"


The rest of the cases pass on pru-unknown-elf target.

BTW, it took me a while to realize that your patch and the source code in
newlib's GIT have different line endings :)

Regards,
Dimitar

> 
> 
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu
> On 5/14/2022 4:39 PM, jdoubleu wrote:
> > > To summarize, the following cases are errors:
> > > 1. name is too short (less than 3 chars)
> > > 2. name is too long (more than TZNAME_MAX)
> > > 3. name includes arbitrary chars (not <>+-ALPHANUM)
> > > In all of these error cases, the time should be set back to UTC, right?
> > 
> > Here's my patch which adds test cases for the above error cases.
> > 
> > It's based on the latest (pending) changes of tzset[1]. Please apply
> > them first.
> > 
> > I wasn't able to run the tests, yet. With glibc they're obviously failing.
> > 
> > Could you please test them?
> > 
> > 
> > 
> > [1]: https://sourceware.org/pipermail/newlib/2022/019581.html
> > 
> > 
> > Cheers
> > ---
> > 🙎🏻‍♂️ jdoubleu
> > On 4/14/2022 10:59 AM, jdoubleu wrote:
> > > On 2022-04-13 14:33, Jeff Johnston wrote:
> > > > Looking at the glibc tzset code I have locally (not latest/greatest, but
> > > > does support angle brackets):
> > > 
> > > I can confirm the behavior with glibc[1]. As it turns out, glibc
> > > does not directly impose a character limit on the timezone name, but
> > > requires at least 3 characters. From the man page[2]:
> > > 
> > > > The std string specifies an abbreviation for the timezone and must be
> > > > three or more alphabetic characters.
> > > 
> > > To my misunderstanding, they don't even ignore remaining characters,
> > > but keep all of them, as you can see in the output[1] and Jeff
> > > Johnston explained.
> > > 
> > > > but you imply that glibc in fact uses the equivalent of the
> > > > scanf "%m[...]" (malloc) > modifier, and I think using that
> > > > would be against the newlib
> > > philosophy to keep things
> > > > limited and under control to support small targets.
> > > 
> > > I agree, newlib SHOULD impose a limit. Especially, since the POSIX
> > > standard[3] already introduces an upper limit, though unspecified.
> > > 
> > > The current limit is 11 characters, if I'm not mistaken. The longest
> > > name from the tzdb[4] is "<+1030>" i.e. 5 chars (see all extracted
> > > names[5]). All others usually are 3 or 4 chars long.
> > > 
> > > That said, I think 11 is reasonably large enough.
> > > 
> > > However, it could be helpful to get the limit from user-code,
> > > because there is no error reporting mechanism used. Right now, the
> > > limit is only defined in tzset_r.c[6]. So maybe move it to limits.h?
> > > One thing to not forget here is to keep limit in sync with the
> > > sscanf format's maximum field width[7].
> > > 
> > > 
> > > To summarize, the following cases are errors:
> > > 1. name is too short (less than 3 chars)
> > > 2. name is too long (more than TZNAME_MAX)
> > > 3. name includes arbitrary chars (not <>+-ALPHANUM)
> > > In all of these error cases, the time should be set back to UTC, right?
> > > 
> > > 
> > > I'm going to prepare some test cases for the test suite to check for
> > > the errors as well.
> > > 
> > > 
> > > [1]: https://godbolt.org/z/o93zo3qxv
> > > [2]: https://www.man7.org/linux/man-pages/man3/tzset.3.html
> > > [3]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03
> > > 
> > > [4]: https://github.com/eggert/tz
> > > [5]: https://raw.githubusercontent.com/nayarsystems/posix_tz_db/master/zones.csv
> > > 
> > > [6]: https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l13
> > > 
> > > [7]: https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=newlib/libc/time/tzset_r.c;h=9cb30b188f989f65ec9eb6417f5d74020f8c72e9;hb=HEAD#l57
> > > 
> > > 
> > > 
> > > 
> > > Cheers
> > > ---
> > > 🙎🏻‍♂️ jdoubleu
> > > On 4/14/2022 12:19 AM, Brian Inglis wrote:
> > > > On 2022-04-13 14:33, Jeff Johnston wrote:
> > > > > Looking at the glibc tzset code I have locally (not
> > > > > latest/greatest, but
> > > > > does support angle brackets):
> > > > > 
> > > > > If there any parse failures, UTC is defaulted.
> > > > 
> > > > We currently leave the time zone info unchanged.
> > > > 
> > > > > Extraneous characters inside brackets or less than 3 characters is a
> > > > > parse failure.
> > > > ✔ Check    ✔ Check
> > > > 
> > > > > Glibc parses the tz name string char by char and allocates space for
> > > > > the name strings so there is no max size.
> > > > 
> > > > The suggestion was that glibc ignores the remaining characters,
> > > > but you imply that glibc in fact uses the equivalent of the
> > > > scanf "%m[...]" (malloc) modifier, and I think using that would
> > > > be against the newlib philosophy to keep things limited and
> > > > under control to support small targets.  Larger targets like
> > > > Cygwin (do our own thing including zoneinfo), and perhaps RTEMS,
> > > > can supply their own enhancements.
> > > > 
> > > > > the name strings so there is no max size.  I am fine if you
> > > > > want to mandate a maximum, but if you do, then too many
> > > > > chars should be treated as a failure.  If you aren't certain
> > > > > of the limit, make the limit higher than you expect.
> > > > Current limits are 3-10 allowing for e.g. <MESZ+03:30> which is
> > > > the most ever likely to be used. It might be reasonable to bump
> > > > it up to say 15.
> > > > 
> > > > > If people run into max limit with reasonable timezone format
> > > > > strings, then
> > > > > we can up the limit.
> > > > 
> > > > The conditions are more or less what is implemented, but we
> > > > could do with a couple more tweaks to improve things, like check
> > > > for more or extraneous chars within the bracket quotes, and that
> > > > no characters remain unconsumed at the end of the parse.

> From 4423c43be6a730144b776ba4ec4204cf71b52348 Mon Sep 17 00:00:00 2001
> From: jdoubleu <hi@jdoubleu.de>
> Date: Sat, 14 May 2022 15:41:22 +0200
> Subject: [PATCH] update tzset tests
> 
> Add test cases for parser errors after reworked parsing behavior.
> ---
>  newlib/testsuite/newlib.time/tzset.c | 56 ++++++++++++++++++++++------
>  1 file changed, 44 insertions(+), 12 deletions(-)
> 
> diff --git a/newlib/testsuite/newlib.time/tzset.c b/newlib/testsuite/newlib.time/tzset.c
> index 0e5b196c6..8702b58db 100644
> --- a/newlib/testsuite/newlib.time/tzset.c
> +++ b/newlib/testsuite/newlib.time/tzset.c
> @@ -111,13 +111,43 @@ struct tz_test test_timezones[] = {
>      { /* Asia/Colombo */            "<+0530>-5:30",                    -IN_SECONDS(5, 30, 0),     NO_TIME},
>      { /* Europe/Berlin */           "CET-1CEST,M3.5.0,M10.5.0/3",      -IN_SECONDS(1, 0, 0),     -IN_SECONDS(2, 0, 0)},
>  
> -    // END of list
> -    {NULL, NO_TIME, NO_TIME}
> +    /// test parsing errors
> +    // 1. names are too long
> +    {"JUSTEXCEEDI1:11:11",                                      0,   NO_TIME},
> +    {"AVERYLONGNAMEWHICHEXCEEDSTZNAMEMAX2:22:22",               0,   NO_TIME},
> +    {"FIRSTVERYLONGNAME3:33:33SECONDVERYLONGNAME4:44:44",       0,   0},
> +    {"<JUSTEXCEEDI>5:55:55",                                    0,   NO_TIME},
> +    {"<FIRSTVERYLONGNAME>3:33:33<SECONDVERYLONGNAME>4:44:44",   0,   0},
> +    {"<+JUSTEXCEED>5:55:55",                                    0,   NO_TIME},
> +
> +    // 2. names are too short
> +    {"JU6:34:47",               0,   NO_TIME},
> +    {"HE6:34:47LO3:34:47",      0,   0},
> +    {"<AB>2:34:47",             0,   NO_TIME},
> +    {"<AB>2:34:47<CD>3:34:47",  0,   0},
> +    
> +    // 3. names contain invalid chars
> +    {"N?ME2:10:56",     0,   NO_TIME},
> +    {"N!ME2:10:56",     0,   NO_TIME},
> +    {"N/ME2:10:56",     0,   NO_TIME},
> +    {"N$ME2:10:56",     0,   NO_TIME},
> +    {"NAME?2:10:56",    0,   NO_TIME},
> +    {"?NAME2:10:56",    0,   NO_TIME},
> +    {"NAME?UNK4:21:15",                 0,   NO_TIME},
> +    {"NAME!UNK4:22:15NEXT/NAME4:23:15", 0,   NO_TIME},
> +
> +    // 4. bogus strings
> +    {"NOINFO",          0,  NO_TIME},
> +    {"HOUR:16:18",      0,  NO_TIME},
> +    {"<BEGIN",          0,  NO_TIME},
> +    {"<NEXT:55",        0,  NO_TIME},
> +    {">WRONG<2:15:00",  0,  NO_TIME},
> +    {"ST<ART4:30:00",   0,  NO_TIME},
> +    //{"MANY8:00:00:00",  0,  NO_TIME},
> +    {"\0",              0,  NO_TIME},
> +    {"M\0STR7:30:36",   0,  NO_TIME}
>  };
>  
> -// helper macros
> -#define FOR_TIMEZONES(iter_name) for (struct tz_test* iter_name = test_timezones; iter_name->tzstr != NULL; ++iter_name)
> -
>  // END test vectors
>  
>  static int failed = 0;
> @@ -136,22 +166,24 @@ void test_TimezoneStrings(void)
>  {
>      char buffer[128];
>  
> -    FOR_TIMEZONES(ptr)
> +    for (int i = 0; i < (sizeof(test_timezones) / sizeof(struct tz_test)); ++i)
>      {
> -        setenv("TZ", ptr->tzstr, 1);
> +        struct tz_test ptr = test_timezones[i];
> +
> +        setenv("TZ", ptr.tzstr, 1);
>          tzset();
>  
> -        snprintf(buffer, 128, "winter time, timezone = \"%s\"", ptr->tzstr);
> +        snprintf(buffer, 128, "winter time, timezone = \"%s\"", ptr.tzstr);
>  
>          struct tm winter_tm_copy = winter_tm; // copy
> -        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr->offset_seconds, mktime(&winter_tm_copy), buffer);
> +        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr.offset_seconds, mktime(&winter_tm_copy), buffer);
>  
> -        if (ptr->dst_offset_seconds != NO_TIME)
> +        if (ptr.dst_offset_seconds != NO_TIME)
>          {
> -            snprintf(buffer, 128, "summer time, timezone = \"%s\"", ptr->tzstr);
> +            snprintf(buffer, 128, "summer time, timezone = \"%s\"", ptr.tzstr);
>  
>              struct tm summer_tm_copy = summer_tm; // copy
> -            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time + ptr->dst_offset_seconds, mktime(&summer_tm_copy), buffer);
> +            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time + ptr.dst_offset_seconds, mktime(&summer_tm_copy), buffer);
>          }
>      }
>  }
> -- 
> 2.35.1
> 


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] update tzset tests
  2022-05-18 18:48                             ` Dimitar Dimitrov
@ 2022-05-18 20:56                               ` Keith Packard
  2022-05-19  8:47                                 ` jdoubleu
  0 siblings, 1 reply; 35+ messages in thread
From: Keith Packard @ 2022-05-18 20:56 UTC (permalink / raw)
  To: Dimitar Dimitrov, jdoubleu; +Cc: newlib

[-- Attachment #1: Type: text/plain, Size: 1859 bytes --]

Dimitar Dimitrov <dimitar@dinux.eu> writes:

> On Tue, May 17, 2022 at 10:45:11AM +0200, jdoubleu wrote:
>> Sorry, here's the patch.
>
> Hi jdoubleu,
>
> I managed to test your change with https://sourceware.org/pipermail/newlib/2022/019710.html
>
> Only the following test case fails in tzset.c:
>      {"<+0123456789ABCDEF>3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
> Failure message is:
>   Assertion failed! Expected 1647906533 to equal 1647893720. winter
> time, timezone = "<+0123456789ABCDEF>3:33:33"

Yeah, the code needs a fix to truncate the TZ name but then skip to the
'>' and keep going.

Something like this (line numbers likely wrong; this patch is from picolibc):

@@ -153,13 +55,8 @@
 
       /* quit if no items, too few or too many chars, or no close quote '>' */
       if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
-          || n < TZNAME_MIN || TZNAME_MAX < n)
+		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
         return;
-      while (tzenv[n] != '>') {
-          if (!tzenv[n])
-              return;
-          n++;
-      }
 
       ++tzenv;	/* bump for close quote '>' */
     }
@@ -199,18 +96,13 @@
 
       /* quit if no items, too few or too many chars, or no close quote '>' */
       if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_dst, &n) <= 0
-          || n < TZNAME_MIN || TZNAME_MAX < n)
+		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
 	{ /* No dst */
 	  _tzname[1] = _tzname[0];
 	  _timezone = tz->__tzrule[0].offset;
 	  _daylight = 0;
-          return;
+	  return;
 	}
-      while (tzenv[n] != '>') {
-          if (!tzenv[n])
-              return;
-          n++;
-      }
 
       ++tzenv;	/* bump for close quote '>' */
     }

-- 
-keith

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 832 bytes --]

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] update tzset tests
  2022-05-18 20:56                               ` Keith Packard
@ 2022-05-19  8:47                                 ` jdoubleu
  2022-05-22  9:51                                   ` [PATCH v2] " jdoubleu
  0 siblings, 1 reply; 35+ messages in thread
From: jdoubleu @ 2022-05-19  8:47 UTC (permalink / raw)
  To: Keith Packard, Dimitar Dimitrov; +Cc: newlib

On 5/18/2022 8:48 PM, Dimitar Dimitrov wrote:
> Only the following test case fails in tzset.c:
>      {"<+0123456789ABCDEF>3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},

Oh, yes. You're right. This test *should* fail. I forgot to remove it 
from the "succeeding" tests.

> BTW, it took me a while to realize that your patch and the source code in
> newlib's GIT have different line endings 

That's unfortunate, I'm sorry. I'll watch out for it in the future.

On 5/18/2022 10:56 PM, Keith Packard wrote:
> Yeah, the code needs a fix to truncate the TZ name but then skip to the
> '>' and keep going.

I don't think it should. As we discussed earlier[1], it should fail in 
such cases.

I'm going fix my patch. Thanks for running the tests!


[1]: https://sourceware.org/pipermail/newlib/2022/019567.html


Cheers
---
🙎🏻‍♂️ jdoubleu
On 5/18/2022 10:56 PM, Keith Packard wrote:
> Dimitar Dimitrov <dimitar@dinux.eu> writes:
> 
>> On Tue, May 17, 2022 at 10:45:11AM +0200, jdoubleu wrote:
>>> Sorry, here's the patch.
>>
>> Hi jdoubleu,
>>
>> I managed to test your change with https://sourceware.org/pipermail/newlib/2022/019710.html
>>
>> Only the following test case fails in tzset.c:
>>       {"<+0123456789ABCDEF>3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
>> Failure message is:
>>    Assertion failed! Expected 1647906533 to equal 1647893720. winter
>> time, timezone = "<+0123456789ABCDEF>3:33:33"
> 
> Yeah, the code needs a fix to truncate the TZ name but then skip to the
> '>' and keep going.
> 
> Something like this (line numbers likely wrong; this patch is from picolibc):
> 
> @@ -153,13 +55,8 @@
>   
>         /* quit if no items, too few or too many chars, or no close quote '>' */
>         if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
> -          || n < TZNAME_MIN || TZNAME_MAX < n)
> +		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
>           return;
> -      while (tzenv[n] != '>') {
> -          if (!tzenv[n])
> -              return;
> -          n++;
> -      }
>   
>         ++tzenv;	/* bump for close quote '>' */
>       }
> @@ -199,18 +96,13 @@
>   
>         /* quit if no items, too few or too many chars, or no close quote '>' */
>         if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_dst, &n) <= 0
> -          || n < TZNAME_MIN || TZNAME_MAX < n)
> +		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
>   	{ /* No dst */
>   	  _tzname[1] = _tzname[0];
>   	  _timezone = tz->__tzrule[0].offset;
>   	  _daylight = 0;
> -          return;
> +	  return;
>   	}
> -      while (tzenv[n] != '>') {
> -          if (!tzenv[n])
> -              return;
> -          n++;
> -      }
>   
>         ++tzenv;	/* bump for close quote '>' */
>       }
> 

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [PATCH v2] update tzset tests
  2022-05-19  8:47                                 ` jdoubleu
@ 2022-05-22  9:51                                   ` jdoubleu
  2022-05-22 21:02                                     ` Dimitar Dimitrov
  0 siblings, 1 reply; 35+ messages in thread
From: jdoubleu @ 2022-05-22  9:51 UTC (permalink / raw)
  To: Dimitar Dimitrov; +Cc: newlib

[-- Attachment #1: Type: text/plain, Size: 551 bytes --]

Dimitar Dimitrov <dimitar@dinux.eu> writes:
> Only the following test case fails in tzset.c:

jdoubleu <hi@jdoubleu.de> writes:
> I'm going fix my patch. Thanks for running the tests! 

I've fixed that one test case. I wasn't able to run it myself, yet.

> BTW, it took me a while to realize that your patch and the source code in
> newlib's GIT have different line endings 

The patch is now formated with CLRF line endings. I checked tzset_r.c 
and it also uses CLRF. But maybe I checked it out the wrong way?


Cheers
---
🙎🏻‍♂️ jdoubleu

[-- Attachment #2: 0001-update-tzset-tests.patch --]
[-- Type: text/plain, Size: 4779 bytes --]

From 90f857d6a4ec999074f892fb0641a2d785c09de5 Mon Sep 17 00:00:00 2001
From: jdoubleu <hi@jdoubleu.de>
Date: Sat, 14 May 2022 15:41:22 +0200
Subject: [PATCH] update tzset tests

Add test cases for parser errors after reworked parsing behavior.
---
 newlib/testsuite/newlib.time/tzset.c | 58 +++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/newlib/testsuite/newlib.time/tzset.c b/newlib/testsuite/newlib.time/tzset.c
index 0e5b196c6..19a33be73 100644
--- a/newlib/testsuite/newlib.time/tzset.c
+++ b/newlib/testsuite/newlib.time/tzset.c
@@ -93,7 +93,6 @@ struct tz_test test_timezones[] = {
     {"<WINT+03>3:15<SUM+02>2:30:15",             IN_SECONDS(3, 15, 0),    IN_SECONDS(2, 30, 15)},
     {"<H3M15>3:15<H2M30S15>2:30:15",             IN_SECONDS(3, 15, 0),    IN_SECONDS(2, 30, 15)},   // requires TZNAME_MAX >= 8 + 1
     {"<+H6M20S12>6:20:12<-H4M40S14>-4:40:14",    IN_SECONDS(6, 20, 12),  -IN_SECONDS(4, 40, 14)},   // requires TZNAME_MAX >= 9 + 1
-    {"<+0123456789ABCDEF>3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
 
     /* 
      * real-world test vectors.
@@ -111,13 +110,44 @@ struct tz_test test_timezones[] = {
     { /* Asia/Colombo */            "<+0530>-5:30",                    -IN_SECONDS(5, 30, 0),     NO_TIME},
     { /* Europe/Berlin */           "CET-1CEST,M3.5.0,M10.5.0/3",      -IN_SECONDS(1, 0, 0),     -IN_SECONDS(2, 0, 0)},
 
-    // END of list
-    {NULL, NO_TIME, NO_TIME}
+    /// test parsing errors
+    // 1. names are too long
+    {"<+0123456789ABCDEF>3:33:33",                              0,   NO_TIME},
+    {"JUSTEXCEEDI1:11:11",                                      0,   NO_TIME},
+    {"AVERYLONGNAMEWHICHEXCEEDSTZNAMEMAX2:22:22",               0,   NO_TIME},
+    {"FIRSTVERYLONGNAME3:33:33SECONDVERYLONGNAME4:44:44",       0,   0},
+    {"<JUSTEXCEEDI>5:55:55",                                    0,   NO_TIME},
+    {"<FIRSTVERYLONGNAME>3:33:33<SECONDVERYLONGNAME>4:44:44",   0,   0},
+    {"<+JUSTEXCEED>5:55:55",                                    0,   NO_TIME},
+
+    // 2. names are too short
+    {"JU6:34:47",               0,   NO_TIME},
+    {"HE6:34:47LO3:34:47",      0,   0},
+    {"<AB>2:34:47",             0,   NO_TIME},
+    {"<AB>2:34:47<CD>3:34:47",  0,   0},
+    
+    // 3. names contain invalid chars
+    {"N?ME2:10:56",     0,   NO_TIME},
+    {"N!ME2:10:56",     0,   NO_TIME},
+    {"N/ME2:10:56",     0,   NO_TIME},
+    {"N$ME2:10:56",     0,   NO_TIME},
+    {"NAME?2:10:56",    0,   NO_TIME},
+    {"?NAME2:10:56",    0,   NO_TIME},
+    {"NAME?UNK4:21:15",                 0,   NO_TIME},
+    {"NAME!UNK4:22:15NEXT/NAME4:23:15", 0,   NO_TIME},
+
+    // 4. bogus strings
+    {"NOINFO",          0,  NO_TIME},
+    {"HOUR:16:18",      0,  NO_TIME},
+    {"<BEGIN",          0,  NO_TIME},
+    {"<NEXT:55",        0,  NO_TIME},
+    {">WRONG<2:15:00",  0,  NO_TIME},
+    {"ST<ART4:30:00",   0,  NO_TIME},
+    //{"MANY8:00:00:00",  0,  NO_TIME},
+    {"\0",              0,  NO_TIME},
+    {"M\0STR7:30:36",   0,  NO_TIME}
 };
 
-// helper macros
-#define FOR_TIMEZONES(iter_name) for (struct tz_test* iter_name = test_timezones; iter_name->tzstr != NULL; ++iter_name)
-
 // END test vectors
 
 static int failed = 0;
@@ -136,22 +166,24 @@ void test_TimezoneStrings(void)
 {
     char buffer[128];
 
-    FOR_TIMEZONES(ptr)
+    for (int i = 0; i < (sizeof(test_timezones) / sizeof(struct tz_test)); ++i)
     {
-        setenv("TZ", ptr->tzstr, 1);
+        struct tz_test ptr = test_timezones[i];
+
+        setenv("TZ", ptr.tzstr, 1);
         tzset();
 
-        snprintf(buffer, 128, "winter time, timezone = \"%s\"", ptr->tzstr);
+        snprintf(buffer, 128, "winter time, timezone = \"%s\"", ptr.tzstr);
 
         struct tm winter_tm_copy = winter_tm; // copy
-        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr->offset_seconds, mktime(&winter_tm_copy), buffer);
+        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr.offset_seconds, mktime(&winter_tm_copy), buffer);
 
-        if (ptr->dst_offset_seconds != NO_TIME)
+        if (ptr.dst_offset_seconds != NO_TIME)
         {
-            snprintf(buffer, 128, "summer time, timezone = \"%s\"", ptr->tzstr);
+            snprintf(buffer, 128, "summer time, timezone = \"%s\"", ptr.tzstr);
 
             struct tm summer_tm_copy = summer_tm; // copy
-            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time + ptr->dst_offset_seconds, mktime(&summer_tm_copy), buffer);
+            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time + ptr.dst_offset_seconds, mktime(&summer_tm_copy), buffer);
         }
     }
 }
-- 
2.35.1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2] update tzset tests
  2022-05-22  9:51                                   ` [PATCH v2] " jdoubleu
@ 2022-05-22 21:02                                     ` Dimitar Dimitrov
  2022-05-27 15:46                                       ` Jeff Johnston
  0 siblings, 1 reply; 35+ messages in thread
From: Dimitar Dimitrov @ 2022-05-22 21:02 UTC (permalink / raw)
  To: jdoubleu; +Cc: newlib

On Sun, May 22, 2022 at 11:51:45AM +0200, jdoubleu wrote:
> Dimitar Dimitrov <dimitar@dinux.eu> writes:
> > Only the following test case fails in tzset.c:
> 
> jdoubleu <hi@jdoubleu.de> writes:
> > I'm going fix my patch. Thanks for running the tests!
> 
> I've fixed that one test case. I wasn't able to run it myself, yet.
Hi,

The entire tzset test case passes for pru-unknown-elf:
  PASS: newlib.time/tzset.c execution

Thanks,
Dimitar

> 
> > BTW, it took me a while to realize that your patch and the source code in
> > newlib's GIT have different line endings
> 
> The patch is now formated with CLRF line endings. I checked tzset_r.c and it
> also uses CLRF. But maybe I checked it out the wrong way?
I see only LF symbols in your patch, not CRLF. Perhaps it's something with my
email client.
> 
> 
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu

> From 90f857d6a4ec999074f892fb0641a2d785c09de5 Mon Sep 17 00:00:00 2001
> From: jdoubleu <hi@jdoubleu.de>
> Date: Sat, 14 May 2022 15:41:22 +0200
> Subject: [PATCH] update tzset tests
> 
> Add test cases for parser errors after reworked parsing behavior.
> ---
>  newlib/testsuite/newlib.time/tzset.c | 58 +++++++++++++++++++++-------
>  1 file changed, 45 insertions(+), 13 deletions(-)
> 
> diff --git a/newlib/testsuite/newlib.time/tzset.c b/newlib/testsuite/newlib.time/tzset.c
> index 0e5b196c6..19a33be73 100644
> --- a/newlib/testsuite/newlib.time/tzset.c
> +++ b/newlib/testsuite/newlib.time/tzset.c
> @@ -93,7 +93,6 @@ struct tz_test test_timezones[] = {
>      {"<WINT+03>3:15<SUM+02>2:30:15",             IN_SECONDS(3, 15, 0),    IN_SECONDS(2, 30, 15)},
>      {"<H3M15>3:15<H2M30S15>2:30:15",             IN_SECONDS(3, 15, 0),    IN_SECONDS(2, 30, 15)},   // requires TZNAME_MAX >= 8 + 1
>      {"<+H6M20S12>6:20:12<-H4M40S14>-4:40:14",    IN_SECONDS(6, 20, 12),  -IN_SECONDS(4, 40, 14)},   // requires TZNAME_MAX >= 9 + 1
> -    {"<+0123456789ABCDEF>3:33:33",               IN_SECONDS(3, 33, 33),   NO_TIME},                 // truncates the name (17 + 1)
>  
>      /* 
>       * real-world test vectors.
> @@ -111,13 +110,44 @@ struct tz_test test_timezones[] = {
>      { /* Asia/Colombo */            "<+0530>-5:30",                    -IN_SECONDS(5, 30, 0),     NO_TIME},
>      { /* Europe/Berlin */           "CET-1CEST,M3.5.0,M10.5.0/3",      -IN_SECONDS(1, 0, 0),     -IN_SECONDS(2, 0, 0)},
>  
> -    // END of list
> -    {NULL, NO_TIME, NO_TIME}
> +    /// test parsing errors
> +    // 1. names are too long
> +    {"<+0123456789ABCDEF>3:33:33",                              0,   NO_TIME},
> +    {"JUSTEXCEEDI1:11:11",                                      0,   NO_TIME},
> +    {"AVERYLONGNAMEWHICHEXCEEDSTZNAMEMAX2:22:22",               0,   NO_TIME},
> +    {"FIRSTVERYLONGNAME3:33:33SECONDVERYLONGNAME4:44:44",       0,   0},
> +    {"<JUSTEXCEEDI>5:55:55",                                    0,   NO_TIME},
> +    {"<FIRSTVERYLONGNAME>3:33:33<SECONDVERYLONGNAME>4:44:44",   0,   0},
> +    {"<+JUSTEXCEED>5:55:55",                                    0,   NO_TIME},
> +
> +    // 2. names are too short
> +    {"JU6:34:47",               0,   NO_TIME},
> +    {"HE6:34:47LO3:34:47",      0,   0},
> +    {"<AB>2:34:47",             0,   NO_TIME},
> +    {"<AB>2:34:47<CD>3:34:47",  0,   0},
> +    
> +    // 3. names contain invalid chars
> +    {"N?ME2:10:56",     0,   NO_TIME},
> +    {"N!ME2:10:56",     0,   NO_TIME},
> +    {"N/ME2:10:56",     0,   NO_TIME},
> +    {"N$ME2:10:56",     0,   NO_TIME},
> +    {"NAME?2:10:56",    0,   NO_TIME},
> +    {"?NAME2:10:56",    0,   NO_TIME},
> +    {"NAME?UNK4:21:15",                 0,   NO_TIME},
> +    {"NAME!UNK4:22:15NEXT/NAME4:23:15", 0,   NO_TIME},
> +
> +    // 4. bogus strings
> +    {"NOINFO",          0,  NO_TIME},
> +    {"HOUR:16:18",      0,  NO_TIME},
> +    {"<BEGIN",          0,  NO_TIME},
> +    {"<NEXT:55",        0,  NO_TIME},
> +    {">WRONG<2:15:00",  0,  NO_TIME},
> +    {"ST<ART4:30:00",   0,  NO_TIME},
> +    //{"MANY8:00:00:00",  0,  NO_TIME},
> +    {"\0",              0,  NO_TIME},
> +    {"M\0STR7:30:36",   0,  NO_TIME}
>  };
>  
> -// helper macros
> -#define FOR_TIMEZONES(iter_name) for (struct tz_test* iter_name = test_timezones; iter_name->tzstr != NULL; ++iter_name)
> -
>  // END test vectors
>  
>  static int failed = 0;
> @@ -136,22 +166,24 @@ void test_TimezoneStrings(void)
>  {
>      char buffer[128];
>  
> -    FOR_TIMEZONES(ptr)
> +    for (int i = 0; i < (sizeof(test_timezones) / sizeof(struct tz_test)); ++i)
>      {
> -        setenv("TZ", ptr->tzstr, 1);
> +        struct tz_test ptr = test_timezones[i];
> +
> +        setenv("TZ", ptr.tzstr, 1);
>          tzset();
>  
> -        snprintf(buffer, 128, "winter time, timezone = \"%s\"", ptr->tzstr);
> +        snprintf(buffer, 128, "winter time, timezone = \"%s\"", ptr.tzstr);
>  
>          struct tm winter_tm_copy = winter_tm; // copy
> -        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr->offset_seconds, mktime(&winter_tm_copy), buffer);
> +        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr.offset_seconds, mktime(&winter_tm_copy), buffer);
>  
> -        if (ptr->dst_offset_seconds != NO_TIME)
> +        if (ptr.dst_offset_seconds != NO_TIME)
>          {
> -            snprintf(buffer, 128, "summer time, timezone = \"%s\"", ptr->tzstr);
> +            snprintf(buffer, 128, "summer time, timezone = \"%s\"", ptr.tzstr);
>  
>              struct tm summer_tm_copy = summer_tm; // copy
> -            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time + ptr->dst_offset_seconds, mktime(&summer_tm_copy), buffer);
> +            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time + ptr.dst_offset_seconds, mktime(&summer_tm_copy), buffer);
>          }
>      }
>  }
> -- 
> 2.35.1
> 


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH v2] update tzset tests
  2022-05-22 21:02                                     ` Dimitar Dimitrov
@ 2022-05-27 15:46                                       ` Jeff Johnston
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Johnston @ 2022-05-27 15:46 UTC (permalink / raw)
  To: Dimitar Dimitrov; +Cc: jdoubleu, Newlib

Thanks to both of you for the patch and testing.

I had to do some manual cut/paste as the patch wouldn't apply but I have
merged the test patch plus my changes for tzset.
If I have messed anything up, please reply to the list.

Regards,

-- Jeff J.

On Sun, May 22, 2022 at 5:02 PM Dimitar Dimitrov <dimitar@dinux.eu> wrote:

> On Sun, May 22, 2022 at 11:51:45AM +0200, jdoubleu wrote:
> > Dimitar Dimitrov <dimitar@dinux.eu> writes:
> > > Only the following test case fails in tzset.c:
> >
> > jdoubleu <hi@jdoubleu.de> writes:
> > > I'm going fix my patch. Thanks for running the tests!
> >
> > I've fixed that one test case. I wasn't able to run it myself, yet.
> Hi,
>
> The entire tzset test case passes for pru-unknown-elf:
>   PASS: newlib.time/tzset.c execution
>
> Thanks,
> Dimitar
>
> >
> > > BTW, it took me a while to realize that your patch and the source code
> in
> > > newlib's GIT have different line endings
> >
> > The patch is now formated with CLRF line endings. I checked tzset_r.c
> and it
> > also uses CLRF. But maybe I checked it out the wrong way?
> I see only LF symbols in your patch, not CRLF. Perhaps it's something with
> my
> email client.
> >
> >
> > Cheers
> > ---
> > 🙎🏻‍♂️ jdoubleu
>
> > From 90f857d6a4ec999074f892fb0641a2d785c09de5 Mon Sep 17 00:00:00 2001
> > From: jdoubleu <hi@jdoubleu.de>
> > Date: Sat, 14 May 2022 15:41:22 +0200
> > Subject: [PATCH] update tzset tests
> >
> > Add test cases for parser errors after reworked parsing behavior.
> > ---
> >  newlib/testsuite/newlib.time/tzset.c | 58 +++++++++++++++++++++-------
> >  1 file changed, 45 insertions(+), 13 deletions(-)
> >
> > diff --git a/newlib/testsuite/newlib.time/tzset.c
> b/newlib/testsuite/newlib.time/tzset.c
> > index 0e5b196c6..19a33be73 100644
> > --- a/newlib/testsuite/newlib.time/tzset.c
> > +++ b/newlib/testsuite/newlib.time/tzset.c
> > @@ -93,7 +93,6 @@ struct tz_test test_timezones[] = {
> >      {"<WINT+03>3:15<SUM+02>2:30:15",             IN_SECONDS(3, 15, 0),
>   IN_SECONDS(2, 30, 15)},
> >      {"<H3M15>3:15<H2M30S15>2:30:15",             IN_SECONDS(3, 15, 0),
>   IN_SECONDS(2, 30, 15)},   // requires TZNAME_MAX >= 8 + 1
> >      {"<+H6M20S12>6:20:12<-H4M40S14>-4:40:14",    IN_SECONDS(6, 20,
> 12),  -IN_SECONDS(4, 40, 14)},   // requires TZNAME_MAX >= 9 + 1
> > -    {"<+0123456789ABCDEF>3:33:33",               IN_SECONDS(3, 33,
> 33),   NO_TIME},                 // truncates the name (17 + 1)
> >
> >      /*
> >       * real-world test vectors.
> > @@ -111,13 +110,44 @@ struct tz_test test_timezones[] = {
> >      { /* Asia/Colombo */            "<+0530>-5:30",
> -IN_SECONDS(5, 30, 0),     NO_TIME},
> >      { /* Europe/Berlin */           "CET-1CEST,M3.5.0,M10.5.0/3",
> -IN_SECONDS(1, 0, 0),     -IN_SECONDS(2, 0, 0)},
> >
> > -    // END of list
> > -    {NULL, NO_TIME, NO_TIME}
> > +    /// test parsing errors
> > +    // 1. names are too long
> > +    {"<+0123456789ABCDEF>3:33:33",                              0,
>  NO_TIME},
> > +    {"JUSTEXCEEDI1:11:11",                                      0,
>  NO_TIME},
> > +    {"AVERYLONGNAMEWHICHEXCEEDSTZNAMEMAX2:22:22",               0,
>  NO_TIME},
> > +    {"FIRSTVERYLONGNAME3:33:33SECONDVERYLONGNAME4:44:44",       0,   0},
> > +    {"<JUSTEXCEEDI>5:55:55",                                    0,
>  NO_TIME},
> > +    {"<FIRSTVERYLONGNAME>3:33:33<SECONDVERYLONGNAME>4:44:44",   0,   0},
> > +    {"<+JUSTEXCEED>5:55:55",                                    0,
>  NO_TIME},
> > +
> > +    // 2. names are too short
> > +    {"JU6:34:47",               0,   NO_TIME},
> > +    {"HE6:34:47LO3:34:47",      0,   0},
> > +    {"<AB>2:34:47",             0,   NO_TIME},
> > +    {"<AB>2:34:47<CD>3:34:47",  0,   0},
> > +
> > +    // 3. names contain invalid chars
> > +    {"N?ME2:10:56",     0,   NO_TIME},
> > +    {"N!ME2:10:56",     0,   NO_TIME},
> > +    {"N/ME2:10:56",     0,   NO_TIME},
> > +    {"N$ME2:10:56",     0,   NO_TIME},
> > +    {"NAME?2:10:56",    0,   NO_TIME},
> > +    {"?NAME2:10:56",    0,   NO_TIME},
> > +    {"NAME?UNK4:21:15",                 0,   NO_TIME},
> > +    {"NAME!UNK4:22:15NEXT/NAME4:23:15", 0,   NO_TIME},
> > +
> > +    // 4. bogus strings
> > +    {"NOINFO",          0,  NO_TIME},
> > +    {"HOUR:16:18",      0,  NO_TIME},
> > +    {"<BEGIN",          0,  NO_TIME},
> > +    {"<NEXT:55",        0,  NO_TIME},
> > +    {">WRONG<2:15:00",  0,  NO_TIME},
> > +    {"ST<ART4:30:00",   0,  NO_TIME},
> > +    //{"MANY8:00:00:00",  0,  NO_TIME},
> > +    {"\0",              0,  NO_TIME},
> > +    {"M\0STR7:30:36",   0,  NO_TIME}
> >  };
> >
> > -// helper macros
> > -#define FOR_TIMEZONES(iter_name) for (struct tz_test* iter_name =
> test_timezones; iter_name->tzstr != NULL; ++iter_name)
> > -
> >  // END test vectors
> >
> >  static int failed = 0;
> > @@ -136,22 +166,24 @@ void test_TimezoneStrings(void)
> >  {
> >      char buffer[128];
> >
> > -    FOR_TIMEZONES(ptr)
> > +    for (int i = 0; i < (sizeof(test_timezones) / sizeof(struct
> tz_test)); ++i)
> >      {
> > -        setenv("TZ", ptr->tzstr, 1);
> > +        struct tz_test ptr = test_timezones[i];
> > +
> > +        setenv("TZ", ptr.tzstr, 1);
> >          tzset();
> >
> > -        snprintf(buffer, 128, "winter time, timezone = \"%s\"",
> ptr->tzstr);
> > +        snprintf(buffer, 128, "winter time, timezone = \"%s\"",
> ptr.tzstr);
> >
> >          struct tm winter_tm_copy = winter_tm; // copy
> > -        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time +
> ptr->offset_seconds, mktime(&winter_tm_copy), buffer);
> > +        TEST_ASSERT_EQUAL_INT_MESSAGE(winter_time + ptr.offset_seconds,
> mktime(&winter_tm_copy), buffer);
> >
> > -        if (ptr->dst_offset_seconds != NO_TIME)
> > +        if (ptr.dst_offset_seconds != NO_TIME)
> >          {
> > -            snprintf(buffer, 128, "summer time, timezone = \"%s\"",
> ptr->tzstr);
> > +            snprintf(buffer, 128, "summer time, timezone = \"%s\"",
> ptr.tzstr);
> >
> >              struct tm summer_tm_copy = summer_tm; // copy
> > -            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time +
> ptr->dst_offset_seconds, mktime(&summer_tm_copy), buffer);
> > +            TEST_ASSERT_EQUAL_INT_MESSAGE(summer_time +
> ptr.dst_offset_seconds, mktime(&summer_tm_copy), buffer);
> >          }
> >      }
> >  }
> > --
> > 2.35.1
> >
>
>

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-05-12 18:35 ` jdoubleu
@ 2022-05-16 17:49   ` Jeff Johnston
  0 siblings, 0 replies; 35+ messages in thread
From: Jeff Johnston @ 2022-05-16 17:49 UTC (permalink / raw)
  To: jdoubleu; +Cc: Newlib

[-- Attachment #1: Type: text/plain, Size: 1370 bytes --]

Hello, thanks for looking at it.  I forgot to increase the name buffers by
1 and pass 11 to the sscanf.  This will
make the max test trigger when needed.

I have included a new patch.

-- Jeff J.


On Thu, May 12, 2022 at 2:36 PM jdoubleu <hi@jdoubleu.de> wrote:

> Hi Jeff,
>
> I finally found some time to have a look at your patch.
> >       if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
> >               || n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
>
> > +      else if (n < TZNAME_MIN || TZNAME_MAX < n)
>
> I think the sub-expression "TZNAME_MAX < n" can never be true, can it?
> sscanf will always stop after 10 characters.
>
> What do you think about including the TZNAME_MAX macro inside the format
> string?
>
> > #define STR(s) #s
> > #define STR_LIT(s) STR(s)
> > ...
> > sscanf(tzenv, "%" STR_LIT(TZNAME_MAX) "[-+0-9A-Za-z]%n", __tzname_dst,
> &n)
>
> I'll try to update the tests in the coming days, so I can verify your
> changes work as expected.
>
>
> Cheers
> ---
> 🙎🏻‍♂️ jdoubleu
> On 4/29/2022 5:46 PM, Jeff Johnston wrote:
> > I have revised my tzset_r.c patch so that the tzrules are initialized so
> we
> > don't inherit the previous settings if not specified for a particular TZ.
> > As well, I defaulted them if TZ is not specified.
> >
> > -- Jeff J.
>
>

[-- Attachment #2: 0001-Modify-tzset_r.c-to-handle-errors.patch --]
[-- Type: text/x-patch, Size: 5926 bytes --]

From 45d794aa7be198fd9fbb10edbd2dab9b802a7b65 Mon Sep 17 00:00:00 2001
From: Jeff Johnston <jjohnstn@redhat.com>
Date: Wed, 27 Apr 2022 15:27:00 -0400
Subject: [PATCH] Modify tzset_r.c to handle errors

- change __tzset_r so errors end up setting the timezone to
  unnamed UTC
---
 newlib/libc/time/tzset_r.c | 68 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/newlib/libc/time/tzset_r.c b/newlib/libc/time/tzset_r.c
index 9cb30b1..a6f15f6 100644
--- a/newlib/libc/time/tzset_r.c
+++ b/newlib/libc/time/tzset_r.c
@@ -12,8 +12,8 @@
 #define TZNAME_MIN	3	/* POSIX min TZ abbr size local def */
 #define TZNAME_MAX	10	/* POSIX max TZ abbr size local def */
 
-static char __tzname_std[TZNAME_MAX + 1];
-static char __tzname_dst[TZNAME_MAX + 1];
+static char __tzname_std[TZNAME_MAX + 2];
+static char __tzname_dst[TZNAME_MAX + 2];
 static char *prev_tzenv = NULL;
 
 void
@@ -23,7 +23,9 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   unsigned short hh, mm, ss, m, w, d;
   int sign, n;
   int i, ch;
+  long offset0, offset1;
   __tzinfo_type *tz = __gettzinfo ();
+  struct __tzrule_struct default_tzrule = {'J', 0, 0, 0, 0, (time_t)0, 0L };
 
   if ((tzenv = _getenv_r (reent_ptr, "TZ")) == NULL)
       {
@@ -31,6 +33,8 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
 	_daylight = 0;
 	_tzname[0] = "GMT";
 	_tzname[1] = "GMT";
+	tz->__tzrule[0] = default_tzrule;
+	tz->__tzrule[1] = default_tzrule;
 	free(prev_tzenv);
 	prev_tzenv = NULL;
 	return;
@@ -44,6 +48,14 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   if (prev_tzenv != NULL)
     strcpy (prev_tzenv, tzenv);
 
+  /* default to unnamed UTC in case of error */
+  _timezone = 0;
+  _daylight = 0;
+  _tzname[0] = "";
+  _tzname[1] = "";
+  tz->__tzrule[0] = default_tzrule;
+  tz->__tzrule[1] = default_tzrule;
+
   /* ignore implementation-specific format specifier */
   if (*tzenv == ':')
     ++tzenv;  
@@ -54,7 +66,7 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
       ++tzenv;
 
       /* quit if no items, too few or too many chars, or no close quote '>' */
-      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
+      if (sscanf (tzenv, "%11[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
 		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
         return;
 
@@ -63,7 +75,7 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   else
     {
       /* allow POSIX unquoted alphabetic tz abbr e.g. MESZ */
-      if (sscanf (tzenv, "%10[A-Za-z]%n", __tzname_std, &n) <= 0
+      if (sscanf (tzenv, "%11[A-Za-z]%n", __tzname_std, &n) <= 0
 				|| n < TZNAME_MIN || TZNAME_MAX < n)
         return;
     }
@@ -85,8 +97,7 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   if (sscanf (tzenv, "%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n) < 1)
     return;
   
-  tz->__tzrule[0].offset = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
-  _tzname[0] = __tzname_std;
+  offset0 = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
   tzenv += n;
 
   /* allow POSIX angle bracket < > quoted signed alphanumeric tz abbr e.g. <MESZ+0330> */
@@ -95,12 +106,16 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
       ++tzenv;
 
       /* quit if no items, too few or too many chars, or no close quote '>' */
-      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_dst, &n) <= 0
-		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
+      if (sscanf (tzenv, "%11[-+0-9A-Za-z]%n", __tzname_dst, &n) <= 0 && tzenv[0] == '>')
 	{ /* No dst */
-	  _tzname[1] = _tzname[0];
-	  _timezone = tz->__tzrule[0].offset;
-	  _daylight = 0;
+          _tzname[0] = __tzname_std;
+          _tzname[1] = _tzname[0];
+          tz->__tzrule[0].offset = offset0;
+          _timezone = offset0;
+	  return;
+        }
+      else if (n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
+	{ /* error */
 	  return;
 	}
 
@@ -109,17 +124,20 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   else
     {
       /* allow POSIX unquoted alphabetic tz abbr e.g. MESZ */
-      if (sscanf (tzenv, "%10[A-Za-z]%n", __tzname_dst, &n) <= 0
-				|| n < TZNAME_MIN || TZNAME_MAX < n)
+      if (sscanf (tzenv, "%11[A-Za-z]%n", __tzname_dst, &n) <= 0)
 	{ /* No dst */
-	  _tzname[1] = _tzname[0];
-	  _timezone = tz->__tzrule[0].offset;
-	  _daylight = 0;
+          _tzname[0] = __tzname_std;
+          _tzname[1] = _tzname[0];
+          tz->__tzrule[0].offset = offset0;
+          _timezone = offset0;
+	  return;
+        }
+      else if (n < TZNAME_MIN || TZNAME_MAX < n)
+	{ /* error */
 	  return;
 	}
     }
 
-  _tzname[1] = __tzname_dst;
   tzenv += n;
 
   /* otherwise we have a dst name, look for the offset */
@@ -138,9 +156,9 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   
   n  = 0;
   if (sscanf (tzenv, "%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n) <= 0)
-    tz->__tzrule[1].offset = tz->__tzrule[0].offset - 3600;
+    offset1 = offset0 - 3600;
   else
-    tz->__tzrule[1].offset = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
+    offset1 = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
 
   tzenv += n;
 
@@ -211,13 +229,23 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
       n = 0;
       
       if (*tzenv == '/')
-	sscanf (tzenv, "/%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n);
+	if (sscanf (tzenv, "/%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n) <= 0)
+	  {
+	    /* error in time format, restore tz rules to default and return */
+	    tz->__tzrule[0] = default_tzrule;
+	    tz->__tzrule[1] = default_tzrule;
+            return;
+          }
 
       tz->__tzrule[i].s = ss + SECSPERMIN * mm + SECSPERHOUR  * hh;
       
       tzenv += n;
     }
 
+  tz->__tzrule[0].offset = offset0;
+  tz->__tzrule[1].offset = offset1;
+  _tzname[0] = __tzname_std;
+  _tzname[1] = __tzname_dst;
   __tzcalc_limits (tz->__tzyear);
   _timezone = tz->__tzrule[0].offset;  
   _daylight = tz->__tzrule[0].offset != tz->__tzrule[1].offset;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
  2022-04-29 15:46 Jeff Johnston
@ 2022-05-12 18:35 ` jdoubleu
  2022-05-16 17:49   ` Jeff Johnston
  0 siblings, 1 reply; 35+ messages in thread
From: jdoubleu @ 2022-05-12 18:35 UTC (permalink / raw)
  To: Jeff Johnston; +Cc: newlib

Hi Jeff,

I finally found some time to have a look at your patch.
>       if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_std, &n) <= 0
> 		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])

> +      else if (n < TZNAME_MIN || TZNAME_MAX < n)

I think the sub-expression "TZNAME_MAX < n" can never be true, can it? 
sscanf will always stop after 10 characters.

What do you think about including the TZNAME_MAX macro inside the format 
string?

> #define STR(s) #s
> #define STR_LIT(s) STR(s)
> ...
> sscanf(tzenv, "%" STR_LIT(TZNAME_MAX) "[-+0-9A-Za-z]%n", __tzname_dst, &n)

I'll try to update the tests in the coming days, so I can verify your 
changes work as expected.


Cheers
---
🙎🏻‍♂️ jdoubleu
On 4/29/2022 5:46 PM, Jeff Johnston wrote:
> I have revised my tzset_r.c patch so that the tzrules are initialized so we
> don't inherit the previous settings if not specified for a particular TZ.
> As well, I defaulted them if TZ is not specified.
> 
> -- Jeff J.

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [PATCH] add tests for tzset(3)
@ 2022-04-29 15:46 Jeff Johnston
  2022-05-12 18:35 ` jdoubleu
  0 siblings, 1 reply; 35+ messages in thread
From: Jeff Johnston @ 2022-04-29 15:46 UTC (permalink / raw)
  To: Newlib

[-- Attachment #1: Type: text/plain, Size: 212 bytes --]

I have revised my tzset_r.c patch so that the tzrules are initialized so we
don't inherit the previous settings if not specified for a particular TZ.
As well, I defaulted them if TZ is not specified.

-- Jeff J.

[-- Attachment #2: 0001-Modify-tzset_r.c-to-handle-errors.patch --]
[-- Type: text/x-patch, Size: 4850 bytes --]

From d49236849c69255090fa533177ac71ae3878cd7e Mon Sep 17 00:00:00 2001
From: Jeff Johnston <jjohnstn@redhat.com>
Date: Wed, 27 Apr 2022 15:27:00 -0400
Subject: [PATCH] Modify tzset_r.c to handle errors

- change __tzset_r so errors end up setting the timezone to
  unnamed UTC
---
 newlib/libc/time/tzset_r.c | 60 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 16 deletions(-)

diff --git a/newlib/libc/time/tzset_r.c b/newlib/libc/time/tzset_r.c
index 9cb30b1..962652d 100644
--- a/newlib/libc/time/tzset_r.c
+++ b/newlib/libc/time/tzset_r.c
@@ -23,7 +23,9 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   unsigned short hh, mm, ss, m, w, d;
   int sign, n;
   int i, ch;
+  long offset0, offset1;
   __tzinfo_type *tz = __gettzinfo ();
+  struct __tzrule_struct default_tzrule = {'J', 0, 0, 0, 0, (time_t)0, 0L };
 
   if ((tzenv = _getenv_r (reent_ptr, "TZ")) == NULL)
       {
@@ -31,6 +33,8 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
 	_daylight = 0;
 	_tzname[0] = "GMT";
 	_tzname[1] = "GMT";
+	tz->__tzrule[0] = default_tzrule;
+	tz->__tzrule[1] = default_tzrule;
 	free(prev_tzenv);
 	prev_tzenv = NULL;
 	return;
@@ -44,6 +48,14 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   if (prev_tzenv != NULL)
     strcpy (prev_tzenv, tzenv);
 
+  /* default to unnamed UTC in case of error */
+  _timezone = 0;
+  _daylight = 0;
+  _tzname[0] = "";
+  _tzname[1] = "";
+  tz->__tzrule[0] = default_tzrule;
+  tz->__tzrule[1] = default_tzrule;
+
   /* ignore implementation-specific format specifier */
   if (*tzenv == ':')
     ++tzenv;  
@@ -85,8 +97,7 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   if (sscanf (tzenv, "%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n) < 1)
     return;
   
-  tz->__tzrule[0].offset = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
-  _tzname[0] = __tzname_std;
+  offset0 = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
   tzenv += n;
 
   /* allow POSIX angle bracket < > quoted signed alphanumeric tz abbr e.g. <MESZ+0330> */
@@ -95,12 +106,16 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
       ++tzenv;
 
       /* quit if no items, too few or too many chars, or no close quote '>' */
-      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_dst, &n) <= 0
-		|| n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
+      if (sscanf (tzenv, "%10[-+0-9A-Za-z]%n", __tzname_dst, &n) <= 0 && tzenv[0] == '>')
 	{ /* No dst */
-	  _tzname[1] = _tzname[0];
-	  _timezone = tz->__tzrule[0].offset;
-	  _daylight = 0;
+          _tzname[0] = __tzname_std;
+          _tzname[1] = _tzname[0];
+          tz->__tzrule[0].offset = offset0;
+          _timezone = offset0;
+	  return;
+        }
+      else if (n < TZNAME_MIN || TZNAME_MAX < n || '>' != tzenv[n])
+	{ /* error */
 	  return;
 	}
 
@@ -109,17 +124,20 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   else
     {
       /* allow POSIX unquoted alphabetic tz abbr e.g. MESZ */
-      if (sscanf (tzenv, "%10[A-Za-z]%n", __tzname_dst, &n) <= 0
-				|| n < TZNAME_MIN || TZNAME_MAX < n)
+      if (sscanf (tzenv, "%10[A-Za-z]%n", __tzname_dst, &n) <= 0)
 	{ /* No dst */
-	  _tzname[1] = _tzname[0];
-	  _timezone = tz->__tzrule[0].offset;
-	  _daylight = 0;
+          _tzname[0] = __tzname_std;
+          _tzname[1] = _tzname[0];
+          tz->__tzrule[0].offset = offset0;
+          _timezone = offset0;
+	  return;
+        }
+      else if (n < TZNAME_MIN || TZNAME_MAX < n)
+	{ /* error */
 	  return;
 	}
     }
 
-  _tzname[1] = __tzname_dst;
   tzenv += n;
 
   /* otherwise we have a dst name, look for the offset */
@@ -138,9 +156,9 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
   
   n  = 0;
   if (sscanf (tzenv, "%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n) <= 0)
-    tz->__tzrule[1].offset = tz->__tzrule[0].offset - 3600;
+    offset1 = offset0 - 3600;
   else
-    tz->__tzrule[1].offset = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
+    offset1 = sign * (ss + SECSPERMIN * mm + SECSPERHOUR * hh);
 
   tzenv += n;
 
@@ -211,13 +229,23 @@ _tzset_unlocked_r (struct _reent *reent_ptr)
       n = 0;
       
       if (*tzenv == '/')
-	sscanf (tzenv, "/%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n);
+	if (sscanf (tzenv, "/%hu%n:%hu%n:%hu%n", &hh, &n, &mm, &n, &ss, &n) <= 0)
+	  {
+	    /* error in time format, restore tz rules to default and return */
+	    tz->__tzrule[0] = default_tzrule;
+	    tz->__tzrule[1] = default_tzrule;
+            return;
+          }
 
       tz->__tzrule[i].s = ss + SECSPERMIN * mm + SECSPERHOUR  * hh;
       
       tzenv += n;
     }
 
+  tz->__tzrule[0].offset = offset0;
+  tz->__tzrule[1].offset = offset1;
+  _tzname[0] = __tzname_std;
+  _tzname[1] = __tzname_dst;
   __tzcalc_limits (tz->__tzyear);
   _timezone = tz->__tzrule[0].offset;  
   _daylight = tz->__tzrule[0].offset != tz->__tzrule[1].offset;
-- 
1.8.3.1


^ permalink raw reply	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2022-05-27 15:47 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-04-07 15:58 [PATCH] add tests for tzset(3) jdoubleu
2022-04-08 21:21 ` Jeff Johnston
2022-04-10  8:43 ` Dimitar Dimitrov
2022-04-10 17:55   ` jdoubleu
2022-04-10 21:00     ` Dimitar Dimitrov
2022-04-11 11:17       ` jdoubleu
2022-04-11 17:27         ` Dimitar Dimitrov
2022-04-12 11:19           ` jdoubleu
2022-04-12 18:33             ` Brian Inglis
2022-04-07 23:34               ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Brian Inglis
2022-04-07 23:34                 ` [PATCH v2 1/2] newlib/libc/time/tzset.c: doc update POSIX angle bracket <> support Brian Inglis
2022-04-07 23:34                 ` [PATCH v2 2/2] newlib/libc/time/tzset_r.c(_tzset_unlocked_r): " Brian Inglis
2022-04-08 19:11                 ` [PATCH v2 0/2] add tzset/_r POSIX angle bracket <> support in TZ env var Jeff Johnston
2022-04-13 17:53                 ` [PATCH] add tests for tzset(3) Brian Inglis
2022-04-13 20:33                   ` Jeff Johnston
2022-04-13 22:19                     ` Brian Inglis
2022-04-14  8:59                       ` jdoubleu
2022-04-14 16:31                         ` Brian Inglis
2022-04-14 19:23                           ` Jeff Johnston
2022-04-15 10:10                             ` jdoubleu
2022-04-27 19:30                               ` Jeff Johnston
2022-05-14 14:39                         ` jdoubleu
2022-05-16 16:05                           ` Dimitar Dimitrov
2022-05-16 17:38                             ` Jeff Johnston
2022-05-17  8:45                           ` [PATCH] update tzset tests jdoubleu
2022-05-18 18:48                             ` Dimitar Dimitrov
2022-05-18 20:56                               ` Keith Packard
2022-05-19  8:47                                 ` jdoubleu
2022-05-22  9:51                                   ` [PATCH v2] " jdoubleu
2022-05-22 21:02                                     ` Dimitar Dimitrov
2022-05-27 15:46                                       ` Jeff Johnston
2022-04-13 22:21               ` [PATCH] add tests for tzset(3) Brian Inglis
2022-04-29 15:46 Jeff Johnston
2022-05-12 18:35 ` jdoubleu
2022-05-16 17:49   ` Jeff Johnston

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).